[Feat-Prometheus] Track exception status on litellm_deployment_failure_responses (#5706)

* add litellm_deployment_cooled_down

* track num cooldowns on prometheus

* track exception status

* fix linting

* docs prom metrics

* cleanup premium user checks

* prom track deployment failure state

* docs prometheus
This commit is contained in:
Ishaan Jaff 2024-09-14 18:44:31 -07:00 committed by GitHub
parent b878a67a7c
commit c8eff2dc65
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 171 additions and 130 deletions

View file

@@ -11,7 +11,7 @@ from typing import TYPE_CHECKING, Any, List, Optional, Union
import litellm
from litellm._logging import verbose_router_logger
-from litellm.router_utils.cooldown_callbacks import router_cooldown_handler
+from litellm.router_utils.cooldown_callbacks import router_cooldown_event_callback
from litellm.utils import get_utc_datetime
from .router_callbacks.track_deployment_metrics import (
@@ -184,7 +184,7 @@ def _set_cooldown_deployments(
# Trigger cooldown callback handler
asyncio.create_task(
-router_cooldown_handler(
+router_cooldown_event_callback(
litellm_router_instance=litellm_router_instance,
deployment_id=deployment,
exception_status=exception_status,