Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 10:44:24 +00:00
[Feat-Prometheus] Track exception status on litellm_deployment_failure_responses (#5706)
* add litellm_deployment_cooled_down
* track num cooldowns on prometheus
* track exception status
* fix linting
* docs prom metrics
* cleanup premium user checks
* prom track deployment failure state
* docs prometheus
This commit is contained in:
parent: b878a67a7c
commit: c8eff2dc65
6 changed files with 171 additions and 130 deletions
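For context before the diff: the litellm_deployment_cooled_down metric named in the commit message is a labeled Prometheus counter. Below is a minimal sketch of the kind of counter increment_deployment_cooled_down() plausibly wraps, assuming the standard prometheus_client library; the actual definition lives elsewhere in this commit (presumably litellm/integrations/prometheus.py) and is not part of this excerpt, so treat names and label order here as assumptions.

    from prometheus_client import Counter

    # Hypothetical stand-in for the metric behind increment_deployment_cooled_down();
    # label names mirror the keyword arguments used in the diff below.
    litellm_deployment_cooled_down = Counter(
        "litellm_deployment_cooled_down",
        "Times a deployment was put into cooldown, labeled by exception status",
        labelnames=[
            "litellm_model_name",
            "model_id",
            "api_base",
            "api_provider",
            "exception_status",
        ],
    )

    def increment_deployment_cooled_down(
        litellm_model_name: str,
        model_id: str,
        api_base: str,
        api_provider: str,
        exception_status: str,
    ) -> None:
        # One time series per (deployment, exception status) pair.
        litellm_deployment_cooled_down.labels(
            litellm_model_name, model_id, api_base, api_provider, exception_status
        ).inc()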
@@ -16,32 +16,39 @@ else:
     LitellmRouter = Any


-async def router_cooldown_handler(
+async def router_cooldown_event_callback(
     litellm_router_instance: LitellmRouter,
     deployment_id: str,
     exception_status: Union[str, int],
     cooldown_time: float,
 ):
     """
     Callback triggered when a deployment is put into cooldown by litellm
+
+    - Updates deployment state on Prometheus
+    - Increments cooldown metric for deployment on Prometheus
     """
+    verbose_logger.debug("In router_cooldown_event_callback - updating prometheus")
     _deployment = litellm_router_instance.get_deployment(model_id=deployment_id)
     if _deployment is None:
         verbose_logger.warning(
-            f"in router_cooldown_handler but _deployment is None for deployment_id={deployment_id}. Doing nothing"
+            f"in router_cooldown_event_callback but _deployment is None for deployment_id={deployment_id}. Doing nothing"
         )
         return
     _litellm_params = _deployment["litellm_params"]
     temp_litellm_params = copy.deepcopy(_litellm_params)
     temp_litellm_params = dict(temp_litellm_params)
-    _model_name = _deployment.get("model_name", None)
-    _api_base = litellm.get_api_base(
-        model=_model_name, optional_params=temp_litellm_params
-    )
+    _model_name = _deployment.get("model_name", None) or ""
+    _api_base = (
+        litellm.get_api_base(model=_model_name, optional_params=temp_litellm_params)
+        or ""
+    )
     model_info = _deployment["model_info"]
     model_id = model_info.id

-    litellm_model_name = temp_litellm_params.get("model")
+    litellm_model_name = temp_litellm_params.get("model") or ""
     llm_provider = ""
     try:
         _, llm_provider, _, _ = litellm.get_llm_provider(
             model=litellm_model_name,
             custom_llm_provider=temp_litellm_params.get("custom_llm_provider"),
         )
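The try/except around litellm.get_llm_provider() exists because provider detection raises for unrecognized model strings; on failure the callback keeps the empty-string fallback for llm_provider. A standalone sketch of the call, assuming litellm is installed — the exact tuple contents are my reading of the helper, not something this diff confirms:

    import litellm

    # get_llm_provider() returns a (model, provider, dynamic_api_key, api_base)
    # tuple; the cooldown callback keeps only the provider string for its labels.
    model, provider, _, _ = litellm.get_llm_provider(model="azure/chatgpt-v-2")
    print(model, provider)  # expected: "chatgpt-v-2" "azure"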
@ -50,13 +57,29 @@ async def router_cooldown_handler(
|
|||
pass
|
||||
|
||||
# Trigger cooldown on Prometheus
|
||||
from litellm.litellm_core_utils.litellm_logging import prometheusLogger
|
||||
from litellm.integrations.prometheus import PrometheusLogger
|
||||
|
||||
prometheusLogger = None
|
||||
for callback in litellm.callbacks:
|
||||
if isinstance(callback, PrometheusLogger):
|
||||
prometheusLogger = callback
|
||||
|
||||
if prometheusLogger is not None:
|
||||
prometheusLogger.set_deployment_complete_outage(
|
||||
litellm_model_name=_model_name,
|
||||
model_id=model_id,
|
||||
api_base=_api_base,
|
||||
api_provider=llm_provider,
|
||||
)
|
||||
|
||||
if isinstance(prometheusLogger, PrometheusLogger):
|
||||
prometheusLogger.set_deployment_complete_outage(
|
||||
litellm_model_name=_model_name,
|
||||
model_id=model_id,
|
||||
api_base=_api_base,
|
||||
api_provider=llm_provider,
|
||||
)
|
||||
|
||||
prometheusLogger.increment_deployment_cooled_down(
|
||||
litellm_model_name=_model_name,
|
||||
model_id=model_id,
|
||||
api_base=_api_base,
|
||||
api_provider=llm_provider,
|
||||
exception_status=str(exception_status),
|
||||
)
|
||||
|
||||
return
|
||||
|
|
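The second hunk replaces the module-level prometheusLogger import with a scan over litellm.callbacks, so the handler only emits metrics when a PrometheusLogger is actually registered. A small sketch of that discovery pattern, generalized into a helper — first_callback_of_type() is hypothetical, not part of litellm:

    from typing import Optional, Type, TypeVar

    import litellm
    from litellm.integrations.prometheus import PrometheusLogger

    T = TypeVar("T")

    def first_callback_of_type(callback_type: Type[T]) -> Optional[T]:
        # Return the first registered callback that is an instance of the
        # requested class, mirroring the loop in the diff above.
        for callback in litellm.callbacks:
            if isinstance(callback, callback_type):
                return callback
        return None

    prometheus_logger = first_callback_of_type(PrometheusLogger)
    if prometheus_logger is not None:
        # Safe to record deployment outage / cooldown metrics here.
        pass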