Merge pull request #5153 from BerriAI/litellm_track_fallback_prometheus

Feat - Proxy track fallback metrics on prometheus

Commit 3bc39af1b9
5 changed files with 108 additions and 0 deletions
@@ -58,6 +58,13 @@ class CustomLogger:  # https://docs.litellm.ai/docs/observability/custom_callback
     def pre_call_check(self, deployment: dict) -> Optional[dict]:
         pass

+    #### Fallback Events - router/proxy only ####
+    async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
+        pass
+
+    async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
+        pass
+
     #### ADAPTERS #### Allow calling 100+ LLMs in custom format - https://github.com/BerriAI/litellm/pulls

     def translate_completion_input_params(
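The new hooks are no-ops on the base class, so existing callbacks keep working unchanged; a callback opts in by overriding them. A minimal sketch of a subscriber (the FallbackTracker class and its print statements are illustrative, not part of this PR):

import litellm
from litellm.integrations.custom_logger import CustomLogger


class FallbackTracker(CustomLogger):  # hypothetical example class
    async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
        # original_model_group: the model group the caller requested;
        # kwargs carries the final call params, including the model actually used
        print(f"fallback ok: {original_model_group} -> {kwargs.get('model')}")

    async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
        print(f"fallback failed for: {original_model_group}")


litellm.callbacks = [FallbackTracker()]  # picked up by the router's event dispatchers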
@@ -170,6 +170,17 @@ class PrometheusLogger(CustomLogger):
                 labelnames=_logged_llm_labels,
             )

+            self.llm_deployment_successful_fallbacks = Counter(
+                "llm_deployment_successful_fallbacks",
+                "LLM Deployment Analytics - Number of successful fallback workloads",
+                ["primary_model", "fallback_model"],
+            )
+            self.llm_deployment_failed_fallbacks = Counter(
+                "llm_deployment_failed_fallbacks",
+                "LLM Deployment Analytics - Number of failed fallback workloads",
+                ["primary_model", "fallback_model"],
+            )
+
         except Exception as e:
             print_verbose(f"Got exception on init prometheus client {str(e)}")
             raise e
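Both metrics are plain prometheus_client Counters keyed by a (primary_model, fallback_model) label pair. A standalone sketch of what such a counter exports, outside of litellm (assumes the prometheus-client package is installed; the sample values are illustrative):

from prometheus_client import Counter, generate_latest

successful_fallbacks = Counter(
    "llm_deployment_successful_fallbacks",
    "LLM Deployment Analytics - Number of successful fallback workloads",
    ["primary_model", "fallback_model"],
)

# record one successful fallback from gemini-1.5-pro-001 to gpt-4o
successful_fallbacks.labels(
    primary_model="gemini-1.5-pro-001", fallback_model="gpt-4o"
).inc()

# the scrape output then includes a line like:
# llm_deployment_successful_fallbacks_total{primary_model="gemini-1.5-pro-001",fallback_model="gpt-4o"} 1.0
print(generate_latest().decode())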
@@ -479,6 +490,28 @@ class PrometheusLogger(CustomLogger):
         )
         return

+    async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
+        verbose_logger.debug(
+            "Prometheus: log_success_fallback_event, original_model_group: %s, kwargs: %s",
+            original_model_group,
+            kwargs,
+        )
+        _new_model = kwargs.get("model")
+        self.llm_deployment_successful_fallbacks.labels(
+            primary_model=original_model_group, fallback_model=_new_model
+        ).inc()
+
+    async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
+        verbose_logger.debug(
+            "Prometheus: log_failure_fallback_event, original_model_group: %s, kwargs: %s",
+            original_model_group,
+            kwargs,
+        )
+        _new_model = kwargs.get("model")
+        self.llm_deployment_failed_fallbacks.labels(
+            primary_model=original_model_group, fallback_model=_new_model
+        ).inc()
+
     def set_deployment_state(
         self,
         state: int,
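By the time these hooks fire, the router has already rewritten kwargs["model"] to the fallback deployment (see the router hunks below), so each sample pairs the model group originally requested with the model that actually served the call. From there, a query such as rate(llm_deployment_failed_fallbacks_total[5m]) would surface fallback churn per model pair (the _total suffix comes from the Prometheus client's exposition format).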
@@ -37,5 +37,6 @@ general_settings:
   master_key: sk-1234

 litellm_settings:
+  fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
   success_callback: ["langfuse", "prometheus"]
   failure_callback: ["prometheus"]
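The same wiring in Python, for SDK users rather than the proxy (a sketch; the model_list entries are placeholders, not from this PR):

import litellm
from litellm import Router

litellm.success_callback = ["prometheus"]
litellm.failure_callback = ["prometheus"]

router = Router(
    model_list=[
        {"model_name": "gemini-1.5-pro-001", "litellm_params": {"model": "gemini/gemini-1.5-pro-001"}},
        {"model_name": "gpt-4o", "litellm_params": {"model": "gpt-4o"}},
    ],
    # if gemini-1.5-pro-001 fails, retry the request on gpt-4o
    fallbacks=[{"gemini-1.5-pro-001": ["gpt-4o"]}],
)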
@@ -59,6 +59,10 @@ from litellm.router_utils.client_initalization_utils import (
     should_initialize_sync_client,
 )
 from litellm.router_utils.cooldown_callbacks import router_cooldown_handler
+from litellm.router_utils.fallback_event_handlers import (
+    log_failure_fallback_event,
+    log_success_fallback_event,
+)
 from litellm.router_utils.handle_error import send_llm_exception_alert
 from litellm.scheduler import FlowItem, Scheduler
 from litellm.types.llms.openai import (
@@ -2361,6 +2365,7 @@ class Router:
             verbose_router_logger.debug(f"Traceback{traceback.format_exc()}")
             original_exception = e
             fallback_model_group = None
+            original_model_group = kwargs.get("model")
             fallback_failure_exception_str = ""
             try:
                 verbose_router_logger.debug("Trying to fallback b/w models")
@@ -2392,8 +2397,18 @@ class Router:
                                 verbose_router_logger.info(
                                     "Successful fallback b/w models."
                                 )
+                                # callback for successful_fallback_event():
+                                await log_success_fallback_event(
+                                    original_model_group=original_model_group,
+                                    kwargs=kwargs,
+                                )
+
                                 return response
                             except Exception as e:
+                                await log_failure_fallback_event(
+                                    original_model_group=original_model_group,
+                                    kwargs=kwargs,
+                                )
                                 pass
                 else:
                     error_message = "model={}. context_window_fallbacks={}. fallbacks={}.\n\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
@@ -2435,8 +2450,17 @@ class Router:
                                 verbose_router_logger.info(
                                     "Successful fallback b/w models."
                                 )
+                                # callback for successful_fallback_event():
+                                await log_success_fallback_event(
+                                    original_model_group=original_model_group,
+                                    kwargs=kwargs,
+                                )
                                 return response
                             except Exception as e:
+                                await log_failure_fallback_event(
+                                    original_model_group=original_model_group,
+                                    kwargs=kwargs,
+                                )
                                 pass
                 else:
                     error_message = "model={}. content_policy_fallback={}. fallbacks={}.\n\nSet 'content_policy_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
@@ -2497,8 +2521,18 @@ class Router:
                             verbose_router_logger.info(
                                 "Successful fallback b/w models."
                             )
+                            # callback for successful_fallback_event():
+                            await log_success_fallback_event(
+                                original_model_group=original_model_group,
+                                kwargs=kwargs,
+                            )
+
                             return response
                         except Exception as e:
+                            await log_failure_fallback_event(
+                                original_model_group=original_model_group,
+                                kwargs=kwargs,
+                            )
                             raise e
         except Exception as new_exception:
             verbose_router_logger.error(
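All three hunks instrument the same loop shape: try each candidate in the fallback group, emit a success event for the first one that answers, and emit a failure event for every candidate that throws. Reduced to a sketch (hypothetical names, not the actual Router internals):

from litellm.router_utils.fallback_event_handlers import (
    log_failure_fallback_event,
    log_success_fallback_event,
)


async def run_fallback_group(acompletion, fallback_model_group, original_model_group, kwargs):
    # try candidates in order; the first deployment that answers wins
    for mg in fallback_model_group:
        try:
            kwargs["model"] = mg  # rewrite the target before retrying
            response = await acompletion(**kwargs)
            await log_success_fallback_event(
                original_model_group=original_model_group, kwargs=kwargs
            )
            return response
        except Exception:
            await log_failure_fallback_event(
                original_model_group=original_model_group, kwargs=kwargs
            )
    raise RuntimeError(f"all fallbacks failed for {original_model_group}")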
litellm/router_utils/fallback_event_handlers.py (new file, 33 lines)
@ -0,0 +1,33 @@
|
|||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_router_logger
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
|
||||
|
||||
async def log_success_fallback_event(original_model_group: str, kwargs: dict):
|
||||
for _callback in litellm.callbacks:
|
||||
if isinstance(_callback, CustomLogger):
|
||||
try:
|
||||
await _callback.log_success_fallback_event(
|
||||
original_model_group=original_model_group, kwargs=kwargs
|
||||
)
|
||||
except Exception as e:
|
||||
verbose_router_logger.error(
|
||||
f"Error in log_success_fallback_event: {(str(e))}"
|
||||
)
|
||||
pass
|
||||
|
||||
|
||||
async def log_failure_fallback_event(original_model_group: str, kwargs: dict):
|
||||
for _callback in litellm.callbacks:
|
||||
if isinstance(_callback, CustomLogger):
|
||||
try:
|
||||
await _callback.log_failure_fallback_event(
|
||||
original_model_group=original_model_group, kwargs=kwargs
|
||||
)
|
||||
except Exception as e:
|
||||
verbose_router_logger.error(
|
||||
f"Error in log_failure_fallback_event: {(str(e))}"
|
||||
)
|
||||
pass
|
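Design note: each callback invocation is wrapped in its own try/except, so a misbehaving logger only produces an error log. It cannot break the in-flight fallback request, and the remaining callbacks in litellm.callbacks still run.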