mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
Merge pull request #5153 from BerriAI/litellm_track_fallback_prometheus
Feat - Proxy track fallback metrics on prometheus
This commit is contained in:
commit
3bc39af1b9
5 changed files with 108 additions and 0 deletions
|
@ -58,6 +58,13 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
|
||||||
def pre_call_check(self, deployment: dict) -> Optional[dict]:
|
def pre_call_check(self, deployment: dict) -> Optional[dict]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
#### Fallback Events - router/proxy only ####
|
||||||
|
async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
    """
    Fallback-success hook (router/proxy only).

    The base implementation is intentionally a no-op and returns None;
    subclasses override it to record the event.

    Args:
        original_model_group: unused by the base implementation.
        kwargs: unused by the base implementation.
    """
|
||||||
|
|
||||||
|
async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
    """
    Fallback-failure hook (router/proxy only).

    The base implementation is intentionally a no-op and returns None;
    subclasses override it to record the event.

    Args:
        original_model_group: unused by the base implementation.
        kwargs: unused by the base implementation.
    """
|
||||||
|
|
||||||
#### ADAPTERS #### Allow calling 100+ LLMs in custom format - https://github.com/BerriAI/litellm/pulls
|
#### ADAPTERS #### Allow calling 100+ LLMs in custom format - https://github.com/BerriAI/litellm/pulls
|
||||||
|
|
||||||
def translate_completion_input_params(
|
def translate_completion_input_params(
|
||||||
|
|
|
@ -170,6 +170,17 @@ class PrometheusLogger(CustomLogger):
|
||||||
labelnames=_logged_llm_labels,
|
labelnames=_logged_llm_labels,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.llm_deployment_successful_fallbacks = Counter(
|
||||||
|
"llm_deployment_successful_fallbacks",
|
||||||
|
"LLM Deployment Analytics - Number of successful fallback workloads",
|
||||||
|
["primary_model", "fallback_model"],
|
||||||
|
)
|
||||||
|
self.llm_deployment_failed_fallbacks = Counter(
|
||||||
|
"llm_deployment_failed_fallbacks",
|
||||||
|
"LLM Deployment Analytics - Number of failed fallback workloads",
|
||||||
|
["primary_model", "fallback_model"],
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print_verbose(f"Got exception on init prometheus client {str(e)}")
|
print_verbose(f"Got exception on init prometheus client {str(e)}")
|
||||||
raise e
|
raise e
|
||||||
|
@ -479,6 +490,28 @@ class PrometheusLogger(CustomLogger):
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
    """
    Count one successful fallback on the Prometheus counter.

    Emits a debug log line, then increments
    `self.llm_deployment_successful_fallbacks` for the label pair
    (primary_model, fallback_model).

    Args:
        original_model_group: used as the `primary_model` label.
        kwargs: its "model" entry (None when absent) is used as the
            `fallback_model` label.
            NOTE(review): presumably the caller has already set
            kwargs["model"] to the fallback model group - confirm.
    """
    verbose_logger.debug(
        "Prometheus: log_success_fallback_event, original_model_group: %s, kwargs: %s",
        original_model_group,
        kwargs,
    )
    fallback_model = kwargs.get("model")
    labelled_counter = self.llm_deployment_successful_fallbacks.labels(
        primary_model=original_model_group,
        fallback_model=fallback_model,
    )
    labelled_counter.inc()
|
||||||
|
|
||||||
|
async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
    """
    Count one failed fallback on the Prometheus counter.

    Emits a debug log line, then increments
    `self.llm_deployment_failed_fallbacks` for the label pair
    (primary_model, fallback_model).

    Args:
        original_model_group: used as the `primary_model` label.
        kwargs: its "model" entry (None when absent) is used as the
            `fallback_model` label.
            NOTE(review): presumably the caller has already set
            kwargs["model"] to the fallback model group - confirm.
    """
    verbose_logger.debug(
        "Prometheus: log_failure_fallback_event, original_model_group: %s, kwargs: %s",
        original_model_group,
        kwargs,
    )
    fallback_model = kwargs.get("model")
    labelled_counter = self.llm_deployment_failed_fallbacks.labels(
        primary_model=original_model_group,
        fallback_model=fallback_model,
    )
    labelled_counter.inc()
|
||||||
|
|
||||||
def set_deployment_state(
|
def set_deployment_state(
|
||||||
self,
|
self,
|
||||||
state: int,
|
state: int,
|
||||||
|
|
|
@ -37,5 +37,6 @@ general_settings:
|
||||||
master_key: sk-1234
|
master_key: sk-1234
|
||||||
|
|
||||||
litellm_settings:
|
litellm_settings:
|
||||||
|
fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
|
||||||
success_callback: ["langfuse", "prometheus"]
|
success_callback: ["langfuse", "prometheus"]
|
||||||
failure_callback: ["prometheus"]
|
failure_callback: ["prometheus"]
|
||||||
|
|
|
@ -59,6 +59,10 @@ from litellm.router_utils.client_initalization_utils import (
|
||||||
should_initialize_sync_client,
|
should_initialize_sync_client,
|
||||||
)
|
)
|
||||||
from litellm.router_utils.cooldown_callbacks import router_cooldown_handler
|
from litellm.router_utils.cooldown_callbacks import router_cooldown_handler
|
||||||
|
from litellm.router_utils.fallback_event_handlers import (
|
||||||
|
log_failure_fallback_event,
|
||||||
|
log_success_fallback_event,
|
||||||
|
)
|
||||||
from litellm.router_utils.handle_error import send_llm_exception_alert
|
from litellm.router_utils.handle_error import send_llm_exception_alert
|
||||||
from litellm.scheduler import FlowItem, Scheduler
|
from litellm.scheduler import FlowItem, Scheduler
|
||||||
from litellm.types.llms.openai import (
|
from litellm.types.llms.openai import (
|
||||||
|
@ -2361,6 +2365,7 @@ class Router:
|
||||||
verbose_router_logger.debug(f"Traceback{traceback.format_exc()}")
|
verbose_router_logger.debug(f"Traceback{traceback.format_exc()}")
|
||||||
original_exception = e
|
original_exception = e
|
||||||
fallback_model_group = None
|
fallback_model_group = None
|
||||||
|
original_model_group = kwargs.get("model")
|
||||||
fallback_failure_exception_str = ""
|
fallback_failure_exception_str = ""
|
||||||
try:
|
try:
|
||||||
verbose_router_logger.debug("Trying to fallback b/w models")
|
verbose_router_logger.debug("Trying to fallback b/w models")
|
||||||
|
@ -2392,8 +2397,18 @@ class Router:
|
||||||
verbose_router_logger.info(
|
verbose_router_logger.info(
|
||||||
"Successful fallback b/w models."
|
"Successful fallback b/w models."
|
||||||
)
|
)
|
||||||
|
# notify registered callbacks that the fallback succeeded (log_success_fallback_event)
|
||||||
|
await log_success_fallback_event(
|
||||||
|
original_model_group=original_model_group,
|
||||||
|
kwargs=kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
await log_failure_fallback_event(
|
||||||
|
original_model_group=original_model_group,
|
||||||
|
kwargs=kwargs,
|
||||||
|
)
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
error_message = "model={}. context_window_fallbacks={}. fallbacks={}.\n\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
|
error_message = "model={}. context_window_fallbacks={}. fallbacks={}.\n\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
|
||||||
|
@ -2435,8 +2450,17 @@ class Router:
|
||||||
verbose_router_logger.info(
|
verbose_router_logger.info(
|
||||||
"Successful fallback b/w models."
|
"Successful fallback b/w models."
|
||||||
)
|
)
|
||||||
|
# notify registered callbacks that the fallback succeeded (log_success_fallback_event)
|
||||||
|
await log_success_fallback_event(
|
||||||
|
original_model_group=original_model_group,
|
||||||
|
kwargs=kwargs,
|
||||||
|
)
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
await log_failure_fallback_event(
|
||||||
|
original_model_group=original_model_group,
|
||||||
|
kwargs=kwargs,
|
||||||
|
)
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
error_message = "model={}. content_policy_fallback={}. fallbacks={}.\n\nSet 'content_policy_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
|
error_message = "model={}. content_policy_fallback={}. fallbacks={}.\n\nSet 'content_policy_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
|
||||||
|
@ -2497,8 +2521,18 @@ class Router:
|
||||||
verbose_router_logger.info(
|
verbose_router_logger.info(
|
||||||
"Successful fallback b/w models."
|
"Successful fallback b/w models."
|
||||||
)
|
)
|
||||||
|
# notify registered callbacks that the fallback succeeded (log_success_fallback_event)
|
||||||
|
await log_success_fallback_event(
|
||||||
|
original_model_group=original_model_group,
|
||||||
|
kwargs=kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
await log_failure_fallback_event(
|
||||||
|
original_model_group=original_model_group,
|
||||||
|
kwargs=kwargs,
|
||||||
|
)
|
||||||
raise e
|
raise e
|
||||||
except Exception as new_exception:
|
except Exception as new_exception:
|
||||||
verbose_router_logger.error(
|
verbose_router_logger.error(
|
||||||
|
|
33
litellm/router_utils/fallback_event_handlers.py
Normal file
33
litellm/router_utils/fallback_event_handlers.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm._logging import verbose_router_logger
|
||||||
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
|
|
||||||
|
|
||||||
|
async def log_success_fallback_event(original_model_group: str, kwargs: dict):
    """
    Notify every registered CustomLogger that a fallback succeeded.

    Walks `litellm.callbacks`, skips entries that are not CustomLogger
    instances, and awaits each logger's `log_success_fallback_event` hook.
    An exception raised by a hook is logged through `verbose_router_logger`
    and swallowed, so the remaining callbacks still run.

    Args:
        original_model_group: forwarded unchanged to each hook.
        kwargs: forwarded unchanged to each hook.
    """
    for registered_callback in litellm.callbacks:
        if not isinstance(registered_callback, CustomLogger):
            continue
        try:
            await registered_callback.log_success_fallback_event(
                original_model_group=original_model_group, kwargs=kwargs
            )
        except Exception as hook_error:
            # Best-effort logging: a broken callback must not break the request.
            error_text = str(hook_error)
            verbose_router_logger.error(
                f"Error in log_success_fallback_event: {error_text}"
            )
|
||||||
|
|
||||||
|
|
||||||
|
async def log_failure_fallback_event(original_model_group: str, kwargs: dict):
    """
    Notify every registered CustomLogger that a fallback failed.

    Walks `litellm.callbacks`, skips entries that are not CustomLogger
    instances, and awaits each logger's `log_failure_fallback_event` hook.
    An exception raised by a hook is logged through `verbose_router_logger`
    and swallowed, so the remaining callbacks still run.

    Args:
        original_model_group: forwarded unchanged to each hook.
        kwargs: forwarded unchanged to each hook.
    """
    for registered_callback in litellm.callbacks:
        if not isinstance(registered_callback, CustomLogger):
            continue
        try:
            await registered_callback.log_failure_fallback_event(
                original_model_group=original_model_group, kwargs=kwargs
            )
        except Exception as hook_error:
            # Best-effort logging: a broken callback must not break the request.
            error_text = str(hook_error)
            verbose_router_logger.error(
                f"Error in log_failure_fallback_event: {error_text}"
            )
|
Loading…
Add table
Add a link
Reference in a new issue