Merge pull request #5153 from BerriAI/litellm_track_fallback_prometheus

Feat - Proxy track fallback metrics on prometheus
Ishaan Jaff 2024-08-10 14:10:42 -07:00 committed by GitHub
commit 3bc39af1b9
5 changed files with 108 additions and 0 deletions


@@ -58,6 +58,13 @@ class CustomLogger:  # https://docs.litellm.ai/docs/observability/custom_callback
    def pre_call_check(self, deployment: dict) -> Optional[dict]:
        pass

    #### Fallback Events - router/proxy only ####
    async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
        pass

    async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
        pass

    #### ADAPTERS #### Allow calling 100+ LLMs in custom format - https://github.com/BerriAI/litellm/pulls
    def translate_completion_input_params(

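The two new CustomLogger hooks are no-ops by default, so any callback can opt in by overriding them. A minimal sketch of such a callback (the class name FallbackAuditLogger is hypothetical, and it assumes kwargs.get("model") carries the model group the router fell back to, which is how the Prometheus logger below reads it):

import litellm
from litellm.integrations.custom_logger import CustomLogger


class FallbackAuditLogger(CustomLogger):
    """Hypothetical example: print every fallback the router performs."""

    async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
        # original_model_group = the model group that failed first;
        # kwargs.get("model") = the group the router retried on (assumption,
        # mirroring how PrometheusLogger interprets it below).
        print(f"fallback ok: {original_model_group} -> {kwargs.get('model')}")

    async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
        print(f"fallback failed: {original_model_group} -> {kwargs.get('model')}")


# Register it like any other callback; the router dispatches both events.
litellm.callbacks = [FallbackAuditLogger()]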

@@ -170,6 +170,17 @@ class PrometheusLogger(CustomLogger):
                labelnames=_logged_llm_labels,
            )

            self.llm_deployment_successful_fallbacks = Counter(
                "llm_deployment_successful_fallbacks",
                "LLM Deployment Analytics - Number of successful fallback workloads",
                ["primary_model", "fallback_model"],
            )
            self.llm_deployment_failed_fallbacks = Counter(
                "llm_deployment_failed_fallbacks",
                "LLM Deployment Analytics - Number of failed fallback workloads",
                ["primary_model", "fallback_model"],
            )
        except Exception as e:
            print_verbose(f"Got exception on init prometheus client {str(e)}")
            raise e
@@ -479,6 +490,28 @@ class PrometheusLogger(CustomLogger):
            )
            return

    async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
        verbose_logger.debug(
            "Prometheus: log_success_fallback_event, original_model_group: %s, kwargs: %s",
            original_model_group,
            kwargs,
        )
        _new_model = kwargs.get("model")
        self.llm_deployment_successful_fallbacks.labels(
            primary_model=original_model_group, fallback_model=_new_model
        ).inc()

    async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
        verbose_logger.debug(
            "Prometheus: log_failure_fallback_event, original_model_group: %s, kwargs: %s",
            original_model_group,
            kwargs,
        )
        _new_model = kwargs.get("model")
        self.llm_deployment_failed_fallbacks.labels(
            primary_model=original_model_group, fallback_model=_new_model
        ).inc()

    def set_deployment_state(
        self,
        state: int,

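Both counters follow the standard prometheus_client pattern: one time series per (primary_model, fallback_model) pair, incremented on each event. A standalone sketch of what the exposition looks like (values are illustrative only):

from prometheus_client import Counter, generate_latest

# Same metric name and labels as in the PR; registered on the default registry.
successful_fallbacks = Counter(
    "llm_deployment_successful_fallbacks",
    "LLM Deployment Analytics - Number of successful fallback workloads",
    ["primary_model", "fallback_model"],
)

successful_fallbacks.labels(
    primary_model="gemini-1.5-pro-001", fallback_model="gpt-4o"
).inc()

# The text exposition shows the counter with a _total suffix, e.g.
# llm_deployment_successful_fallbacks_total{primary_model="gemini-1.5-pro-001",fallback_model="gpt-4o"} 1.0
print(generate_latest().decode())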

@@ -37,5 +37,6 @@ general_settings:
  master_key: sk-1234

litellm_settings:
  fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
  success_callback: ["langfuse", "prometheus"]
  failure_callback: ["prometheus"]

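With "prometheus" in both success_callback and failure_callback, the proxy's scrape endpoint should expose the new series once a fallback has occurred. A quick check, assuming the proxy runs locally on its default port and serves metrics at /metrics (adjust for your deployment):

import requests

# Hypothetical local check of the proxy's Prometheus endpoint.
metrics = requests.get("http://localhost:4000/metrics").text
for line in metrics.splitlines():
    if "llm_deployment_successful_fallbacks" in line or "llm_deployment_failed_fallbacks" in line:
        print(line)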

@@ -59,6 +59,10 @@ from litellm.router_utils.client_initalization_utils import (
    should_initialize_sync_client,
)
from litellm.router_utils.cooldown_callbacks import router_cooldown_handler
from litellm.router_utils.fallback_event_handlers import (
    log_failure_fallback_event,
    log_success_fallback_event,
)
from litellm.router_utils.handle_error import send_llm_exception_alert
from litellm.scheduler import FlowItem, Scheduler
from litellm.types.llms.openai import (
@@ -2361,6 +2365,7 @@ class Router:
            verbose_router_logger.debug(f"Traceback{traceback.format_exc()}")
            original_exception = e
            fallback_model_group = None
            original_model_group = kwargs.get("model")
            fallback_failure_exception_str = ""
            try:
                verbose_router_logger.debug("Trying to fallback b/w models")
@@ -2392,8 +2397,18 @@
                            verbose_router_logger.info(
                                "Successful fallback b/w models."
                            )
                            # callback for successful fallback event
                            await log_success_fallback_event(
                                original_model_group=original_model_group,
                                kwargs=kwargs,
                            )
                            return response
                        except Exception as e:
                            await log_failure_fallback_event(
                                original_model_group=original_model_group,
                                kwargs=kwargs,
                            )
                            pass
                else:
                    error_message = "model={}. context_window_fallbacks={}. fallbacks={}.\n\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
@@ -2435,8 +2450,17 @@
                            verbose_router_logger.info(
                                "Successful fallback b/w models."
                            )
                            # callback for successful fallback event
                            await log_success_fallback_event(
                                original_model_group=original_model_group,
                                kwargs=kwargs,
                            )
                            return response
                        except Exception as e:
                            await log_failure_fallback_event(
                                original_model_group=original_model_group,
                                kwargs=kwargs,
                            )
                            pass
                else:
                    error_message = "model={}. content_policy_fallback={}. fallbacks={}.\n\nSet 'content_policy_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
@@ -2497,8 +2521,18 @@
                        verbose_router_logger.info(
                            "Successful fallback b/w models."
                        )
                        # callback for successful fallback event
                        await log_success_fallback_event(
                            original_model_group=original_model_group,
                            kwargs=kwargs,
                        )
                        return response
                    except Exception as e:
                        await log_failure_fallback_event(
                            original_model_group=original_model_group,
                            kwargs=kwargs,
                        )
                        raise e
            except Exception as new_exception:
                verbose_router_logger.error(

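On the router side, the events fire per fallback attempt: log_success_fallback_event after a fallback deployment returns, log_failure_fallback_event when the fallback itself raises. A rough end-to-end sketch (model names, API keys, and the deliberately broken primary are assumptions for illustration):

import asyncio

import litellm
from litellm import Router

# Assumes a callback such as the FallbackAuditLogger sketched earlier (or the
# PrometheusLogger via the callback settings) is already registered in
# litellm.callbacks.
router = Router(
    model_list=[
        # Primary group with a deliberately bad key so the first attempt fails.
        {"model_name": "gemini-1.5-pro-001",
         "litellm_params": {"model": "gemini/gemini-1.5-pro-001", "api_key": "bad-key"}},
        {"model_name": "gpt-4o",
         "litellm_params": {"model": "gpt-4o", "api_key": "sk-..."}},
    ],
    fallbacks=[{"gemini-1.5-pro-001": ["gpt-4o"]}],
)


async def main():
    # The failing primary call falls back to gpt-4o; along the way the router
    # invokes log_success_fallback_event / log_failure_fallback_event for every
    # CustomLogger in litellm.callbacks.
    response = await router.acompletion(
        model="gemini-1.5-pro-001",
        messages=[{"role": "user", "content": "hello"}],
    )
    print(response)


asyncio.run(main())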

@@ -0,0 +1,33 @@
from typing import TYPE_CHECKING, Any

import litellm
from litellm._logging import verbose_router_logger
from litellm.integrations.custom_logger import CustomLogger


async def log_success_fallback_event(original_model_group: str, kwargs: dict):
    for _callback in litellm.callbacks:
        if isinstance(_callback, CustomLogger):
            try:
                await _callback.log_success_fallback_event(
                    original_model_group=original_model_group, kwargs=kwargs
                )
            except Exception as e:
                verbose_router_logger.error(
                    f"Error in log_success_fallback_event: {(str(e))}"
                )
                pass


async def log_failure_fallback_event(original_model_group: str, kwargs: dict):
    for _callback in litellm.callbacks:
        if isinstance(_callback, CustomLogger):
            try:
                await _callback.log_failure_fallback_event(
                    original_model_group=original_model_group, kwargs=kwargs
                )
            except Exception as e:
                verbose_router_logger.error(
                    f"Error in log_failure_fallback_event: {(str(e))}"
                )
                pass