diff --git a/docs/my-website/docs/proxy/call_hooks.md b/docs/my-website/docs/proxy/call_hooks.md
index 0afcb2158..6651393ef 100644
--- a/docs/my-website/docs/proxy/call_hooks.md
+++ b/docs/my-website/docs/proxy/call_hooks.md
@@ -41,7 +41,10 @@ class MyCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/observabilit
         return data

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth
     ):
         pass

diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md
index b8db542c9..1a0422714 100644
--- a/docs/my-website/docs/proxy/prometheus.md
+++ b/docs/my-website/docs/proxy/prometheus.md
@@ -70,6 +70,16 @@ Use this for tracking per [user, key, team, etc.](virtual_keys)
 | `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |

+### Proxy Level Tracking Metrics
+
+Use this to track overall LiteLLM Proxy usage.
+- Track actual traffic rate to the proxy
+- Number of **client-side** requests and failures for requests made to the proxy
+
+| Metric Name | Description |
+|----------------------|--------------------------------------|
+| `litellm_proxy_failed_requests_metric` | Total number of failed responses from the proxy - the client did not get a success response from litellm proxy. Labeled by `"user", "key", "model", "team", "end-user"` |
+| `litellm_proxy_total_requests_metric` | Total number of requests made to the proxy server - tracks client-side requests. Labeled by `"user", "key", "model", "team", "end-user"` |

 ### LLM API / Provider Metrics

diff --git a/litellm/_service_logger.py b/litellm/_service_logger.py
index 3c1caa1b7..6412ec801 100644
--- a/litellm/_service_logger.py
+++ b/litellm/_service_logger.py
@@ -212,13 +212,18 @@ class ServiceLogging(CustomLogger):
         )

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         """
         Hook to track failed litellm-service calls
         """
         return await super().async_post_call_failure_hook(
-            original_exception, user_api_key_dict
+            request_data,
+            original_exception,
+            user_api_key_dict,
         )

     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py
index 0ea9d4a18..d6e72113e 100644
--- a/litellm/integrations/custom_logger.py
+++ b/litellm/integrations/custom_logger.py
@@ -125,7 +125,10 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
         pass

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         pass

diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py
index d432372db..27bd25d76 100644
--- a/litellm/integrations/opentelemetry.py
+++ b/litellm/integrations/opentelemetry.py
@@ -221,7 +221,10 @@ class OpenTelemetry(CustomLogger):
         service_logging_span.end(end_time=_end_time_ns)

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
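Taken together, the hunks above add a `request_data` parameter to every `async_post_call_failure_hook` override. A minimal sketch of a handler written against the new signature (the class name and log line are illustrative, not part of this diff):

```python
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth


class MyFailureHandler(CustomLogger):  # hypothetical subclass for illustration
    async def async_post_call_failure_hook(
        self,
        request_data: dict,  # new first parameter: the client's request body
        original_exception: Exception,
        user_api_key_dict: UserAPIKeyAuth,
    ):
        # request_data exposes what the client asked for (e.g. the model),
        # which is not recoverable from the exception alone
        print(
            f"model={request_data.get('model', '')} "
            f"key={user_api_key_dict.key_alias} failed: {original_exception}"
        )
```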
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index ec5e0522c..d5b6bc2b4 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -15,6 +15,7 @@ import requests  # type: ignore
 import litellm
 from litellm._logging import print_verbose, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
+from litellm.proxy._types import UserAPIKeyAuth


 class PrometheusLogger(CustomLogger):
@@ -38,28 +39,30 @@ class PrometheusLogger(CustomLogger):
                 )
                 return
-            self.litellm_llm_api_failed_requests_metric = Counter(
-                name="litellm_llm_api_failed_requests_metric",
-                documentation="Total number of failed LLM API calls via litellm - track fails per API Key, team, user",
+            REQUESTED_MODEL = "requested_model"
+
+            self.litellm_proxy_failed_requests_metric = Counter(
+                name="litellm_proxy_failed_requests_metric",
+                documentation="Total number of failed responses from proxy - the client did not get a success response from litellm proxy",
                 labelnames=[
                     "end_user",
                     "hashed_api_key",
                     "api_key_alias",
-                    "model",
+                    REQUESTED_MODEL,
                     "team",
                     "team_alias",
                     "user",
                 ],
             )
-            self.litellm_requests_metric = Counter(
-                name="litellm_requests_metric",
-                documentation="Total number of LLM calls to litellm - track total per API Key, team, user",
+            self.litellm_proxy_total_requests_metric = Counter(
+                name="litellm_proxy_total_requests_metric",
+                documentation="Total number of requests made to the proxy server - track number of client side requests",
                 labelnames=[
                     "end_user",
                     "hashed_api_key",
                     "api_key_alias",
-                    "model",
+                    REQUESTED_MODEL,
                     "team",
                     "team_alias",
                     "user",
@@ -201,17 +204,17 @@ class PrometheusLogger(CustomLogger):
             self.litellm_deployment_success_responses = Counter(
                 name="litellm_deployment_success_responses",
                 documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
-                labelnames=["requested_model"] + _logged_llm_labels,
+                labelnames=[REQUESTED_MODEL] + _logged_llm_labels,
             )
             self.litellm_deployment_failure_responses = Counter(
                 name="litellm_deployment_failure_responses",
                 documentation="LLM Deployment Analytics - Total number of failed LLM API calls for a specific LLM deployment. exception_status is the status of the exception from the llm api",
-                labelnames=["requested_model", "exception_status"] + _logged_llm_labels,
+                labelnames=[REQUESTED_MODEL, "exception_status"] + _logged_llm_labels,
             )
             self.litellm_deployment_total_requests = Counter(
                 name="litellm_deployment_total_requests",
                 documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
-                labelnames=["requested_model"] + _logged_llm_labels,
+                labelnames=[REQUESTED_MODEL] + _logged_llm_labels,
             )

             # Deployment Latency tracking
@@ -232,6 +235,34 @@ class PrometheusLogger(CustomLogger):
                 ["primary_model", "fallback_model"],
             )

+            self.litellm_llm_api_failed_requests_metric = Counter(
+                name="litellm_llm_api_failed_requests_metric",
+                documentation="deprecated - use litellm_proxy_failed_requests_metric",
+                labelnames=[
+                    "end_user",
+                    "hashed_api_key",
+                    "api_key_alias",
+                    "model",
+                    "team",
+                    "team_alias",
+                    "user",
+                ],
+            )
+
+            self.litellm_requests_metric = Counter(
+                name="litellm_requests_metric",
+                documentation="deprecated - use litellm_proxy_total_requests_metric. Total number of LLM calls to litellm - track total per API Key, team, user",
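For readers unfamiliar with the `prometheus_client` pattern used here: `labelnames` declares the label schema once, and each `.labels(...).inc()` call increments the series for one concrete label combination. A standalone sketch (the metric name and label values are made up to avoid clashing with litellm's real metric; `litellm` itself is not required):

```python
from prometheus_client import Counter

# Same shape as litellm_proxy_failed_requests_metric above
failed_requests = Counter(
    "proxy_failed_requests_demo",
    "Total number of failed responses from proxy",
    labelnames=[
        "end_user", "hashed_api_key", "api_key_alias",
        "requested_model", "team", "team_alias", "user",
    ],
)

# .labels() takes values positionally, in labelnames order;
# each distinct combination becomes its own time series.
failed_requests.labels(
    "end-user-1", "88dc28..", "prod-key", "gemini-vision", "team-1", "ml-team", "user-1"
).inc()
```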
+                labelnames=[
+                    "end_user",
+                    "hashed_api_key",
+                    "api_key_alias",
+                    "model",
+                    "team",
+                    "team_alias",
+                    "user",
+                ],
+            )
+
         except Exception as e:
             print_verbose(f"Got exception on init prometheus client {str(e)}")
             raise e
@@ -440,6 +471,76 @@ class PrometheusLogger(CustomLogger):
                 pass
             pass

+    async def async_post_call_failure_hook(
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
+    ):
+        """
+        Track client side failures
+
+        Proxy level tracking - failed client side requests
+
+        labelnames=[
+            "end_user",
+            "hashed_api_key",
+            "api_key_alias",
+            "model",
+            "team",
+            "team_alias",
+            "user",
+        ],
+        """
+        try:
+            self.litellm_proxy_failed_requests_metric.labels(
+                user_api_key_dict.end_user_id,
+                user_api_key_dict.api_key,
+                user_api_key_dict.key_alias,
+                request_data.get("model", ""),
+                user_api_key_dict.team_id,
+                user_api_key_dict.team_alias,
+                user_api_key_dict.user_id,
+            ).inc()
+
+            self.litellm_proxy_total_requests_metric.labels(
+                user_api_key_dict.end_user_id,
+                user_api_key_dict.api_key,
+                user_api_key_dict.key_alias,
+                request_data.get("model", ""),
+                user_api_key_dict.team_id,
+                user_api_key_dict.team_alias,
+                user_api_key_dict.user_id,
+            ).inc()
+        except Exception as e:
+            verbose_logger.exception(
+                "prometheus Layer Error(): Exception occurred - {}".format(str(e))
+            )
+
+    async def async_post_call_success_hook(
+        self, data: dict, user_api_key_dict: UserAPIKeyAuth, response
+    ):
+        """
+        Proxy level tracking - triggered when the proxy responds with a success response to the client
+        """
+        try:
+            self.litellm_proxy_total_requests_metric.labels(
+                user_api_key_dict.end_user_id,
+                user_api_key_dict.api_key,
+                user_api_key_dict.key_alias,
+                data.get("model", ""),
+                user_api_key_dict.team_id,
+                user_api_key_dict.team_alias,
+                user_api_key_dict.user_id,
+            ).inc()
+        except Exception as e:
+            verbose_logger.exception(
+                "prometheus Layer Error(): Exception occurred - {}".format(str(e))
+            )
+
     def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
         try:
             verbose_logger.debug("setting remaining tokens requests metric")
diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py
index 58072576e..4a62321d9 100644
--- a/litellm/proxy/auth/user_api_key_auth.py
+++ b/litellm/proxy/auth/user_api_key_auth.py
@@ -1121,6 +1121,7 @@ async def user_api_key_auth(
         if open_telemetry_logger is not None:
             await open_telemetry_logger.async_post_call_failure_hook(  # type: ignore
                 original_exception=e,
+                request_data={},
                 user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
             )

diff --git a/litellm/proxy/custom_callbacks1.py b/litellm/proxy/custom_callbacks1.py
index 7f6eafb3c..921111127 100644
--- a/litellm/proxy/custom_callbacks1.py
+++ b/litellm/proxy/custom_callbacks1.py
@@ -35,7 +35,10 @@ class MyCustomHandler(
         return data

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         pass

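The two Prometheus hooks above can be exercised directly to confirm the counter semantics: a failure increments both metrics, a success only the total. A hedged smoke-test sketch — it assumes `prometheus_client` is installed and that `PrometheusLogger` can be constructed outside a running proxy:

```python
import asyncio

from litellm.integrations.prometheus import PrometheusLogger
from litellm.proxy._types import UserAPIKeyAuth


async def main():
    logger = PrometheusLogger()
    key = UserAPIKeyAuth(api_key="hashed-key", key_alias="dev-key", user_id="user-1")

    # Client-side failure: bumps failed_requests and total_requests
    await logger.async_post_call_failure_hook(
        request_data={"model": "gemini-vision"},
        original_exception=Exception("simulated upstream 429"),
        user_api_key_dict=key,
    )

    # Success response to the client: bumps only total_requests
    await logger.async_post_call_success_hook(
        data={"model": "gemini-vision"}, user_api_key_dict=key, response=None
    )


asyncio.run(main())
```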
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 89a0bb564..eaf94974b 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,4 +1,19 @@
 model_list:
+  - model_name: gemini-vision
+    litellm_params:
+      model: vertex_ai/gemini-1.5-pro
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
+      vertex_project: "adroit-crow-413218"
+      vertex_location: "us-central1"
+      vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
+  - model_name: gemini-vision
+    litellm_params:
+      model: vertex_ai/gemini-1.0-pro-vision-001
+      api_base: https://exampleopenaiendpoint-production-c715.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
+      vertex_project: "adroit-crow-413218"
+      vertex_location: "us-central1"
+      vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
+
   - model_name: fake-azure-endpoint
     litellm_params:
       model: openai/429
@@ -6,3 +21,6 @@ model_list:
       api_base: https://exampleopenaiendpoint-production.up.railway.app


+litellm_settings:
+  success_callback: ["prometheus"]
+
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 44ae71b15..389cc3aa2 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -632,9 +632,9 @@ class ProxyLogging:

     async def post_call_failure_hook(
         self,
+        request_data: dict,
         original_exception: Exception,
         user_api_key_dict: UserAPIKeyAuth,
-        request_data: dict,
     ):
         """
         Allows users to raise custom exceptions/log when a call fails, without having to deal with parsing the request body.
@@ -750,6 +750,7 @@ class ProxyLogging:
             _callback = callback  # type: ignore
             if _callback is not None and isinstance(_callback, CustomLogger):
                 await _callback.async_post_call_failure_hook(
+                    request_data=request_data,
                     user_api_key_dict=user_api_key_dict,
                     original_exception=original_exception,
                 )
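Because `request_data` moved to the front of `post_call_failure_hook`'s parameter list, any external caller passing arguments positionally would break; keyword arguments sidestep that. A hypothetical call site (the function and argument values are illustrative, not from this diff):

```python
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.utils import ProxyLogging


async def report_failure(proxy_logging_obj: ProxyLogging, exc: Exception) -> None:
    # request_data is now the first parameter; keywords keep the call site
    # robust against further signature reordering.
    await proxy_logging_obj.post_call_failure_hook(
        request_data={"model": "fake-azure-endpoint"},
        original_exception=exc,
        user_api_key_dict=UserAPIKeyAuth(user_id="user-1"),
    )
```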