forked from phoenix/litellm-mirror

[Feat] Add proxy level prometheus metrics (#5789)

* add Proxy Level Tracking Metrics doc
* update service logger
* prometheus - track litellm_proxy_failed_requests_metric
* use REQUESTED_MODEL
* fix prom request_data

parent ae41c0df82
commit 91e58d9049

10 changed files with 166 additions and 18 deletions
@@ -41,7 +41,10 @@ class MyCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/observabilit
         return data
 
     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth
     ):
         pass
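For readers updating their own handlers, a complete implementation of the new signature might look like the following minimal sketch (only the signature comes from this commit; the logging body is illustrative):

```python
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth


class MyCustomHandler(CustomLogger):
    async def async_post_call_failure_hook(
        self,
        request_data: dict,  # new in this commit: the raw request body
        original_exception: Exception,
        user_api_key_dict: UserAPIKeyAuth,
    ):
        # Illustrative body: log which model the client requested and why it failed
        print(
            f"failed request for model={request_data.get('model', '')}: "
            f"{original_exception}"
        )
```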
@@ -70,6 +70,16 @@ Use this for tracking per [user, key, team, etc.](virtual_keys)
 | `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |
 
+### Proxy Level Tracking Metrics
+
+Use this to track overall LiteLLM Proxy usage.
+- Track actual traffic rate to proxy
+- Number of **client side** requests and failures for requests made to proxy
+
+| Metric Name | Description |
+|----------------------|--------------------------------------|
+| `litellm_proxy_failed_requests_metric` | Total number of failed responses from proxy - the client did not get a success response from litellm proxy, per `"user", "key", "model", "team", "end-user"` |
+| `litellm_proxy_total_requests_metric` | Total number of requests made to the proxy server - tracks client side requests, per `"user", "key", "model", "team", "end-user"` |
+
 ### LLM API / Provider Metrics
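As a sanity check after deploying, both new counters can be read straight off the proxy's Prometheus endpoint. A minimal sketch, assuming the proxy runs locally on port 4000 and serves metrics at `/metrics`:

```python
import requests

# Fetch the Prometheus exposition text and print only the new proxy-level counters
resp = requests.get("http://localhost:4000/metrics")
for line in resp.text.splitlines():
    if line.startswith(
        ("litellm_proxy_failed_requests_metric", "litellm_proxy_total_requests_metric")
    ):
        print(line)
```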
@@ -212,13 +212,18 @@ class ServiceLogging(CustomLogger):
         )
 
     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         """
         Hook to track failed litellm-service calls
         """
         return await super().async_post_call_failure_hook(
-            original_exception, user_api_key_dict
+            request_data,
+            original_exception,
+            user_api_key_dict,
         )
 
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -125,7 +125,10 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
         pass
 
     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         pass
@@ -221,7 +221,10 @@ class OpenTelemetry(CustomLogger):
         service_logging_span.end(end_time=_end_time_ns)
 
     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
    ):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
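The hunk is truncated after the imports; `Status` and `StatusCode` are typically used to flag the active span as failed. A sketch of the usual OpenTelemetry pattern (not necessarily this method's exact body; `mark_span_failed` is a hypothetical helper):

```python
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode


def mark_span_failed(original_exception: Exception) -> None:
    # Mark the current span as errored and attach the exception details
    span = trace.get_current_span()
    span.set_status(Status(StatusCode.ERROR))
    span.record_exception(original_exception)


mark_span_failed(Exception("simulated failure"))
```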
@@ -15,6 +15,7 @@ import requests  # type: ignore
 import litellm
 from litellm._logging import print_verbose, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
+from litellm.proxy._types import UserAPIKeyAuth
 
 
 class PrometheusLogger(CustomLogger):
@@ -38,28 +39,30 @@ class PrometheusLogger(CustomLogger):
                 )
                 return
 
-            self.litellm_llm_api_failed_requests_metric = Counter(
-                name="litellm_llm_api_failed_requests_metric",
-                documentation="Total number of failed LLM API calls via litellm - track fails per API Key, team, user",
+            REQUESTED_MODEL = "requested_model"
+
+            self.litellm_proxy_failed_requests_metric = Counter(
+                name="litellm_proxy_failed_requests_metric",
+                documentation="Total number of failed responses from proxy - the client did not get a success response from litellm proxy",
                 labelnames=[
                     "end_user",
                     "hashed_api_key",
                     "api_key_alias",
-                    "model",
+                    REQUESTED_MODEL,
                     "team",
                     "team_alias",
                     "user",
                 ],
             )
 
-            self.litellm_requests_metric = Counter(
-                name="litellm_requests_metric",
-                documentation="Total number of LLM calls to litellm - track total per API Key, team, user",
+            self.litellm_proxy_total_requests_metric = Counter(
+                name="litellm_proxy_total_requests_metric",
+                documentation="Total number of requests made to the proxy server - track number of client side requests",
                 labelnames=[
                     "end_user",
                     "hashed_api_key",
                     "api_key_alias",
-                    "model",
+                    REQUESTED_MODEL,
                     "team",
                     "team_alias",
                     "user",
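`prometheus_client` matches positional `labels()` values to `labelnames` by order, which is why sharing a `REQUESTED_MODEL` constant between metric definitions and increment sites matters. A standalone sketch of the pattern (the `demo_*` names are hypothetical, not part of this commit):

```python
from prometheus_client import Counter

REQUESTED_MODEL = "requested_model"

demo_counter = Counter(
    name="demo_requests_metric",
    documentation="example only",
    labelnames=["end_user", REQUESTED_MODEL],
)

# Positional label values must follow labelnames order: end_user, then model
demo_counter.labels("user-123", "gpt-4o").inc()
```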
@@ -201,17 +204,17 @@ class PrometheusLogger(CustomLogger):
             self.litellm_deployment_success_responses = Counter(
                 name="litellm_deployment_success_responses",
                 documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
-                labelnames=["requested_model"] + _logged_llm_labels,
+                labelnames=[REQUESTED_MODEL] + _logged_llm_labels,
             )
             self.litellm_deployment_failure_responses = Counter(
                 name="litellm_deployment_failure_responses",
                 documentation="LLM Deployment Analytics - Total number of failed LLM API calls for a specific LLM deployment. exception_status is the status of the exception from the llm api",
-                labelnames=["requested_model", "exception_status"] + _logged_llm_labels,
+                labelnames=[REQUESTED_MODEL, "exception_status"] + _logged_llm_labels,
             )
             self.litellm_deployment_total_requests = Counter(
                 name="litellm_deployment_total_requests",
                 documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
-                labelnames=["requested_model"] + _logged_llm_labels,
+                labelnames=[REQUESTED_MODEL] + _logged_llm_labels,
             )
 
             # Deployment Latency tracking
@@ -232,6 +235,34 @@ class PrometheusLogger(CustomLogger):
                 ["primary_model", "fallback_model"],
             )
 
+            self.litellm_llm_api_failed_requests_metric = Counter(
+                name="litellm_llm_api_failed_requests_metric",
+                documentation="deprecated - use litellm_proxy_failed_requests_metric",
+                labelnames=[
+                    "end_user",
+                    "hashed_api_key",
+                    "api_key_alias",
+                    "model",
+                    "team",
+                    "team_alias",
+                    "user",
+                ],
+            )
+
+            self.litellm_requests_metric = Counter(
+                name="litellm_requests_metric",
+                documentation="deprecated - use litellm_proxy_total_requests_metric. Total number of LLM calls to litellm - track total per API Key, team, user",
+                labelnames=[
+                    "end_user",
+                    "hashed_api_key",
+                    "api_key_alias",
+                    "model",
+                    "team",
+                    "team_alias",
+                    "user",
+                ],
+            )
+
         except Exception as e:
             print_verbose(f"Got exception on init prometheus client {str(e)}")
             raise e
@@ -440,6 +471,76 @@ class PrometheusLogger(CustomLogger):
             pass
 
+    async def async_post_call_failure_hook(
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
+    ):
+        """
+        Track client side failures
+
+        Proxy level tracking - failed client side requests
+
+        labelnames=[
+            "end_user",
+            "hashed_api_key",
+            "api_key_alias",
+            "model",
+            "team",
+            "team_alias",
+            "user",
+        ],
+        """
+        try:
+            self.litellm_proxy_failed_requests_metric.labels(
+                user_api_key_dict.end_user_id,
+                user_api_key_dict.api_key,
+                user_api_key_dict.key_alias,
+                request_data.get("model", ""),
+                user_api_key_dict.team_id,
+                user_api_key_dict.team_alias,
+                user_api_key_dict.user_id,
+            ).inc()
+
+            self.litellm_proxy_total_requests_metric.labels(
+                user_api_key_dict.end_user_id,
+                user_api_key_dict.api_key,
+                user_api_key_dict.key_alias,
+                request_data.get("model", ""),
+                user_api_key_dict.team_id,
+                user_api_key_dict.team_alias,
+                user_api_key_dict.user_id,
+            ).inc()
+        except Exception as e:
+            verbose_logger.exception(
+                "prometheus Layer Error(): Exception occurred - {}".format(str(e))
+            )
+            pass
+
+    async def async_post_call_success_hook(
+        self, data: dict, user_api_key_dict: UserAPIKeyAuth, response
+    ):
+        """
+        Proxy level tracking - triggered when the proxy responds with a success response to the client
+        """
+        try:
+            self.litellm_proxy_total_requests_metric.labels(
+                user_api_key_dict.end_user_id,
+                user_api_key_dict.api_key,
+                user_api_key_dict.key_alias,
+                data.get("model", ""),
+                user_api_key_dict.team_id,
+                user_api_key_dict.team_alias,
+                user_api_key_dict.user_id,
+            ).inc()
+        except Exception as e:
+            verbose_logger.exception(
+                "prometheus Layer Error(): Exception occurred - {}".format(str(e))
+            )
+            pass
+
     def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
         try:
             verbose_logger.debug("setting remaining tokens requests metric")
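To exercise the new hook in isolation, a hypothetical snippet along these lines should bump both counters (assuming `PrometheusLogger` lives at `litellm.integrations.prometheus` and `UserAPIKeyAuth`'s fields are all optional; run it in a fresh process, since Prometheus collectors register globally):

```python
import asyncio

from litellm.integrations.prometheus import PrometheusLogger
from litellm.proxy._types import UserAPIKeyAuth

logger = PrometheusLogger()
asyncio.run(
    logger.async_post_call_failure_hook(
        request_data={"model": "gpt-4o"},
        original_exception=Exception("client-side failure"),
        user_api_key_dict=UserAPIKeyAuth(),
    )
)
# litellm_proxy_failed_requests_metric and litellm_proxy_total_requests_metric
# should each now report 1 for these label values.
```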
@@ -1121,6 +1121,7 @@ async def user_api_key_auth(
         if open_telemetry_logger is not None:
             await open_telemetry_logger.async_post_call_failure_hook(  # type: ignore
                 original_exception=e,
+                request_data={},
                 user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
             )
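Note: when authentication itself fails there is no parsed request body yet, which is presumably why an empty dict is passed for the newly required `request_data` parameter here.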
@@ -35,7 +35,10 @@ class MyCustomHandler(
         return data
 
     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         pass
@@ -1,4 +1,19 @@
 model_list:
+  - model_name: gemini-vision
+    litellm_params:
+      model: vertex_ai/gemini-1.5-pro
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
+      vertex_project: "adroit-crow-413218"
+      vertex_location: "us-central1"
+      vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
+  - model_name: gemini-vision
+    litellm_params:
+      model: vertex_ai/gemini-1.0-pro-vision-001
+      api_base: https://exampleopenaiendpoint-production-c715.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
+      vertex_project: "adroit-crow-413218"
+      vertex_location: "us-central1"
+      vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
+
   - model_name: fake-azure-endpoint
     litellm_params:
       model: openai/429

@@ -6,3 +21,6 @@ model_list:
       api_base: https://exampleopenaiendpoint-production.up.railway.app
+
+litellm_settings:
+  success_callback: ["prometheus"]
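With `success_callback: ["prometheus"]` enabled, any client traffic through the proxy feeds the new counters. A hedged sketch using the OpenAI SDK against a local proxy (the port and key are assumptions; `fake-azure-endpoint` maps to `openai/429` above, which presumably simulates rate-limit errors, so this exercises `litellm_proxy_failed_requests_metric`):

```python
import openai

# Assumes the proxy runs locally on port 4000 with a valid virtual key
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

try:
    client.chat.completions.create(
        model="fake-azure-endpoint",  # backed by openai/429 in the config above
        messages=[{"role": "user", "content": "hello"}],
    )
except openai.APIError as e:
    # The simulated failure should increment litellm_proxy_failed_requests_metric
    print(f"expected failure: {e}")
```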
@@ -632,9 +632,9 @@ class ProxyLogging:
 
     async def post_call_failure_hook(
         self,
+        request_data: dict,
         original_exception: Exception,
         user_api_key_dict: UserAPIKeyAuth,
-        request_data: dict,
     ):
         """
         Allows users to raise custom exceptions/log when a call fails, without having to deal with parsing Request body.

@@ -750,6 +750,7 @@ class ProxyLogging:
                 _callback = callback  # type: ignore
                 if _callback is not None and isinstance(_callback, CustomLogger):
                     await _callback.async_post_call_failure_hook(
+                        request_data=request_data,
                         user_api_key_dict=user_api_key_dict,
                         original_exception=original_exception,
                     )
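Because `request_data` moved to the front of `post_call_failure_hook`'s parameter list, any positional caller would now bind arguments to the wrong parameters; the call sites in this commit pass keywords throughout, e.g. (a hypothetical invocation):

```python
await proxy_logging_obj.post_call_failure_hook(
    request_data=request_data,
    original_exception=e,
    user_api_key_dict=user_api_key_dict,
)
```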
|
Loading…
Add table
Add a link
Reference in a new issue