diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index da12d28c45..e7e4a8cdb2 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -3,7 +3,7 @@ # On success, log events to Prometheus import sys from datetime import datetime, timedelta -from typing import Optional +from typing import List, Optional from litellm._logging import print_verbose, verbose_logger from litellm.integrations.custom_logger import CustomLogger @@ -52,48 +52,21 @@ class PrometheusLogger(CustomLogger): self.litellm_proxy_total_requests_metric = Counter( name="litellm_proxy_total_requests_metric", documentation="Total number of requests made to the proxy server - track number of client side requests", - labelnames=[ - "end_user", - "hashed_api_key", - "api_key_alias", - REQUESTED_MODEL, - "team", - "team_alias", - "user", - STATUS_CODE, - ], + labelnames=PrometheusMetricLabels.litellm_proxy_total_requests_metric.value, ) # request latency metrics self.litellm_request_total_latency_metric = Histogram( "litellm_request_total_latency_metric", "Total latency (seconds) for a request to LiteLLM", - labelnames=[ - UserAPIKeyLabelNames.END_USER.value, - UserAPIKeyLabelNames.API_KEY_HASH.value, - UserAPIKeyLabelNames.API_KEY_ALIAS.value, - REQUESTED_MODEL, - UserAPIKeyLabelNames.TEAM.value, - UserAPIKeyLabelNames.TEAM_ALIAS.value, - UserAPIKeyLabelNames.USER.value, - UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value, - ], + labelnames=PrometheusMetricLabels.litellm_request_total_latency_metric.value, buckets=LATENCY_BUCKETS, ) self.litellm_llm_api_latency_metric = Histogram( "litellm_llm_api_latency_metric", "Total latency (seconds) for a models LLM API call", - labelnames=[ - UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value, - UserAPIKeyLabelNames.API_KEY_HASH.value, - UserAPIKeyLabelNames.API_KEY_ALIAS.value, - UserAPIKeyLabelNames.TEAM.value, - UserAPIKeyLabelNames.TEAM_ALIAS.value, - UserAPIKeyLabelNames.REQUESTED_MODEL.value, - UserAPIKeyLabelNames.END_USER.value, - UserAPIKeyLabelNames.USER.value, - ], + labelnames=PrometheusMetricLabels.litellm_llm_api_latency_metric.value, buckets=LATENCY_BUCKETS, ) @@ -419,6 +392,17 @@ class PrometheusLogger(CustomLogger): f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}" ) + enum_values = UserAPIKeyLabelValues( + end_user=end_user_id, + hashed_api_key=user_api_key, + api_key_alias=user_api_key_alias, + requested_model=model, + team=user_api_team, + team_alias=user_api_team_alias, + user=user_id, + status_code="200", + ) + if ( user_api_key is not None and isinstance(user_api_key, str) @@ -494,16 +478,11 @@ class PrometheusLogger(CustomLogger): if ( standard_logging_payload["stream"] is True ): # log successful streaming requests from logging event hook. 
- self.litellm_proxy_total_requests_metric.labels( - end_user=end_user_id, - hashed_api_key=user_api_key, - api_key_alias=user_api_key_alias, - requested_model=model, - team=user_api_team, - team_alias=user_api_team_alias, - user=user_id, - status_code="200", - ).inc() + _labels = prometheus_label_factory( + supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_metric.value, + enum_values=enum_values, + ) + self.litellm_proxy_total_requests_metric.labels(**_labels).inc() def _increment_token_metrics( self, @@ -683,6 +662,24 @@ class PrometheusLogger(CustomLogger): completion_start_time = kwargs.get("completion_start_time", None) + enum_values = UserAPIKeyLabelValues( + end_user=standard_logging_payload["metadata"]["user_api_key_end_user_id"], + user=standard_logging_payload["metadata"]["user_api_key_user_id"], + hashed_api_key=user_api_key, + api_key_alias=user_api_key_alias, + team=user_api_team, + team_alias=user_api_team_alias, + requested_model=standard_logging_payload["model_group"], + model=model, + litellm_model_name=standard_logging_payload["model_group"], + tags=standard_logging_payload["request_tags"], + model_id=standard_logging_payload["model_id"], + api_base=standard_logging_payload["api_base"], + api_provider=standard_logging_payload["custom_llm_provider"], + exception_status=None, + exception_class=None, + ) + if ( completion_start_time is not None and isinstance(completion_start_time, datetime) @@ -708,46 +705,25 @@ class PrometheusLogger(CustomLogger): ): api_call_total_time: timedelta = end_time - api_call_start_time api_call_total_time_seconds = api_call_total_time.total_seconds() - self.litellm_llm_api_latency_metric.labels( - **{ - UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value: model, - UserAPIKeyLabelNames.API_KEY_HASH.value: user_api_key, - UserAPIKeyLabelNames.API_KEY_ALIAS.value: user_api_key_alias, - UserAPIKeyLabelNames.TEAM.value: user_api_team, - UserAPIKeyLabelNames.TEAM_ALIAS.value: user_api_team_alias, - UserAPIKeyLabelNames.USER.value: standard_logging_payload[ - "metadata" - ]["user_api_key_user_id"], - UserAPIKeyLabelNames.END_USER.value: standard_logging_payload[ - "metadata" - ]["user_api_key_end_user_id"], - UserAPIKeyLabelNames.REQUESTED_MODEL.value: standard_logging_payload[ - "model_group" - ], - } - ).observe(api_call_total_time_seconds) + _labels = prometheus_label_factory( + supported_enum_labels=PrometheusMetricLabels.litellm_llm_api_latency_metric.value, + enum_values=enum_values, + ) + self.litellm_llm_api_latency_metric.labels(**_labels).observe( + api_call_total_time_seconds + ) # total request latency if start_time is not None and isinstance(start_time, datetime): total_time: timedelta = end_time - start_time total_time_seconds = total_time.total_seconds() - - self.litellm_request_total_latency_metric.labels( - **{ - UserAPIKeyLabelNames.END_USER.value: standard_logging_payload[ - "metadata" - ]["user_api_key_end_user_id"], - UserAPIKeyLabelNames.API_KEY_HASH.value: user_api_key, - UserAPIKeyLabelNames.API_KEY_ALIAS.value: user_api_key_alias, - REQUESTED_MODEL: standard_logging_payload["model_group"], - UserAPIKeyLabelNames.TEAM.value: user_api_team, - UserAPIKeyLabelNames.TEAM_ALIAS.value: user_api_team_alias, - UserAPIKeyLabelNames.USER.value: standard_logging_payload[ - "metadata" - ]["user_api_key_user_id"], - UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value: model, - } - ).observe(total_time_seconds) + _labels = prometheus_label_factory( + 
supported_enum_labels=PrometheusMetricLabels.litellm_request_total_latency_metric.value, + enum_values=enum_values, + ) + self.litellm_request_total_latency_metric.labels(**_labels).observe( + total_time_seconds + ) async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): from litellm.types.utils import StandardLoggingPayload @@ -813,6 +789,18 @@ class PrometheusLogger(CustomLogger): ] + EXCEPTION_LABELS, """ try: + enum_values = UserAPIKeyLabelValues( + end_user=user_api_key_dict.end_user_id, + user=user_api_key_dict.user_id, + hashed_api_key=user_api_key_dict.api_key, + api_key_alias=user_api_key_dict.key_alias, + team=user_api_key_dict.team_id, + team_alias=user_api_key_dict.team_alias, + requested_model=request_data.get("model", ""), + status_code=str(getattr(original_exception, "status_code", None)), + exception_class=str(original_exception.__class__.__name__), + ) + self.litellm_proxy_failed_requests_metric.labels( end_user=user_api_key_dict.end_user_id, hashed_api_key=user_api_key_dict.api_key, @@ -825,16 +813,11 @@ class PrometheusLogger(CustomLogger): exception_class=str(original_exception.__class__.__name__), ).inc() - self.litellm_proxy_total_requests_metric.labels( - end_user=user_api_key_dict.end_user_id, - hashed_api_key=user_api_key_dict.api_key, - api_key_alias=user_api_key_dict.key_alias, - requested_model=request_data.get("model", ""), - team=user_api_key_dict.team_id, - team_alias=user_api_key_dict.team_alias, - user=user_api_key_dict.user_id, - status_code=str(getattr(original_exception, "status_code", None)), - ).inc() + _labels = prometheus_label_factory( + supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_metric.value, + enum_values=enum_values, + ) + self.litellm_proxy_total_requests_metric.labels(**_labels).inc() pass except Exception as e: verbose_logger.exception( @@ -849,7 +832,7 @@ class PrometheusLogger(CustomLogger): Proxy level tracking - triggered when the proxy responds with a success response to the client """ try: - self.litellm_proxy_total_requests_metric.labels( + enum_values = UserAPIKeyLabelValues( end_user=user_api_key_dict.end_user_id, hashed_api_key=user_api_key_dict.api_key, api_key_alias=user_api_key_dict.key_alias, @@ -858,7 +841,12 @@ class PrometheusLogger(CustomLogger): team_alias=user_api_key_dict.team_alias, user=user_api_key_dict.user_id, status_code="200", - ).inc() + ) + _labels = prometheus_label_factory( + supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_metric.value, + enum_values=enum_values, + ) + self.litellm_proxy_total_requests_metric.labels(**_labels).inc() except Exception as e: verbose_logger.exception( "prometheus Layer Error(): Exception occured - {}".format(str(e)) @@ -1278,3 +1266,30 @@ class PrometheusLogger(CustomLogger): return max_budget return max_budget - spend + + +def prometheus_label_factory( + supported_enum_labels: List[str], enum_values: UserAPIKeyLabelValues +) -> dict: + """ + Returns a dictionary of label + values for prometheus. + + Ensures end_user param is not sent to prometheus if it is not supported. 
+ """ + # Extract dictionary from Pydantic object + enum_dict = enum_values.model_dump() + + # Filter supported labels + filtered_labels = { + label: value + for label, value in enum_dict.items() + if label in supported_enum_labels + } + + if UserAPIKeyLabelNames.END_USER.value in filtered_labels: + filtered_labels["end_user"] = get_end_user_id_for_cost_tracking( + litellm_params={"user_api_key_end_user_id": enum_values.end_user}, + service_type="prometheus", + ) + + return filtered_labels diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index daa838e2c1..ee769bb7eb 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -12,7 +12,7 @@ import time import traceback import uuid from datetime import datetime as dt_object -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast from pydantic import BaseModel @@ -2983,6 +2983,7 @@ def get_standard_logging_object_payload( cache_hit=cache_hit, stream=stream, status=status, + custom_llm_provider=cast(Optional[str], kwargs.get("custom_llm_provider")), saved_cache_cost=saved_cache_cost, startTime=start_time_float, endTime=end_time_float, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 4e0fc27127..18a21b1d0b 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -16,4 +16,5 @@ model_list: mode: audio_transcription litellm_settings: - callbacks: ["prometheus"] \ No newline at end of file + callbacks: ["prometheus"] + disable_end_user_cost_tracking_prometheus_only: true \ No newline at end of file diff --git a/litellm/proxy/example_config_yaml/otel_test_config.yaml b/litellm/proxy/example_config_yaml/otel_test_config.yaml index fae3ee3dae..d3f1236159 100644 --- a/litellm/proxy/example_config_yaml/otel_test_config.yaml +++ b/litellm/proxy/example_config_yaml/otel_test_config.yaml @@ -37,6 +37,7 @@ model_list: litellm_settings: cache: true callbacks: ["otel", "prometheus"] + disable_end_user_cost_tracking_prometheus_only: True guardrails: - guardrail_name: "aporia-pre-guard" diff --git a/litellm/types/integrations/prometheus.py b/litellm/types/integrations/prometheus.py index ec4125e520..52ef6a8a7b 100644 --- a/litellm/types/integrations/prometheus.py +++ b/litellm/types/integrations/prometheus.py @@ -1,4 +1,7 @@ from enum import Enum +from typing import List, Optional, Union + +from pydantic import BaseModel, Field REQUESTED_MODEL = "requested_model" EXCEPTION_STATUS = "exception_status" @@ -61,3 +64,82 @@ class UserAPIKeyLabelNames(Enum): API_PROVIDER = "api_provider" EXCEPTION_STATUS = EXCEPTION_STATUS EXCEPTION_CLASS = EXCEPTION_CLASS + STATUS_CODE = "status_code" + + +class PrometheusMetricLabels(Enum): + litellm_llm_api_latency_metric = [ + UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value, + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + UserAPIKeyLabelNames.REQUESTED_MODEL.value, + UserAPIKeyLabelNames.END_USER.value, + UserAPIKeyLabelNames.USER.value, + ] + + litellm_request_total_latency_metric = [ + UserAPIKeyLabelNames.END_USER.value, + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + UserAPIKeyLabelNames.REQUESTED_MODEL.value, + UserAPIKeyLabelNames.TEAM.value, + 
UserAPIKeyLabelNames.TEAM_ALIAS.value, + UserAPIKeyLabelNames.USER.value, + UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value, + ] + + litellm_proxy_total_requests_metric = [ + UserAPIKeyLabelNames.END_USER.value, + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + UserAPIKeyLabelNames.REQUESTED_MODEL.value, + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + UserAPIKeyLabelNames.USER.value, + UserAPIKeyLabelNames.STATUS_CODE.value, + ] + + +from typing import List, Optional + +from pydantic import BaseModel, Field + + +class UserAPIKeyLabelValues(BaseModel): + end_user: Optional[str] = None + user: Optional[str] = None + hashed_api_key: Optional[str] = None + api_key_alias: Optional[str] = None + team: Optional[str] = None + team_alias: Optional[str] = None + requested_model: Optional[str] = None + model: Optional[str] = None + litellm_model_name: Optional[str] = None + tags: List[str] = [] + model_id: Optional[str] = None + api_base: Optional[str] = None + api_provider: Optional[str] = None + exception_status: Optional[str] = None + exception_class: Optional[str] = None + status_code: Optional[str] = None + + class Config: + fields = { + "end_user": {"alias": UserAPIKeyLabelNames.END_USER}, + "user": {"alias": UserAPIKeyLabelNames.USER}, + "hashed_api_key": {"alias": UserAPIKeyLabelNames.API_KEY_HASH}, + "api_key_alias": {"alias": UserAPIKeyLabelNames.API_KEY_ALIAS}, + "team": {"alias": UserAPIKeyLabelNames.TEAM}, + "team_alias": {"alias": UserAPIKeyLabelNames.TEAM_ALIAS}, + "requested_model": {"alias": UserAPIKeyLabelNames.REQUESTED_MODEL}, + "model": {"alias": UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME}, + "litellm_model_name": {"alias": UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME}, + "model_id": {"alias": UserAPIKeyLabelNames.MODEL_ID}, + "api_base": {"alias": UserAPIKeyLabelNames.API_BASE}, + "api_provider": {"alias": UserAPIKeyLabelNames.API_PROVIDER}, + "exception_status": {"alias": UserAPIKeyLabelNames.EXCEPTION_STATUS}, + "exception_class": {"alias": UserAPIKeyLabelNames.EXCEPTION_CLASS}, + "status_code": {"alias": UserAPIKeyLabelNames.STATUS_CODE}, + } diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 621e27221e..934b56e486 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -1513,6 +1513,7 @@ class StandardLoggingPayload(TypedDict): StandardLoggingModelCostFailureDebugInformation ] status: StandardLoggingPayloadStatus + custom_llm_provider: Optional[str] total_tokens: int prompt_tokens: int completion_tokens: int diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py index 8caf5d079f..5ccbf5c02b 100644 --- a/tests/logging_callback_tests/test_prometheus_unit_tests.py +++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py @@ -62,6 +62,7 @@ def create_standard_logging_payload() -> StandardLoggingPayload: model="gpt-3.5-turbo", model_id="model-123", model_group="openai-gpt", + custom_llm_provider="openai", api_base="https://api.openai.com", metadata=StandardLoggingMetadata( user_api_key_hash="test_hash", @@ -793,3 +794,29 @@ def test_increment_deployment_cooled_down(prometheus_logger): "gpt-3.5-turbo", "model-123", "https://api.openai.com", "openai", "429" ) prometheus_logger.litellm_deployment_cooled_down.labels().inc.assert_called_once() + + +@pytest.mark.parametrize("disable_end_user_tracking", [True, False]) +def test_prometheus_factory(monkeypatch, disable_end_user_tracking): + from 
litellm.integrations.prometheus import prometheus_label_factory
+    from litellm.types.integrations.prometheus import UserAPIKeyLabelValues
+
+    monkeypatch.setattr(
+        "litellm.disable_end_user_cost_tracking_prometheus_only",
+        disable_end_user_tracking,
+    )
+
+    enum_values = UserAPIKeyLabelValues(
+        end_user="test_end_user",
+        hashed_api_key="test_hash",
+        api_key_alias="test_alias",
+    )
+    supported_labels = ["end_user", "hashed_api_key", "api_key_alias"]
+    returned_dict = prometheus_label_factory(
+        supported_enum_labels=supported_labels, enum_values=enum_values
+    )
+
+    if disable_end_user_tracking:
+        assert returned_dict["end_user"] is None
+    else:
+        assert returned_dict["end_user"] == "test_end_user"
diff --git a/tests/otel_tests/test_prometheus.py b/tests/otel_tests/test_prometheus.py
index 9bb9ae8d4c..b8d3bd20b3 100644
--- a/tests/otel_tests/test_prometheus.py
+++ b/tests/otel_tests/test_prometheus.py
@@ -13,6 +13,8 @@ sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
 
+END_USER_ID = "my-test-user-34"
+
 
 async def make_bad_chat_completion_request(session, key):
     url = "http://0.0.0.0:4000/chat/completions"
@@ -41,6 +43,7 @@ async def make_good_chat_completion_request(session, key):
         "model": "fake-openai-endpoint",
         "messages": [{"role": "user", "content": f"Hello {uuid.uuid4()}"}],
         "tags": ["teamB"],
+        "user": END_USER_ID,  # test that disabling end-user tracking for prometheus works
     }
     async with session.post(url, headers=headers, json=data) as response:
         status = response.status
@@ -143,6 +146,8 @@ async def test_proxy_success_metrics():
 
     print("/metrics", metrics)
 
+    assert END_USER_ID not in metrics
+
     # Check if the success metric is present and correct
    assert (
        'litellm_request_total_latency_metric_bucket{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",requested_model="fake-openai-endpoint",team="None",team_alias="None",user="default_user_id"}'