diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md
index 797e74b2b3..12065bdd86 100644
--- a/docs/my-website/docs/proxy/prometheus.md
+++ b/docs/my-website/docs/proxy/prometheus.md
@@ -77,7 +77,7 @@ Use this to track overall LiteLLM Proxy usage.
 | Metric Name | Description |
 |----------------------|--------------------------------------|
 | `litellm_proxy_failed_requests_metric` | Total number of failed responses from proxy - the client did not get a success response from litellm proxy. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "exception_status", "exception_class"` |
-| `litellm_proxy_total_requests_metric` | Total number of requests made to the proxy server - track number of client side requests. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "exception_status", "exception_class"` |
+| `litellm_proxy_total_requests_metric` | Total number of requests made to the proxy server - track number of client side requests. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "status_code"` |
 
 ## LLM API / Provider Metrics
 
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 73198d0ba7..c3d81f13e8 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -65,6 +65,7 @@ class PrometheusLogger(CustomLogger):
                 "team",
                 "team_alias",
                 "user",
+                STATUS_CODE,
             ],
         )
 
@@ -731,13 +732,14 @@ class PrometheusLogger(CustomLogger):
             ).inc()
 
             self.litellm_proxy_total_requests_metric.labels(
-                user_api_key_dict.end_user_id,
-                user_api_key_dict.api_key,
-                user_api_key_dict.key_alias,
-                request_data.get("model", ""),
-                user_api_key_dict.team_id,
-                user_api_key_dict.team_alias,
-                user_api_key_dict.user_id,
+                end_user=user_api_key_dict.end_user_id,
+                hashed_api_key=user_api_key_dict.api_key,
+                api_key_alias=user_api_key_dict.key_alias,
+                requested_model=request_data.get("model", ""),
+                team=user_api_key_dict.team_id,
+                team_alias=user_api_key_dict.team_alias,
+                user=user_api_key_dict.user_id,
+                status_code=str(getattr(original_exception, "status_code", None)),
             ).inc()
             pass
         except Exception as e:
@@ -754,13 +756,14 @@
         """
         try:
             self.litellm_proxy_total_requests_metric.labels(
-                user_api_key_dict.end_user_id,
-                user_api_key_dict.api_key,
-                user_api_key_dict.key_alias,
-                data.get("model", ""),
-                user_api_key_dict.team_id,
-                user_api_key_dict.team_alias,
-                user_api_key_dict.user_id,
+                end_user=user_api_key_dict.end_user_id,
+                hashed_api_key=user_api_key_dict.api_key,
+                api_key_alias=user_api_key_dict.key_alias,
+                requested_model=data.get("model", ""),
+                team=user_api_key_dict.team_id,
+                team_alias=user_api_key_dict.team_alias,
+                user=user_api_key_dict.user_id,
+                status_code="200",
             ).inc()
         except Exception as e:
             verbose_logger.exception(
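The move from positional to keyword arguments in both `.labels()` calls is what makes adding `STATUS_CODE` safe: with positional arguments, a new label would silently shift every later value into the wrong slot. A minimal standalone sketch of the pattern, assuming only `prometheus_client` (the metric name and label set mirror this diff; the sample label values are illustrative, not from the PR):

```python
# Minimal sketch, assuming prometheus_client; mirrors the label set in the diff.
from prometheus_client import Counter

STATUS_CODE = "status_code"  # as defined in litellm/types/integrations/prometheus.py

litellm_proxy_total_requests_metric = Counter(
    "litellm_proxy_total_requests_metric",
    "Total number of requests made to the proxy server",
    labelnames=[
        "end_user",
        "hashed_api_key",
        "api_key_alias",
        "requested_model",
        "team",
        "team_alias",
        "user",
        STATUS_CODE,
    ],
)

# Keyword form binds each value to a named label, so adding STATUS_CODE
# cannot misalign the existing seven values.
litellm_proxy_total_requests_metric.labels(
    end_user="user-1",  # illustrative values only
    hashed_api_key="abc123",
    api_key_alias="my-key",
    requested_model="gpt-4o",
    team="team-a",
    team_alias="team-a-alias",
    user="u-1",
    status_code="200",
).inc()
```

Note the failure path: `str(getattr(original_exception, "status_code", None))` stringifies to `"None"` when the exception carries no HTTP status, so the label is always populated even for non-HTTP failures.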
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 1a90d9090e..5a6aef7d2a 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,14 +1,42 @@
 model_list:
-  - model_name: gpt-4o
-    litellm_params:
-      model: openai/gpt-4o
-      api_key: os.environ/OPENAI_API_KEY
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["teamA"]
+    model_info:
+      id: "team-a-model"
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["teamB"]
+    model_info:
+      id: "team-b-model"
+  - model_name: rerank-english-v3.0
+    litellm_params:
+      model: cohere/rerank-english-v3.0
+      api_key: os.environ/COHERE_API_KEY
+  - model_name: fake-azure-endpoint
+    litellm_params:
+      model: openai/429
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app
+  - model_name: llava-hf
+    litellm_params:
+      model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
+      api_base: http://localhost:8000
+      api_key: fake-key
+    model_info:
+      supports_vision: True
+
 
 litellm_settings:
-  tag_budget_config:
-    product:chat-bot: # (Tag)
-      max_budget: 0.000000000001 # (USD)
-      budget_duration: 1d # (Duration)
-    product:chat-bot-2: # (Tag)
-      max_budget: 100 # (USD)
-      budget_duration: 1d # (Duration)
\ No newline at end of file
+  cache: true
+  callbacks: ["otel", "prometheus"]
+
+router_settings:
+  enable_tag_filtering: True # 👈 Key Change
\ No newline at end of file
diff --git a/litellm/types/integrations/prometheus.py b/litellm/types/integrations/prometheus.py
index d09ed96702..c5d6fc7ab6 100644
--- a/litellm/types/integrations/prometheus.py
+++ b/litellm/types/integrations/prometheus.py
@@ -1,6 +1,7 @@
 REQUESTED_MODEL = "requested_model"
 EXCEPTION_STATUS = "exception_status"
 EXCEPTION_CLASS = "exception_class"
+STATUS_CODE = "status_code"
 EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
 LATENCY_BUCKETS = (
     0.005,
diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py
index 494f83a654..19c183d738 100644
--- a/tests/logging_callback_tests/test_prometheus_unit_tests.py
+++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py
@@ -485,13 +485,14 @@ async def test_async_post_call_failure_hook(prometheus_logger):
 
     # Assert total requests metric was incremented with correct labels
     prometheus_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
-        "test_end_user",
-        "test_key",
-        "test_alias",
-        "gpt-3.5-turbo",
-        "test_team",
-        "test_team_alias",
-        "test_user",
+        end_user="test_end_user",
+        hashed_api_key="test_key",
+        api_key_alias="test_alias",
+        requested_model="gpt-3.5-turbo",
+        team="test_team",
+        team_alias="test_team_alias",
+        user="test_user",
+        status_code="429",
     )
     prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()
 
@@ -527,13 +528,14 @@ async def test_async_post_call_success_hook(prometheus_logger):
 
     # Assert total requests metric was incremented with correct labels
     prometheus_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
-        "test_end_user",
-        "test_key",
-        "test_alias",
-        "gpt-3.5-turbo",
-        "test_team",
-        "test_team_alias",
-        "test_user",
+        end_user="test_end_user",
+        hashed_api_key="test_key",
+        api_key_alias="test_alias",
+        requested_model="gpt-3.5-turbo",
+        team="test_team",
+        team_alias="test_team_alias",
+        user="test_user",
+        status_code="200",
     )
     prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()
 
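Because the hooks now call `.labels()` with keywords, the mock assertions must match keyword-for-keyword: `assert_called_once_with` compares positional and keyword arguments exactly, so a positional call with identical values would fail. A self-contained sketch of that mock pattern (`fake_logger` is an illustrative name, not from the test suite):

```python
# Standalone sketch of the mock pattern the updated unit tests rely on.
from unittest.mock import MagicMock

fake_logger = MagicMock()

# Simulate what the success hook does: increment with keyword labels.
fake_logger.litellm_proxy_total_requests_metric.labels(
    end_user="test_end_user",
    hashed_api_key="test_key",
    api_key_alias="test_alias",
    requested_model="gpt-3.5-turbo",
    team="test_team",
    team_alias="test_team_alias",
    user="test_user",
    status_code="200",
).inc()

# Passes only because the call above used these exact keyword arguments.
fake_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
    end_user="test_end_user",
    hashed_api_key="test_key",
    api_key_alias="test_alias",
    requested_model="gpt-3.5-turbo",
    team="test_team",
    team_alias="test_team_alias",
    user="test_user",
    status_code="200",
)
# MagicMock returns the same child mock for any labels() call,
# so this checks that inc() was invoked exactly once.
fake_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()
```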
diff --git a/tests/otel_tests/test_prometheus.py b/tests/otel_tests/test_prometheus.py
index 0de1c98961..3c52781ce8 100644
--- a/tests/otel_tests/test_prometheus.py
+++ b/tests/otel_tests/test_prometheus.py
@@ -110,7 +110,7 @@ async def test_proxy_failure_metrics():
     assert expected_llm_deployment_failure
 
     assert (
-        'litellm_proxy_total_requests_metric_total{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None",user="default_user_id"} 1.0'
+        'litellm_proxy_total_requests_metric_total{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",status_code="429",team="None",team_alias="None",user="default_user_id"} 1.0'
         in metrics
     )
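Beyond the integration test above, the new label can be eyeballed on a running proxy's scrape output. A rough manual check, assuming a local proxy started with the `proxy_config.yaml` from this PR (prometheus callback enabled) on the default port 4000, after at least one request has been served:

```python
# Rough end-to-end check; assumes a proxy on localhost:4000 with the
# prometheus callback enabled and at least one request already served.
import requests

resp = requests.get("http://localhost:4000/metrics")
resp.raise_for_status()

# Every exported series for the proxy-total counter should now carry a
# status_code label ("200", "429", or "None" for non-HTTP failures).
for line in resp.text.splitlines():
    if line.startswith("litellm_proxy_total_requests_metric_total{"):
        assert 'status_code="' in line, f"missing status_code label: {line}"
        print(line)
```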