(feat - proxy) Add status_code to litellm_proxy_total_requests_metric_total (#7293)

* fix _select_model_name_for_cost_calc docstring

* add STATUS_CODE to prometheus

* test prometheus unit tests

* test_prometheus_unit_tests.py

* update Proxy Level Tracking Metrics docs

* fix test_proxy_failure_metrics

* fix test_proxy_failure_metrics
Ishaan Jaff committed 2024-12-18 15:55:02 -08:00 (committed by GitHub)
commit 70883bc1b8, parent 225e0581a7
6 changed files with 75 additions and 41 deletions

File 1 of 6: Prometheus metrics documentation (Proxy Level Tracking Metrics table)

@@ -77,7 +77,7 @@ Use this to track overall LiteLLM Proxy usage.
 | Metric Name | Description |
 |----------------------|--------------------------------------|
 | `litellm_proxy_failed_requests_metric` | Total number of failed responses from proxy - the client did not get a success response from litellm proxy. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "exception_status", "exception_class"` |
-| `litellm_proxy_total_requests_metric` | Total number of requests made to the proxy server - track number of client side requests. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "exception_status", "exception_class"` |
+| `litellm_proxy_total_requests_metric` | Total number of requests made to the proxy server - track number of client side requests. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "status_code"` |
 ## LLM API / Provider Metrics
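
For reference, a minimal standalone sketch (plain prometheus_client, not LiteLLM's actual PrometheusLogger; metric and label names are copied from the table above) showing what the new status_code label produces in the exposition format:

# Standalone sketch of the new label set, using prometheus_client directly.
from prometheus_client import CollectorRegistry, Counter, generate_latest

registry = CollectorRegistry()
total_requests = Counter(
    "litellm_proxy_total_requests_metric",
    "Total number of requests made to the proxy server",
    labelnames=[
        "end_user", "hashed_api_key", "api_key_alias", "requested_model",
        "team", "team_alias", "user", "status_code",  # status_code is the new label
    ],
    registry=registry,
)

# A success and a rate-limited request land in separate series:
total_requests.labels(
    end_user="None", hashed_api_key="abc123", api_key_alias="None",
    requested_model="fake-azure-endpoint", team="None", team_alias="None",
    user="default_user_id", status_code="200",
).inc()
total_requests.labels(
    end_user="None", hashed_api_key="abc123", api_key_alias="None",
    requested_model="fake-azure-endpoint", team="None", team_alias="None",
    user="default_user_id", status_code="429",
).inc()

# prometheus_client appends _total to Counter names in the text format,
# giving litellm_proxy_total_requests_metric_total{...,status_code="200",...} 1.0
print(generate_latest(registry).decode())

Because status_code is a label, a 200 and a 429 for the same key and model become separate time series, which is what the end-to-end assertion at the bottom of this diff checks.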

File 2 of 6: PrometheusLogger (Prometheus integration)

@@ -65,6 +65,7 @@ class PrometheusLogger(CustomLogger):
                 "team",
                 "team_alias",
                 "user",
+                STATUS_CODE,
             ],
         )
@@ -731,13 +732,14 @@ class PrometheusLogger(CustomLogger):
             ).inc()
             self.litellm_proxy_total_requests_metric.labels(
-                user_api_key_dict.end_user_id,
-                user_api_key_dict.api_key,
-                user_api_key_dict.key_alias,
-                request_data.get("model", ""),
-                user_api_key_dict.team_id,
-                user_api_key_dict.team_alias,
-                user_api_key_dict.user_id,
+                end_user=user_api_key_dict.end_user_id,
+                hashed_api_key=user_api_key_dict.api_key,
+                api_key_alias=user_api_key_dict.key_alias,
+                requested_model=request_data.get("model", ""),
+                team=user_api_key_dict.team_id,
+                team_alias=user_api_key_dict.team_alias,
+                user=user_api_key_dict.user_id,
+                status_code=str(getattr(original_exception, "status_code", None)),
             ).inc()
             pass
         except Exception as e:
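
A quick illustration of the getattr fallback used for the new label above; the exception class here is a hypothetical stand-in, not a LiteLLM type:

# Hypothetical stand-in for an exception that carries a status_code attribute.
class FakeRateLimitError(Exception):
    status_code = 429

# Mirrors str(getattr(original_exception, "status_code", None)) from the hook:
print(str(getattr(FakeRateLimitError(), "status_code", None)))   # -> "429"
print(str(getattr(ValueError("no status"), "status_code", None)))  # -> "None"

So exceptions without a status_code attribute still increment the counter, under the literal label value "None".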
@@ -754,13 +756,14 @@ class PrometheusLogger(CustomLogger):
         """
         try:
             self.litellm_proxy_total_requests_metric.labels(
-                user_api_key_dict.end_user_id,
-                user_api_key_dict.api_key,
-                user_api_key_dict.key_alias,
-                data.get("model", ""),
-                user_api_key_dict.team_id,
-                user_api_key_dict.team_alias,
-                user_api_key_dict.user_id,
+                end_user=user_api_key_dict.end_user_id,
+                hashed_api_key=user_api_key_dict.api_key,
+                api_key_alias=user_api_key_dict.key_alias,
+                requested_model=data.get("model", ""),
+                team=user_api_key_dict.team_id,
+                team_alias=user_api_key_dict.team_alias,
+                user=user_api_key_dict.user_id,
+                status_code="200",
             ).inc()
         except Exception as e:
             verbose_logger.exception(

File 3 of 6: proxy test config (yaml)

@@ -1,14 +1,42 @@
 model_list:
-  - model_name: gpt-4o
+  - model_name: fake-openai-endpoint
     litellm_params:
-      model: openai/gpt-4o
-      api_key: os.environ/OPENAI_API_KEY
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["teamA"]
+    model_info:
+      id: "team-a-model"
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["teamB"]
+    model_info:
+      id: "team-b-model"
+  - model_name: rerank-english-v3.0
+    litellm_params:
+      model: cohere/rerank-english-v3.0
+      api_key: os.environ/COHERE_API_KEY
+  - model_name: fake-azure-endpoint
+    litellm_params:
+      model: openai/429
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app
+  - model_name: llava-hf
+    litellm_params:
+      model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
+      api_base: http://localhost:8000
+      api_key: fake-key
+    model_info:
+      supports_vision: True
+
 litellm_settings:
-  tag_budget_config:
-    product:chat-bot: # (Tag)
-      max_budget: 0.000000000001 # (USD)
-      budget_duration: 1d # (Duration)
-    product:chat-bot-2: # (Tag)
-      max_budget: 100 # (USD)
-      budget_duration: 1d # (Duration)
+  cache: true
+  callbacks: ["otel", "prometheus"]
+
+router_settings:
+  enable_tag_filtering: True # 👈 Key Change
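
With "prometheus" in callbacks, the proxy exposes its metrics over HTTP. A minimal sketch for scraping them by hand; the localhost:4000 address and /metrics path are assumptions about a default local deployment, not taken from this diff:

# Minimal scrape sketch. Assumes a local proxy on port 4000 exposing /metrics.
import requests

resp = requests.get("http://localhost:4000/metrics", timeout=10)
resp.raise_for_status()
for line in resp.text.splitlines():
    if line.startswith("litellm_proxy_total_requests_metric_total"):
        print(line)  # one line per unique label combination, now incl. status_code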

File 4 of 6: Prometheus label constants

@@ -1,6 +1,7 @@
 REQUESTED_MODEL = "requested_model"
 EXCEPTION_STATUS = "exception_status"
 EXCEPTION_CLASS = "exception_class"
+STATUS_CODE = "status_code"
 EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
 LATENCY_BUCKETS = (
     0.005,

File 5 of 6: test_prometheus_unit_tests.py

@@ -485,13 +485,14 @@ async def test_async_post_call_failure_hook(prometheus_logger):
     # Assert total requests metric was incremented with correct labels
     prometheus_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
-        "test_end_user",
-        "test_key",
-        "test_alias",
-        "gpt-3.5-turbo",
-        "test_team",
-        "test_team_alias",
-        "test_user",
+        end_user="test_end_user",
+        hashed_api_key="test_key",
+        api_key_alias="test_alias",
+        requested_model="gpt-3.5-turbo",
+        team="test_team",
+        team_alias="test_team_alias",
+        user="test_user",
+        status_code="429",
     )
     prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()
@@ -527,13 +528,14 @@ async def test_async_post_call_success_hook(prometheus_logger):
     # Assert total requests metric was incremented with correct labels
     prometheus_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
-        "test_end_user",
-        "test_key",
-        "test_alias",
-        "gpt-3.5-turbo",
-        "test_team",
-        "test_team_alias",
-        "test_user",
+        end_user="test_end_user",
+        hashed_api_key="test_key",
+        api_key_alias="test_alias",
+        requested_model="gpt-3.5-turbo",
+        team="test_team",
+        team_alias="test_team_alias",
+        user="test_user",
+        status_code="200",
     )
     prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()
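
The switch from positional to keyword arguments is what lets these assertions pin each label by name. A self-contained sketch of the MagicMock pattern the tests rely on:

# Self-contained sketch of the assertion pattern above: a MagicMock records
# the exact keyword arguments passed to .labels().
from unittest.mock import MagicMock

metric = MagicMock()
metric.labels(
    end_user="test_end_user", hashed_api_key="test_key", api_key_alias="test_alias",
    requested_model="gpt-3.5-turbo", team="test_team", team_alias="test_team_alias",
    user="test_user", status_code="429",
).inc()

metric.labels.assert_called_once_with(
    end_user="test_end_user", hashed_api_key="test_key", api_key_alias="test_alias",
    requested_model="gpt-3.5-turbo", team="test_team", team_alias="test_team_alias",
    user="test_user", status_code="429",
)
# MagicMock returns the same child mock for every .labels() call, so this
# checks that .inc() ran exactly once on the labeled metric.
metric.labels().inc.assert_called_once()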

File 6 of 6: proxy Prometheus end-to-end test (test_proxy_failure_metrics)

@@ -110,7 +110,7 @@ async def test_proxy_failure_metrics():
     assert expected_llm_deployment_failure
     assert (
-        'litellm_proxy_total_requests_metric_total{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None",user="default_user_id"} 1.0'
+        'litellm_proxy_total_requests_metric_total{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",status_code="429",team="None",team_alias="None",user="default_user_id"} 1.0'
         in metrics
     )
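
The assertion above matches the raw exposition text, which requires the labels in exact alphabetical order (status_code sorts between requested_model and team, hence the placement in the expected string). An alternative sketch that checks the same series structurally, using the text-format parser that ships with prometheus_client; the payload below is just the single line asserted above:

# Sketch: verify the expected series structurally instead of by substring match.
from prometheus_client.parser import text_string_to_metric_families

# Abbreviated /metrics payload containing only the series asserted above.
metrics_text = (
    'litellm_proxy_total_requests_metric_total{api_key_alias="None",'
    'end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",'
    'requested_model="fake-azure-endpoint",status_code="429",team="None",'
    'team_alias="None",user="default_user_id"} 1.0\n'
)

found = False
for family in text_string_to_metric_families(metrics_text):
    for sample in family.samples:
        if (
            sample.name == "litellm_proxy_total_requests_metric_total"
            and sample.labels.get("status_code") == "429"
        ):
            found = True
            assert sample.value == 1.0  # exactly one failed request recorded
assert found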