Merge pull request #5463 from BerriAI/litellm_track_error_per_model
[Feat - Prometheus] - Track error_code, model metric
Commit: d9f3ac5159
3 changed files with 35 additions and 13 deletions
@@ -58,6 +58,15 @@ http://localhost:4000/metrics

## 📈 Metrics Tracked

### Error Metrics

| Metric Name | Description |
|----------------------|--------------------------------------|
| `litellm_error_code_metric_total` | Total number of errors by error code and model |

This metric provides a count of errors encountered, categorized by error code and model. For example:
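An illustrative sample of how the counter could appear in the `/metrics` exposition output (the model names, error codes, and counts below are hypothetical, not captured from this PR):

```
litellm_error_code_metric_total{error_code="429",model="gpt-3.5-turbo"} 12.0
litellm_error_code_metric_total{error_code="500",model="azure/gpt-4"} 3.0
```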
### Proxy Requests / Spend Metrics

@@ -66,7 +75,12 @@ http://localhost:4000/metrics

| `litellm_requests_metric` | Number of requests made, per `"user", "key", "model", "team", "end-user"` |
| `litellm_spend_metric` | Total Spend, per `"user", "key", "model", "team", "end-user"` |
| `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |

### Error Monitoring Metrics

| Metric Name | Description |
|----------------------|--------------------------------------|
| `litellm_llm_api_failed_requests_metric` | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` |
| `litellm_error_code_metric_total` | Total number of errors by error code and model |
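Once Prometheus scrapes the proxy, these counters can be broken down per model. A hypothetical PromQL sketch (metric and label names follow the table above; the 5-minute window is an arbitrary choice, not part of this PR):

```promql
# Per-model, per-error-code error rate over the last 5 minutes
sum by (model, error_code) (rate(litellm_error_code_metric_total[5m]))
```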
### Request Latency Metrics

@@ -138,6 +138,13 @@ class PrometheusLogger(CustomLogger):
            labelnames=["hashed_api_key", "api_key_alias", "model"],
        )

        # New metric for tracking error codes and models
        self.litellm_error_code_metric = Counter(
            "litellm_error_code_metric",
            "Total number of errors by error code and model",
            labelnames=["error_code", "model"],
        )

        # Litellm-Enterprise Metrics
        if premium_user is True:
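For readers unfamiliar with the `prometheus_client` API used above, here is a minimal standalone sketch of the same pattern. The metric name, label values, and counts are illustrative only, not LiteLLM code:

```python
from prometheus_client import Counter, generate_latest

# Register a labeled counter once at startup (same pattern as the metric above).
error_code_metric = Counter(
    "example_error_code_metric",
    "Total number of errors by error code and model",
    labelnames=["error_code", "model"],
)

# Increment the series for a specific (error_code, model) pair on each failure.
error_code_metric.labels(error_code="429", model="gpt-3.5-turbo").inc()

# generate_latest() renders the exposition text a /metrics endpoint would serve.
print(generate_latest().decode())
```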
@@ -378,7 +385,7 @@ class PrometheusLogger(CustomLogger):
        from litellm.proxy.proxy_server import premium_user

        verbose_logger.debug(
-            f"prometheus Logging - Enters success logging function for kwargs {kwargs}"
+            f"prometheus Logging - Enters failure logging function for kwargs {kwargs}"
        )

        # unpack kwargs
@@ -409,6 +416,16 @@ class PrometheusLogger(CustomLogger):
                user_id,
            ).inc()
            self.set_llm_deployment_failure_metrics(kwargs)

            _exception = kwargs.get("exception", None)
            error_code = "unknown"
            if _exception is not None and hasattr(_exception, "status_code"):
                error_code = _exception.status_code

            # Increment the new error code metric
            self.litellm_error_code_metric.labels(
                error_code=error_code, model=model
            ).inc()
        except Exception as e:
            verbose_logger.exception(
                "prometheus Layer Error(): Exception occured - {}".format(str(e))
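To see the failure path end to end, here is a small self-contained sketch that mirrors the logic above. The exception class, model name, and helper function are hypothetical placeholders, not LiteLLM code:

```python
from prometheus_client import Counter

litellm_error_code_metric = Counter(
    "litellm_error_code_metric",
    "Total number of errors by error code and model",
    labelnames=["error_code", "model"],
)


class FakeRateLimitError(Exception):
    """Hypothetical exception carrying an HTTP-style status_code."""
    status_code = 429


def record_failure(exception, model):
    # Fall back to "unknown" when the exception has no status_code,
    # mirroring the guard in the diff above.
    error_code = "unknown"
    if exception is not None and hasattr(exception, "status_code"):
        error_code = exception.status_code
    litellm_error_code_metric.labels(error_code=error_code, model=model).inc()


record_failure(FakeRateLimitError(), model="gpt-3.5-turbo")
record_failure(ValueError("no status code"), model="gpt-3.5-turbo")
```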

@@ -3,21 +3,12 @@ model_list:
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["teamA"] # 👈 Key Change
    model_info:
      id: "team-a-model" # used for identifying model in response headers
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["teamB"] # 👈 Key Change
    model_info:
      id: "team-b-model" # used for identifying model in response headers

router_settings:
  enable_tag_filtering: True # 👈 Key Change
litellm_settings:
  success_callback: ["prometheus"]
  failure_callback: ["prometheus"]

general_settings:
  master_key: sk-1234
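With the `prometheus` callbacks enabled in this config, the new counter can be checked on the proxy's metrics endpoint mentioned in the docs above. The grep output shown in the comment is illustrative, not captured from a real run:

```shell
curl -s http://localhost:4000/metrics | grep litellm_error_code_metric
# e.g. litellm_error_code_metric_total{error_code="429",model="fake-openai-endpoint"} 1.0
```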