Merge pull request #5463 from BerriAI/litellm_track_error_per_model

[Feat - Prometheus] - Track error_code, model metric

Commit d9f3ac5159 (3 changed files with 35 additions and 13 deletions)
Docs diff (Prometheus metrics page):

```diff
@@ -58,6 +58,15 @@ http://localhost:4000/metrics
 
 ## 📈 Metrics Tracked
 
+### Error Metrics
+
+| Metric Name | Description |
+|----------------------|--------------------------------------|
+| `litellm_error_code_metric_total` | Total number of errors by error code and model |
+
+This metric provides a count of errors encountered, categorized by error code and model. For example:
+
 ### Proxy Requests / Spend Metrics
```
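A hypothetical sample of the new counter as it would appear when scraped from `/metrics` (models, error codes, and values invented for illustration):

```
litellm_error_code_metric_total{error_code="429",model="gpt-3.5-turbo"} 5.0
litellm_error_code_metric_total{error_code="500",model="gpt-4"} 2.0
```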
```diff
@@ -66,7 +75,12 @@ http://localhost:4000/metrics
 | `litellm_requests_metric` | Number of requests made, per `"user", "key", "model", "team", "end-user"` |
 | `litellm_spend_metric` | Total Spend, per `"user", "key", "model", "team", "end-user"` |
 | `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |
 
+### Error Monitoring Metrics
+
+| Metric Name | Description |
+|----------------------|--------------------------------------|
 | `litellm_llm_api_failed_requests_metric` | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` |
+| `litellm_error_code_metric_total` | Total number of errors by error code and model |
 
 ### Request Latency Metrics
```
Code diff in `PrometheusLogger` (the Prometheus integration logger):

```diff
@@ -138,6 +138,13 @@ class PrometheusLogger(CustomLogger):
             labelnames=["hashed_api_key", "api_key_alias", "model"],
         )
 
+        # New metric for tracking error codes and models
+        self.litellm_error_code_metric = Counter(
+            "litellm_error_code_metric",
+            "Total number of errors by error code and model",
+            labelnames=["error_code", "model"],
+        )
+
         # Litellm-Enterprise Metrics
         if premium_user is True:
```
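Naming note: `prometheus_client` exposes a `Counter` with a `_total` suffix in the scrape output, which is why the code registers `litellm_error_code_metric` while the docs table lists `litellm_error_code_metric_total`. A minimal standalone sketch of that behavior (plain `prometheus_client`, not LiteLLM code):

```python
from prometheus_client import Counter, generate_latest

# Same name and labels as the new metric registered in PrometheusLogger.
error_code_metric = Counter(
    "litellm_error_code_metric",
    "Total number of errors by error code and model",
    labelnames=["error_code", "model"],
)

# Hypothetical failures recorded for two models.
error_code_metric.labels(error_code="429", model="gpt-3.5-turbo").inc()
error_code_metric.labels(error_code="500", model="gpt-4").inc()

# The text exposition contains litellm_error_code_metric_total{...} samples.
print(generate_latest().decode())
```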
The failure handler's debug message previously said "success"; the diff corrects it:

```diff
@@ -378,7 +385,7 @@ class PrometheusLogger(CustomLogger):
         from litellm.proxy.proxy_server import premium_user
 
         verbose_logger.debug(
-            f"prometheus Logging - Enters success logging function for kwargs {kwargs}"
+            f"prometheus Logging - Enters failure logging function for kwargs {kwargs}"
        )
 
         # unpack kwargs
```
In the failure path, the error code is read off the exception (falling back to `"unknown"`) and the new counter is incremented:

```diff
@@ -409,6 +416,16 @@ class PrometheusLogger(CustomLogger):
                 user_id,
             ).inc()
             self.set_llm_deployment_failure_metrics(kwargs)
 
+            _exception = kwargs.get("exception", None)
+            error_code = "unknown"
+            if _exception is not None and hasattr(_exception, "status_code"):
+                error_code = _exception.status_code
+
+            # Increment the new error code metric
+            self.litellm_error_code_metric.labels(
+                error_code=error_code, model=model
+            ).inc()
+
         except Exception as e:
             verbose_logger.exception(
                 "prometheus Layer Error(): Exception occured - {}".format(str(e))
```
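The fallback above means non-HTTP errors are still counted, just bucketed under `"unknown"`. A minimal sketch of the extraction logic with a hypothetical exception class (`extract_error_code` is an illustrative helper, not a LiteLLM function):

```python
class FakeRateLimitError(Exception):
    """Hypothetical stand-in for a provider exception carrying an HTTP status."""
    status_code = 429


def extract_error_code(exc):
    # Mirrors the diff: use .status_code when present, else "unknown".
    if exc is not None and hasattr(exc, "status_code"):
        return exc.status_code
    return "unknown"


assert extract_error_code(FakeRateLimitError()) == 429
assert extract_error_code(ValueError("no status")) == "unknown"
assert extract_error_code(None) == "unknown"
```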
The example proxy config drops the tag-filtering setup in favor of the Prometheus callbacks:

```diff
@@ -3,21 +3,12 @@ model_list:
     litellm_params:
       model: openai/fake
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
-      tags: ["teamA"] # 👈 Key Change
     model_info:
       id: "team-a-model" # used for identifying model in response headers
-  - model_name: fake-openai-endpoint
-    litellm_params:
-      model: openai/fake
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-      tags: ["teamB"] # 👈 Key Change
-    model_info:
-      id: "team-b-model" # used for identifying model in response headers
 
-router_settings:
-  enable_tag_filtering: True # 👈 Key Change
+litellm_settings:
+  success_callback: ["prometheus"]
+  failure_callback: ["prometheus"]
 
 general_settings:
   master_key: sk-1234
```
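With both `success_callback` and `failure_callback` set to `"prometheus"`, the new counter should show up on the proxy's `/metrics` endpoint. A hypothetical smoke test, assuming the proxy is running on its default port 4000 (as in the docs above) and `requests` is installed:

```python
import requests

# Scrape the proxy's Prometheus endpoint and print the new error counter lines.
resp = requests.get("http://localhost:4000/metrics")
for line in resp.text.splitlines():
    if line.startswith("litellm_error_code_metric_total"):
        print(line)
```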