feat(prometheus): add failure metric labelled by error code and model

This commit is contained in:
Ishaan Jaff 2024-08-31 10:05:23 -07:00
parent 2202a02aef
commit 3fae5eb94e
2 changed files with 21 additions and 13 deletions

View file

@ -138,6 +138,13 @@ class PrometheusLogger(CustomLogger):
labelnames=["hashed_api_key", "api_key_alias", "model"],
)
# Counter of errors, labelled by error code and model
self.litellm_error_code_metric = Counter(
"litellm_error_code_metric",
"Total number of errors by error code and model",
labelnames=["error_code", "model"],
)
# Litellm-Enterprise Metrics
if premium_user is True:
@ -378,7 +385,7 @@ class PrometheusLogger(CustomLogger):
from litellm.proxy.proxy_server import premium_user
verbose_logger.debug(
f"prometheus Logging - Enters success logging function for kwargs {kwargs}"
f"prometheus Logging - Enters failure logging function for kwargs {kwargs}"
)
# unpack kwargs
@ -409,6 +416,16 @@ class PrometheusLogger(CustomLogger):
user_id,
).inc()
self.set_llm_deployment_failure_metrics(kwargs)
_exception = kwargs.get("exception", None)
error_code = "unknown"
if _exception is not None and hasattr(_exception, "status_code"):
error_code = _exception.status_code
# Count this failure under its error code ("unknown" if the exception has no status_code) and model
self.litellm_error_code_metric.labels(
error_code=error_code, model=model
).inc()
except Exception as e:
verbose_logger.exception(
"prometheus Layer Error(): Exception occured - {}".format(str(e))

View file

@ -3,21 +3,12 @@ model_list:
litellm_params:
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
tags: ["teamA"] # 👈 Key Change
model_info:
id: "team-a-model" # used for identifying model in response headers
- model_name: fake-openai-endpoint
litellm_params:
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
tags: ["teamB"] # 👈 Key Change
model_info:
id: "team-b-model" # used for identifying model in response headers
router_settings:
enable_tag_filtering: True # 👈 Key Change
litellm_settings:
success_callback: ["prometheus"]
failure_callback: ["prometheus"]
general_settings:
master_key: sk-1234