feat(prometheus): add a counter metric for failures, labelled by error code and model

This commit is contained in:
Ishaan Jaff 2024-08-31 10:05:23 -07:00
parent 2202a02aef
commit 3fae5eb94e
2 changed files with 21 additions and 13 deletions

View file

@ -138,6 +138,13 @@ class PrometheusLogger(CustomLogger):
labelnames=["hashed_api_key", "api_key_alias", "model"],
)
# Counter of errors, labelled by error code and model
self.litellm_error_code_metric = Counter(
"litellm_error_code_metric",
"Total number of errors by error code and model",
labelnames=["error_code", "model"],
)
# Litellm-Enterprise Metrics
if premium_user is True:
@ -378,7 +385,7 @@ class PrometheusLogger(CustomLogger):
from litellm.proxy.proxy_server import premium_user
verbose_logger.debug(
f"prometheus Logging - Enters success logging function for kwargs {kwargs}"
f"prometheus Logging - Enters failure logging function for kwargs {kwargs}"
)
# unpack kwargs
@ -409,6 +416,16 @@ class PrometheusLogger(CustomLogger):
user_id,
).inc()
self.set_llm_deployment_failure_metrics(kwargs)
_exception = kwargs.get("exception", None)
error_code = "unknown"
if _exception is not None and hasattr(_exception, "status_code"):
error_code = _exception.status_code
# Increment the new error code metric
self.litellm_error_code_metric.labels(
error_code=error_code, model=model
).inc()
except Exception as e:
verbose_logger.exception(
"prometheus Layer Error(): Exception occured - {}".format(str(e))