fix(utils.py): support prometheus failed call metrics

This commit is contained in:
Krrish Dholakia 2024-04-18 12:29:15 -07:00
parent 05d6d9e45f
commit 28edb77350
4 changed files with 38 additions and 4 deletions

View file

@@ -22,6 +22,12 @@ class PrometheusLogger:
verbose_logger.debug(f"in init prometheus metrics")
from prometheus_client import Counter
self.litellm_failed_requests_metric = Counter(
name="litellm_failed_requests_metric",
documentation="Total number of failed LLM calls to litellm",
labelnames=["end_user", "hashed_api_key", "model", "team"],
)
self.litellm_requests_metric = Counter(
name="litellm_requests_metric",
documentation="Total number of LLM calls to litellm",
@@ -69,7 +75,10 @@ class PrometheusLogger:
user_api_team = litellm_params.get("metadata", {}).get(
"user_api_key_team_id", None
)
if response_obj is not None:
tokens_used = response_obj.get("usage", {}).get("total_tokens", 0)
else:
tokens_used = 0
print_verbose(
f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}"
@@ -93,6 +102,12 @@ class PrometheusLogger:
self.litellm_tokens_metric.labels(
end_user_id, user_api_key, model, user_api_team
).inc(tokens_used)
### FAILURE INCREMENT ###
if "exception" in kwargs:
self.litellm_failed_requests_metric.labels(
end_user_id, user_api_key, model, user_api_team
).inc()
except Exception as e:
traceback.print_exc()
verbose_logger.debug(

View file

@@ -26,6 +26,7 @@ model_list:
litellm_settings:
success_callback: ["prometheus"]
failure_callback: ["prometheus"]
service_callback: ["prometheus_system"]
upperbound_key_generate_params:
max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET

View file

@@ -2205,9 +2205,9 @@ class ProxyConfig:
# these are litellm callbacks - "langfuse", "sentry", "wandb"
else:
litellm.failure_callback.append(callback)
verbose_proxy_logger.debug(
f"{blue_color_code} Initialized Success Callbacks - {litellm.failure_callback} {reset_color_code}"
)
print( # noqa
f"{blue_color_code} Initialized Failure Callbacks - {litellm.failure_callback} {reset_color_code}"
) # noqa
elif key == "cache_params":
# this is set in the cache branch
# see usage here: https://docs.litellm.ai/docs/proxy/caching

View file

@@ -2249,6 +2249,24 @@ class Logging:
level="ERROR",
kwargs=self.model_call_details,
)
elif callback == "prometheus":
global prometheusLogger
verbose_logger.debug("reaches prometheus for success logging!")
kwargs = {}
for k, v in self.model_call_details.items():
if (
k != "original_response"
): # copy.deepcopy raises errors as this could be a coroutine
kwargs[k] = v
kwargs["exception"] = str(exception)
prometheusLogger.log_event(
kwargs=kwargs,
response_obj=result,
start_time=start_time,
end_time=end_time,
user_id=kwargs.get("user", None),
print_verbose=print_verbose,
)
except Exception as e:
print_verbose(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}"