forked from phoenix/litellm-mirror
fix(utils.py): support prometheus failed call metrics
commit 28edb77350 (parent 05d6d9e45f)
4 changed files with 38 additions and 4 deletions
```diff
@@ -22,6 +22,12 @@ class PrometheusLogger:
         verbose_logger.debug(f"in init prometheus metrics")
         from prometheus_client import Counter

+        self.litellm_failed_requests_metric = Counter(
+            name="litellm_failed_requests_metric",
+            documentation="Total number of failed LLM calls to litellm",
+            labelnames=["end_user", "hashed_api_key", "model", "team"],
+        )
+
         self.litellm_requests_metric = Counter(
             name="litellm_requests_metric",
             documentation="Total number of LLM calls to litellm",
```
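For reference, here is a minimal standalone sketch of how a labelled `prometheus_client` Counter like the one added above behaves. The metric name, documentation string, and label names are copied from the diff; the surrounding `PrometheusLogger` plumbing and the sample label values are illustrative only.

```python
# Standalone sketch; only the prometheus_client package is assumed.
from prometheus_client import Counter, generate_latest

litellm_failed_requests_metric = Counter(
    name="litellm_failed_requests_metric",
    documentation="Total number of failed LLM calls to litellm",
    labelnames=["end_user", "hashed_api_key", "model", "team"],
)

# Each distinct label combination becomes its own time series.
litellm_failed_requests_metric.labels(
    "end-user-1", "hashed-key-abc", "gpt-3.5-turbo", "team-a"  # placeholder values
).inc()

# generate_latest() renders the text exposition format a Prometheus server scrapes.
print(generate_latest().decode())
```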
```diff
@@ -69,7 +75,10 @@ class PrometheusLogger:
             user_api_team = litellm_params.get("metadata", {}).get(
                 "user_api_key_team_id", None
             )
-            tokens_used = response_obj.get("usage", {}).get("total_tokens", 0)
+            if response_obj is not None:
+                tokens_used = response_obj.get("usage", {}).get("total_tokens", 0)
+            else:
+                tokens_used = 0

             print_verbose(
                 f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}"
```
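The `if response_obj is not None` guard above exists because a failed call can reach the logger with no response object at all, in which case the old one-liner would raise. A hypothetical helper showing the same None-safe lookup in isolation:

```python
# Hypothetical helper, not part of the commit; mirrors the guard in the diff above.
def get_total_tokens(response_obj) -> int:
    if response_obj is not None:
        return response_obj.get("usage", {}).get("total_tokens", 0)
    return 0

assert get_total_tokens(None) == 0
assert get_total_tokens({"usage": {"total_tokens": 42}}) == 42
```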
```diff
@@ -93,6 +102,12 @@ class PrometheusLogger:
             self.litellm_tokens_metric.labels(
                 end_user_id, user_api_key, model, user_api_team
             ).inc(tokens_used)
+
+            ### FAILURE INCREMENT ###
+            if "exception" in kwargs:
+                self.litellm_failed_requests_metric.labels(
+                    end_user_id, user_api_key, model, user_api_team
+                ).inc()
         except Exception as e:
             traceback.print_exc()
             verbose_logger.debug(
```
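Stripped of the surrounding class, the dispatch this hunk adds is simple: every logged call bumps the request counter, and calls whose kwargs carry an `"exception"` key also bump the failure counter. A simplified sketch under those assumptions, with metric and label names shortened for brevity:

```python
from prometheus_client import Counter

# Shortened stand-ins for the metrics defined in PrometheusLogger.__init__.
requests_metric = Counter("requests_total", "All LLM calls", ["model"])
failed_requests_metric = Counter("failed_requests_total", "Failed LLM calls", ["model"])

def record_call(kwargs: dict, model: str) -> None:
    requests_metric.labels(model).inc()
    # The failure counter only moves when the caller marked the call as failed.
    if "exception" in kwargs:
        failed_requests_metric.labels(model).inc()
```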
```diff
@@ -26,6 +26,7 @@ model_list:

 litellm_settings:
   success_callback: ["prometheus"]
+  failure_callback: ["prometheus"]
   service_callback: ["prometheus_system"]
   upperbound_key_generate_params:
     max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
```
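The same wiring can be done directly from Python when using the SDK rather than the proxy; a hedged sketch, with the callback names taken from the config above and everything else assumed:

```python
import litellm

# Mirror the proxy config: report both successes and failures to the
# Prometheus integration via litellm's module-level callback lists.
litellm.success_callback = ["prometheus"]
litellm.failure_callback = ["prometheus"]
```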
```diff
@@ -2205,9 +2205,9 @@ class ProxyConfig:
                         # these are litellm callbacks - "langfuse", "sentry", "wandb"
                         else:
                             litellm.failure_callback.append(callback)
-                            verbose_proxy_logger.debug(
-                                f"{blue_color_code} Initialized Success Callbacks - {litellm.failure_callback} {reset_color_code}"
-                            )
+                            print(  # noqa
+                                f"{blue_color_code} Initialized Failure Callbacks - {litellm.failure_callback} {reset_color_code}"
+                            )  # noqa
                     elif key == "cache_params":
                         # this is set in the cache branch
                         # see usage here: https://docs.litellm.ai/docs/proxy/caching
```
```diff
@@ -2249,6 +2249,24 @@ class Logging:
                         level="ERROR",
                         kwargs=self.model_call_details,
                     )
+                elif callback == "prometheus":
+                    global prometheusLogger
+                    verbose_logger.debug("reaches prometheus for success logging!")
+                    kwargs = {}
+                    for k, v in self.model_call_details.items():
+                        if (
+                            k != "original_response"
+                        ):  # copy.deepcopy raises errors as this could be a coroutine
+                            kwargs[k] = v
+                    kwargs["exception"] = str(exception)
+                    prometheusLogger.log_event(
+                        kwargs=kwargs,
+                        response_obj=result,
+                        start_time=start_time,
+                        end_time=end_time,
+                        user_id=kwargs.get("user", None),
+                        print_verbose=print_verbose,
+                    )
             except Exception as e:
                 print_verbose(
                     f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}"
```
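Taken together, the two halves of the commit mean the failure path now works roughly as follows: the failure handler copies `model_call_details` into `kwargs`, stamps the stringified exception onto it, and calls `log_event`, where the `"exception"` key triggers the new failure counter. A simplified, hypothetical end-to-end sketch (the real litellm signatures carry many more fields):

```python
# Hypothetical condensation of the flow added in this commit.
def failure_handler(model_call_details: dict, exception: Exception, prometheus_logger) -> None:
    kwargs = {
        k: v for k, v in model_call_details.items() if k != "original_response"
    }
    kwargs["exception"] = str(exception)  # marks the call as failed
    # response_obj may be None on failure, which is why tokens_used is guarded above.
    prometheus_logger.log_event(kwargs=kwargs, response_obj=None)
```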