fix(utils.py): support prometheus failed call metrics

This commit is contained in:
Krrish Dholakia 2024-04-18 12:29:15 -07:00
parent 05d6d9e45f
commit 28edb77350
4 changed files with 38 additions and 4 deletions

View file

@@ -22,6 +22,12 @@ class PrometheusLogger:
verbose_logger.debug(f"in init prometheus metrics")
from prometheus_client import Counter
self.litellm_failed_requests_metric = Counter(
name="litellm_failed_requests_metric",
documentation="Total number of failed LLM calls to litellm",
labelnames=["end_user", "hashed_api_key", "model", "team"],
)
self.litellm_requests_metric = Counter(
name="litellm_requests_metric",
documentation="Total number of LLM calls to litellm",
@@ -69,7 +75,10 @@ class PrometheusLogger:
user_api_team = litellm_params.get("metadata", {}).get(
"user_api_key_team_id", None
)
if response_obj is not None:
tokens_used = response_obj.get("usage", {}).get("total_tokens", 0)
else:
tokens_used = 0
print_verbose(
f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}"
@@ -93,6 +102,12 @@ class PrometheusLogger:
self.litellm_tokens_metric.labels(
end_user_id, user_api_key, model, user_api_team
).inc(tokens_used)
### FAILURE INCREMENT ###
if "exception" in kwargs:
self.litellm_failed_requests_metric.labels(
end_user_id, user_api_key, model, user_api_team
).inc()
except Exception as e:
traceback.print_exc()
verbose_logger.debug(

View file

@@ -26,6 +26,7 @@ model_list:
litellm_settings:
success_callback: ["prometheus"]
failure_callback: ["prometheus"]
service_callback: ["prometheus_system"]
upperbound_key_generate_params:
max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET

View file

@@ -2205,9 +2205,9 @@ class ProxyConfig:
# these are litellm callbacks - "langfuse", "sentry", "wandb"
else:
litellm.failure_callback.append(callback)
verbose_proxy_logger.debug(
f"{blue_color_code} Initialized Success Callbacks - {litellm.failure_callback} {reset_color_code}"
)
print( # noqa
f"{blue_color_code} Initialized Failure Callbacks - {litellm.failure_callback} {reset_color_code}"
) # noqa
elif key == "cache_params":
# this is set in the cache branch
# see usage here: https://docs.litellm.ai/docs/proxy/caching

View file

@@ -2249,6 +2249,24 @@ class Logging:
level="ERROR",
kwargs=self.model_call_details,
)
elif callback == "prometheus":
global prometheusLogger
verbose_logger.debug("reaches prometheus for success logging!")
kwargs = {}
for k, v in self.model_call_details.items():
if (
k != "original_response"
): # copy.deepcopy raises errors as this could be a coroutine
kwargs[k] = v
kwargs["exception"] = str(exception)
prometheusLogger.log_event(
kwargs=kwargs,
response_obj=result,
start_time=start_time,
end_time=end_time,
user_id=kwargs.get("user", None),
print_verbose=print_verbose,
)
except Exception as e:
print_verbose(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}"