Litellm dev 12 24 2024 p4 (#7407)

* fix(invoke_handler.py): fix mock response iterator to handle tool calling — returns the tool call if one is returned by the model response
* fix(prometheus.py): add new 'tokens_by_tag' metric on prometheus — allows tracking 'token usage' by task
* feat(prometheus.py): add input + output token tracking by tag
* feat(prometheus.py): add tag-based deployment failure tracking — allows admins to track failures by use-case
2025-04-26 03:04:13 +00:00 · 2024-12-24 20:24:06 -08:00 · 2024-12-24 20:24:06 -08:00 · 39dabb2e89
commit 39dabb2e89
parent 81be0b4090
5 changed files with 209 additions and 12 deletions
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@ -76,7 +76,7 @@ class PrometheusLogger(CustomLogger):
                    UserAPIKeyLabelNames.TEAM.value,
                    UserAPIKeyLabelNames.TEAM_ALIAS.value,
                    UserAPIKeyLabelNames.USER.value,
-                    UserAPIKeyLabelNames.LITELLM_MODEL.value,
+                    UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
                ],
                buckets=LATENCY_BUCKETS,
            )
@ -85,7 +85,7 @@ class PrometheusLogger(CustomLogger):
                "litellm_llm_api_latency_metric",
                "Total latency (seconds) for a models LLM API call",
                labelnames=[
-                    UserAPIKeyLabelNames.LITELLM_MODEL.value,
+                    UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
                    UserAPIKeyLabelNames.API_KEY_HASH.value,
                    UserAPIKeyLabelNames.API_KEY_ALIAS.value,
                    UserAPIKeyLabelNames.TEAM.value,
@ -140,6 +140,14 @@ class PrometheusLogger(CustomLogger):
                ],
            )

+            # Counter for tokens by tag
+            self.litellm_tokens_by_tag_metric = Counter(
+                "litellm_total_tokens_by_tag",
+                "Total number of input + output tokens from LLM requests by custom metadata tags",
+                labelnames=[
+                    UserAPIKeyLabelNames.TAG.value,
+                ],
+            )
            self.litellm_input_tokens_metric = Counter(
                "litellm_input_tokens",
                "Total number of input tokens from LLM requests",
@ -153,6 +161,16 @@ class PrometheusLogger(CustomLogger):
                    "user",
                ],
            )
+
+            # Counter for input tokens by tag
+            self.litellm_input_tokens_by_tag_metric = Counter(
+                "litellm_input_tokens_by_tag",
+                "Total number of input tokens from LLM requests by custom metadata tags",
+                labelnames=[
+                    UserAPIKeyLabelNames.TAG.value,
+                ],
+            )
+
            self.litellm_output_tokens_metric = Counter(
                "litellm_output_tokens",
                "Total number of output tokens from LLM requests",
@ -167,6 +185,15 @@ class PrometheusLogger(CustomLogger):
                ],
            )

+            # Counter for output tokens by tag
+            self.litellm_output_tokens_by_tag_metric = Counter(
+                "litellm_output_tokens_by_tag",
+                "Total number of output tokens from LLM requests by custom metadata tags",
+                labelnames=[
+                    UserAPIKeyLabelNames.TAG.value,
+                ],
+            )
+
            # Remaining Budget for Team
            self.litellm_remaining_team_budget_metric = Gauge(
                "litellm_remaining_team_budget_metric",
@ -237,10 +264,10 @@ class PrometheusLogger(CustomLogger):

            # Get all keys
            _logged_llm_labels = [
-                "litellm_model_name",
-                "model_id",
-                "api_base",
-                "api_provider",
+                UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME.value,
+                UserAPIKeyLabelNames.MODEL_ID.value,
+                UserAPIKeyLabelNames.API_BASE.value,
+                UserAPIKeyLabelNames.API_PROVIDER.value,
            ]
            team_and_key_labels = [
                "hashed_api_key",
@ -275,6 +302,16 @@ class PrometheusLogger(CustomLogger):
                + EXCEPTION_LABELS
                + team_and_key_labels,
            )
+            self.litellm_deployment_failure_by_tag_responses = Counter(
+                "litellm_deployment_failure_by_tag_responses",
+                "Total number of failed LLM API calls for a specific LLM deploymeny by custom metadata tags",
+                labelnames=[
+                    UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+                    UserAPIKeyLabelNames.TAG.value,
+                ]
+                + _logged_llm_labels
+                + EXCEPTION_LABELS,
+            )
            self.litellm_deployment_total_requests = Counter(
                name="litellm_deployment_total_requests",
                documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
@ -490,6 +527,14 @@ class PrometheusLogger(CustomLogger):
            user_id,
        ).inc(standard_logging_payload["total_tokens"])

+        _tags = standard_logging_payload["request_tags"]
+        for tag in _tags:
+            self.litellm_tokens_by_tag_metric.labels(
+                **{
+                    UserAPIKeyLabelNames.TAG.value: tag,
+                }
+            ).inc(standard_logging_payload["total_tokens"])
+
        self.litellm_input_tokens_metric.labels(
            end_user_id,
            user_api_key,
@ -500,6 +545,13 @@ class PrometheusLogger(CustomLogger):
            user_id,
        ).inc(standard_logging_payload["prompt_tokens"])

+        for tag in _tags:
+            self.litellm_input_tokens_by_tag_metric.labels(
+                **{
+                    UserAPIKeyLabelNames.TAG.value: tag,
+                }
+            ).inc(standard_logging_payload["prompt_tokens"])
+
        self.litellm_output_tokens_metric.labels(
            end_user_id,
            user_api_key,
@ -510,6 +562,13 @@ class PrometheusLogger(CustomLogger):
            user_id,
        ).inc(standard_logging_payload["completion_tokens"])

+        for tag in _tags:
+            self.litellm_output_tokens_by_tag_metric.labels(
+                **{
+                    UserAPIKeyLabelNames.TAG.value: tag,
+                }
+            ).inc(standard_logging_payload["completion_tokens"])
+
    def _increment_remaining_budget_metrics(
        self,
        user_api_team: Optional[str],
@ -651,7 +710,7 @@ class PrometheusLogger(CustomLogger):
            api_call_total_time_seconds = api_call_total_time.total_seconds()
            self.litellm_llm_api_latency_metric.labels(
                **{
-                    UserAPIKeyLabelNames.LITELLM_MODEL.value: model,
+                    UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value: model,
                    UserAPIKeyLabelNames.API_KEY_HASH.value: user_api_key,
                    UserAPIKeyLabelNames.API_KEY_ALIAS.value: user_api_key_alias,
                    UserAPIKeyLabelNames.TEAM.value: user_api_team,
@ -686,7 +745,7 @@ class PrometheusLogger(CustomLogger):
                    UserAPIKeyLabelNames.USER.value: standard_logging_payload[
                        "metadata"
                    ]["user_api_key_user_id"],
-                    UserAPIKeyLabelNames.LITELLM_MODEL.value: model,
+                    UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value: model,
                }
            ).observe(total_time_seconds)

@ -862,6 +921,24 @@ class PrometheusLogger(CustomLogger):
                ],
            ).inc()

+            # tag based tracking
+            _tags = standard_logging_payload["request_tags"]
+            for tag in _tags:
+                self.litellm_deployment_failure_by_tag_responses.labels(
+                    **{
+                        UserAPIKeyLabelNames.REQUESTED_MODEL.value: model_group,
+                        UserAPIKeyLabelNames.TAG.value: tag,
+                        UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME.value: litellm_model_name,
+                        UserAPIKeyLabelNames.MODEL_ID.value: model_id,
+                        UserAPIKeyLabelNames.API_BASE.value: api_base,
+                        UserAPIKeyLabelNames.API_PROVIDER.value: llm_provider,
+                        UserAPIKeyLabelNames.EXCEPTION_CLASS.value: exception.__class__.__name__,
+                        UserAPIKeyLabelNames.EXCEPTION_STATUS.value: str(
+                            getattr(exception, "status_code", None)
+                        ),
+                    }
+                ).inc()
+
            self.litellm_deployment_total_requests.labels(
                litellm_model_name=litellm_model_name,
                model_id=model_id,
@ -881,8 +958,12 @@ class PrometheusLogger(CustomLogger):
            ).inc()

            pass
-        except Exception:
-            pass
+        except Exception as e:
+            verbose_logger.debug(
+                "Prometheus Error: set_llm_deployment_failure_metrics. Exception occured - {}".format(
+                    str(e)
+                )
+            )

    def set_llm_deployment_success_metrics(
        self,