feat(proxy_cli.py): add new 'log_config' cli param (#6352)

* feat(proxy_cli.py): add new 'log_config' cli param

Allows passing a logging.conf file to uvicorn on startup.
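
A minimal sketch of what this enables, assuming uvicorn's standard `log_config` option is what the new param forwards to; the app path and values below are illustrative, not the proxy's actual startup code:

    import uvicorn

    # uvicorn accepts a standard Python logging config file via `log_config`;
    # the new CLI param is expected to pass the user-supplied path through.
    uvicorn.run(
        "litellm.proxy.proxy_server:app",  # app path shown for illustration only
        host="0.0.0.0",
        port=4000,
        log_config="logging.conf",
    )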

* docs(cli.md): add logging conf to uvicorn cli docs

* fix(get_llm_provider_logic.py): fix default api base for litellm_proxy

Fixes https://github.com/BerriAI/litellm/issues/6332
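
For context, a hedged sketch of the call path this touches; the proxy URL, key, and model name are illustrative:

    import litellm

    # The `litellm_proxy/` prefix routes the call to a running LiteLLM proxy.
    # Passing api_base explicitly sidesteps the default this fix adjusts.
    response = litellm.completion(
        model="litellm_proxy/gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
        api_base="http://localhost:4000",
        api_key="sk-1234",
    )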

* feat(openai_like/embedding): Add support for jina ai embeddings

Closes https://github.com/BerriAI/litellm/issues/6337
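
A hedged usage sketch of the new route; the model id and env var name are assumptions for illustration:

    import os
    import litellm

    os.environ["JINA_AI_API_KEY"] = "your-jina-key"  # assumed env var name

    response = litellm.embedding(
        model="jina_ai/jina-embeddings-v3",  # illustrative Jina model id
        input=["good morning from litellm"],
    )
    print(len(response.data[0]["embedding"]))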

* docs(deploy.md): update entrypoint.sh filepath post-refactor

Fixes outdated docs

* feat(prometheus.py): emit time_to_first_token metric on prometheus

Closes https://github.com/BerriAI/litellm/issues/6334

* fix(prometheus.py): only emit time to first token metric if stream is True

Enables more accurate TTFT measurement.

* test: handle vertex api instability

* fix(get_llm_provider_logic.py): fix import

* fix(openai.py): fix deepinfra default api base

* fix(anthropic/transformation.py): remove anthropic beta header (#6361)
Krish Dholakia, 2024-10-21 21:25:58 -07:00, committed by GitHub
commit 71f13c8c12 (parent b4456a990e)
23 changed files with 839 additions and 263 deletions

prometheus.py

@@ -97,6 +97,19 @@ class PrometheusLogger(CustomLogger):
             buckets=LATENCY_BUCKETS,
         )
+        self.litellm_llm_api_time_to_first_token_metric = Histogram(
+            "litellm_llm_api_time_to_first_token_metric",
+            "Time to first token for a models LLM API call",
+            labelnames=[
+                "model",
+                "hashed_api_key",
+                "api_key_alias",
+                "team",
+                "team_alias",
+            ],
+            buckets=LATENCY_BUCKETS,
+        )
         # Counter for spend
         self.litellm_spend_metric = Counter(
             "litellm_spend_metric",
@@ -335,14 +348,17 @@
         )
         # unpack kwargs
-        standard_logging_payload: StandardLoggingPayload = kwargs.get(
-            "standard_logging_object", {}
+        standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
+            "standard_logging_object"
         )
+        if standard_logging_payload is None:
+            raise ValueError("standard_logging_object is required")
         model = kwargs.get("model", "")
         litellm_params = kwargs.get("litellm_params", {}) or {}
         _metadata = litellm_params.get("metadata", {})
         proxy_server_request = litellm_params.get("proxy_server_request") or {}
         end_user_id = proxy_server_request.get("body", {}).get("user", None)
+        model_parameters: dict = standard_logging_payload["model_parameters"]
         user_id = standard_logging_payload["metadata"]["user_api_key_user_id"]
         user_api_key = standard_logging_payload["metadata"]["user_api_key_hash"]
         user_api_key_alias = standard_logging_payload["metadata"]["user_api_key_alias"]
@@ -468,6 +484,28 @@
         total_time_seconds = total_time.total_seconds()
         api_call_start_time = kwargs.get("api_call_start_time", None)
         completion_start_time = kwargs.get("completion_start_time", None)
+        if (
+            completion_start_time is not None
+            and isinstance(completion_start_time, datetime)
+            and model_parameters.get("stream")
+            is True  # only emit for streaming requests
+        ):
+            time_to_first_token_seconds = (
+                completion_start_time - api_call_start_time
+            ).total_seconds()
+            self.litellm_llm_api_time_to_first_token_metric.labels(
+                model,
+                user_api_key,
+                user_api_key_alias,
+                user_api_team,
+                user_api_team_alias,
+            ).observe(time_to_first_token_seconds)
+        else:
+            verbose_logger.debug(
+                "Time to first token metric not emitted, stream option in model_parameters is not True"
+            )
         if api_call_start_time is not None and isinstance(
             api_call_start_time, datetime
         ):
@@ -512,6 +550,7 @@
            "standard_logging_object", {}
        )
        proxy_server_request = litellm_params.get("proxy_server_request") or {}
        end_user_id = proxy_server_request.get("body", {}).get("user", None)
        user_id = standard_logging_payload["metadata"]["user_api_key_user_id"]
        user_api_key = standard_logging_payload["metadata"]["user_api_key_hash"]