(feat) add response_time to StandardLoggingPayload - logged on datadog, gcs_bucket, s3_bucket etc (#7199)

* feat - add response_time to slp
* test_get_response_time
* docs slp
* fix test_datadog_logging_http_request

parent aa7f416b7f
commit 153ab055d6
6 changed files with 73 additions and 40 deletions
@@ -127,9 +127,10 @@ class StandardLoggingPayload(TypedDict):
     total_tokens: int
     prompt_tokens: int
     completion_tokens: int
-    startTime: float
+    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
     endTime: float
-    completionStartTime: float
+    completionStartTime: float  # time the first token of the LLM response is returned (for streaming responses)
+    response_time: float  # time the LLM takes to respond (for streaming uses time to first token)
     model_map_information: StandardLoggingModelInformation
     model: str
     model_id: Optional[str]
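In plain terms: for non-streaming calls response_time covers the whole request, while for streaming calls it measures time to first token. A minimal sketch of the relationship between the timing fields above; the values are made up for illustration:

# timing fields from the payload above, with made-up values for a streaming call
payload = {
    "startTime": 1000.0,            # request sent
    "completionStartTime": 1002.0,  # first token received
    "endTime": 1005.0,              # last token received
}

# streaming: response_time is time to first token ...
stream_response_time = payload["completionStartTime"] - payload["startTime"]  # 2.0
# ... while a non-streaming call reports the full duration
blocking_response_time = payload["endTime"] - payload["startTime"]  # 5.0

assert (stream_response_time, blocking_response_time) == (2.0, 5.0)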
@@ -2729,6 +2729,30 @@ class StandardLoggingPayloadSetup:
             return api_base.rstrip("/")
         return api_base
 
+    @staticmethod
+    def get_response_time(
+        start_time_float: float,
+        end_time_float: float,
+        completion_start_time_float: float,
+        stream: bool,
+    ) -> float:
+        """
+        Get the response time for the LLM response
+
+        Args:
+            start_time_float: float - start time of the LLM call
+            end_time_float: float - end time of the LLM call
+            completion_start_time_float: float - time to first token of the LLM response (for streaming responses)
+            stream: bool - True when a stream response is returned
+
+        Returns:
+            float: The response time for the LLM response
+        """
+        if stream is True:
+            return completion_start_time_float - start_time_float
+        else:
+            return end_time_float - start_time_float
+
 
 def get_standard_logging_object_payload(
     kwargs: Optional[dict],
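A quick usage sketch of the new helper exercising both branches, assuming StandardLoggingPayloadSetup is importable from litellm.litellm_core_utils.litellm_logging (the module patched here); the timing values are made up:

from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup

start, first_token, end = 1000.0, 1002.0, 1005.0

# stream=True measures time to first token
assert StandardLoggingPayloadSetup.get_response_time(
    start_time_float=start,
    end_time_float=end,
    completion_start_time_float=first_token,
    stream=True,
) == 2.0

# stream=False measures the full request duration
assert StandardLoggingPayloadSetup.get_response_time(
    start_time_float=start,
    end_time_float=end,
    completion_start_time_float=first_token,
    stream=False,
) == 5.0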
@@ -2802,6 +2826,12 @@
             completion_start_time=completion_start_time,
         )
     )
+    response_time = StandardLoggingPayloadSetup.get_response_time(
+        start_time_float=start_time_float,
+        end_time_float=end_time_float,
+        completion_start_time_float=completion_start_time_float,
+        stream=kwargs.get("stream", False),
+    )
     # clean up litellm hidden params
     clean_hidden_params = StandardLoggingPayloadSetup.get_hidden_params(
         hidden_params
@@ -2850,6 +2880,7 @@
             startTime=start_time_float,
             endTime=end_time_float,
             completionStartTime=completion_start_time_float,
+            response_time=response_time,
             model=kwargs.get("model", "") or "",
             metadata=clean_metadata,
             cache_key=clean_hidden_params["cache_key"],
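With the field wired into the payload here, every sink that receives the standard logging payload (datadog, gcs_bucket, s3_bucket, custom loggers) gets it automatically. A hedged sketch of reading it from a custom callback; the CustomLogger hook and the "standard_logging_object" kwargs key are assumptions drawn from litellm's callback interface, not shown in this commit:

from litellm.integrations.custom_logger import CustomLogger


class ResponseTimeLogger(CustomLogger):
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # "standard_logging_object" is the kwargs key litellm uses for the
        # standard logging payload (assumed here; verify for your version)
        slp = kwargs.get("standard_logging_object") or {}
        # total duration for regular calls; time-to-first-token for streams
        print("response_time:", slp.get("response_time"))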
@@ -2,40 +2,5 @@ model_list:
   - model_name: gpt-4o
     litellm_params:
       model: openai/gpt-4o
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-  - model_name: anthropic/*
-    litellm_params:
-      model: anthropic/fake
-      api_base: https://exampleanthropicendpoint-production.up.railway.app/
-
-
-
 litellm_settings:
   callbacks: ["datadog"]
-
-  turn_off_message_logging: True
-
-
-router_settings:
-  provider_budget_config:
-    openai:
-      budget_limit: 0.000000000001 # float of $ value budget for time period
-      time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
-    azure:
-      budget_limit: 100
-      time_period: 1d
-    anthropic:
-      budget_limit: 100
-      time_period: 10d
-    vertex_ai:
-      budget_limit: 100
-      time_period: 12d
-    gemini:
-      budget_limit: 100
-      time_period: 12d
-
-  # OPTIONAL: Set Redis Host, Port, and Password if using multiple instance of LiteLLM
-  redis_host: os.environ/REDIS_HOST
-  redis_port: os.environ/REDIS_PORT
-  redis_password: os.environ/REDIS_PASSWORD
-
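For the datadog callback in this config to actually ship logs, credentials must be present in the environment; DD_API_KEY and DD_SITE are the variables litellm's datadog integration documents. A hedged Python-side sketch of the same setup (placeholder values, and litellm.success_callback is assumed equivalent to the yaml callbacks setting):

import os

# assumed env vars read by litellm's datadog callback (verify for your version)
os.environ["DD_API_KEY"] = "<your-datadog-api-key>"
os.environ["DD_SITE"] = "us5.datadoghq.com"

import litellm

# rough Python-side equivalent of `litellm_settings: callbacks: ["datadog"]`
litellm.success_callback = ["datadog"]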
@@ -1560,9 +1560,10 @@ class StandardLoggingPayload(TypedDict):
     total_tokens: int
     prompt_tokens: int
     completion_tokens: int
-    startTime: float
+    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
     endTime: float
     completionStartTime: float
+    response_time: float
     model_map_information: StandardLoggingModelInformation
     model: str
     model_id: Optional[str]
@@ -226,6 +226,7 @@ async def test_datadog_logging_http_request():
 
         # Parse the 'message' field as JSON and check its structure
         message = json.loads(body[0]["message"])
         print("logged message", json.dumps(message, indent=4))
 
+        expected_message_fields = StandardLoggingPayload.__annotations__.keys()
 
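Deriving the expected fields from StandardLoggingPayload.__annotations__ keeps this test in sync with the TypedDict as fields like response_time are added. A standalone sketch of that pattern; the stand-in class and assertion are illustrative, not the test's exact code:

from typing import TypedDict


class Payload(TypedDict):  # illustrative stand-in for StandardLoggingPayload
    startTime: float
    response_time: float


# a logged message parsed from the callback body (values made up)
logged_message = {"startTime": 1000.0, "response_time": 5.0}

# every annotated field of the TypedDict must appear in the logged message
for field in Payload.__annotations__.keys():
    assert field in logged_message, f"missing field: {field}"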
@@ -329,7 +329,6 @@ def test_get_final_response_obj():
     litellm.turn_off_message_logging = False
 
 
-
 def test_truncate_standard_logging_payload():
     """
     1. original messages, response, and error_str should NOT BE MODIFIED, since these are from kwargs
@@ -368,6 +367,7 @@ def test_truncate_standard_logging_payload():
     # assert len of error_str is less than 10_500
     assert len(str(standard_logging_payload["error_str"])) < 10_500
 
+
 def test_strip_trailing_slash():
     common_api_base = "https://api.test.com"
     assert (
@@ -379,3 +379,37 @@ def test_strip_trailing_slash():
         == common_api_base
     )
 
+
+def test_get_response_time():
+    """Test get_response_time with different streaming scenarios"""
+    # Test case 1: Non-streaming response
+    start_time = 1000.0
+    end_time = 1005.0
+    completion_start_time = 1003.0
+    stream = False
+
+    response_time = StandardLoggingPayloadSetup.get_response_time(
+        start_time_float=start_time,
+        end_time_float=end_time,
+        completion_start_time_float=completion_start_time,
+        stream=stream,
+    )
+
+    # For non-streaming, should return end_time - start_time
+    assert response_time == 5.0
+
+    # Test case 2: Streaming response
+    start_time = 1000.0
+    end_time = 1010.0
+    completion_start_time = 1002.0
+    stream = True
+
+    response_time = StandardLoggingPayloadSetup.get_response_time(
+        start_time_float=start_time,
+        end_time_float=end_time,
+        completion_start_time_float=completion_start_time,
+        stream=stream,
+    )
+
+    # For streaming, should return completion_start_time - start_time
+    assert response_time == 2.0