Mirror of https://github.com/BerriAI/litellm.git
(feat) add response_time to StandardLoggingPayload - logged on datadog, gcs_bucket, s3_bucket, etc. (#7199)

* feat - add response_time to slp
* test_get_response_time
* docs slp
* fix test_datadog_logging_http_request

Commit 153ab055d6 (parent aa7f416b7f) - 6 changed files with 73 additions and 40 deletions.
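For orientation, the rule the commit implements, as a standalone sketch (the function and argument names here are illustrative, not from the repo):

```python
def derive_response_time(start: float, end: float, first_token: float, stream: bool) -> float:
    """Hypothetical restatement of the commit's rule, for orientation only."""
    # Streaming: report latency to first token; non-streaming: full round trip.
    return (first_token - start) if stream else (end - start)
```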
```diff
@@ -127,9 +127,10 @@ class StandardLoggingPayload(TypedDict):
     total_tokens: int
     prompt_tokens: int
     completion_tokens: int
-    startTime: float
+    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
     endTime: float
-    completionStartTime: float
+    completionStartTime: float  # time the first token of the LLM response is returned (for streaming responses)
+    response_time: float  # time the LLM takes to respond (for streaming uses time to first token)
     model_map_information: StandardLoggingModelInformation
     model: str
     model_id: Optional[str]
```
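For context on how the new field surfaces downstream: a minimal sketch of a custom callback reading `response_time` off the payload, assuming the documented `standard_logging_object` key in the callback kwargs (this consumer is hypothetical, not part of the commit):

```python
from litellm.integrations.custom_logger import CustomLogger


class ResponseTimeLogger(CustomLogger):
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # standard_logging_object carries the StandardLoggingPayload dict.
        slp = kwargs.get("standard_logging_object") or {}
        # Seconds; for streaming calls this is time to first token.
        print(f"model={slp.get('model')} response_time={slp.get('response_time')}s")
```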
```diff
@@ -2729,6 +2729,30 @@ class StandardLoggingPayloadSetup:
             return api_base.rstrip("/")
         return api_base
 
+    @staticmethod
+    def get_response_time(
+        start_time_float: float,
+        end_time_float: float,
+        completion_start_time_float: float,
+        stream: bool,
+    ) -> float:
+        """
+        Get the response time for the LLM response
+
+        Args:
+            start_time_float: float - start time of the LLM call
+            end_time_float: float - end time of the LLM call
+            completion_start_time_float: float - time to first token of the LLM response (for streaming responses)
+            stream: bool - True when a stream response is returned
+
+        Returns:
+            float: The response time for the LLM response
+        """
+        if stream is True:
+            return completion_start_time_float - start_time_float
+        else:
+            return end_time_float - start_time_float
+
 
 def get_standard_logging_object_payload(
     kwargs: Optional[dict],
```
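A quick usage sketch of the new helper, assuming `StandardLoggingPayloadSetup` is importable from `litellm.litellm_core_utils.litellm_logging` and using hypothetical timestamps:

```python
from datetime import datetime, timedelta

from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup

start_dt = datetime(2024, 12, 12, 10, 0, 0)
first_token_dt = start_dt + timedelta(seconds=0.8)  # first streamed chunk arrives
end_dt = start_dt + timedelta(seconds=4.5)          # stream fully drained

response_time = StandardLoggingPayloadSetup.get_response_time(
    start_time_float=start_dt.timestamp(),
    end_time_float=end_dt.timestamp(),
    completion_start_time_float=first_token_dt.timestamp(),
    stream=True,
)
assert abs(response_time - 0.8) < 1e-6  # streaming -> time to first token
```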
```diff
@@ -2802,6 +2826,12 @@ def get_standard_logging_object_payload(
             completion_start_time=completion_start_time,
         )
     )
+    response_time = StandardLoggingPayloadSetup.get_response_time(
+        start_time_float=start_time_float,
+        end_time_float=end_time_float,
+        completion_start_time_float=completion_start_time_float,
+        stream=kwargs.get("stream", False),
+    )
     # clean up litellm hidden params
     clean_hidden_params = StandardLoggingPayloadSetup.get_hidden_params(
         hidden_params
```
```diff
@@ -2850,6 +2880,7 @@ def get_standard_logging_object_payload(
             startTime=start_time_float,
             endTime=end_time_float,
             completionStartTime=completion_start_time_float,
+            response_time=response_time,
             model=kwargs.get("model", "") or "",
             metadata=clean_metadata,
             cache_key=clean_hidden_params["cache_key"],
```
```diff
@@ -2,40 +2,5 @@ model_list:
   - model_name: gpt-4o
     litellm_params:
       model: openai/gpt-4o
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-  - model_name: anthropic/*
-    litellm_params:
-      model: anthropic/fake
-      api_base: https://exampleanthropicendpoint-production.up.railway.app/
-
-
 litellm_settings:
   callbacks: ["datadog"]
-
-  turn_off_message_logging: True
-
-
-router_settings:
-  provider_budget_config:
-    openai:
-      budget_limit: 0.000000000001 # float of $ value budget for time period
-      time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
-    azure:
-      budget_limit: 100
-      time_period: 1d
-    anthropic:
-      budget_limit: 100
-      time_period: 10d
-    vertex_ai:
-      budget_limit: 100
-      time_period: 12d
-    gemini:
-      budget_limit: 100
-      time_period: 12d
-
-  # OPTIONAL: Set Redis Host, Port, and Password if using multiple instance of LiteLLM
-  redis_host: os.environ/REDIS_HOST
-  redis_port: os.environ/REDIS_PORT
-  redis_password: os.environ/REDIS_PASSWORD
-
```
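The same Datadog wiring is available outside the proxy via the Python SDK; a sketch using the documented success-callback setting (assumes DD_API_KEY / DD_SITE are set in the environment):

```python
import litellm

# Equivalent of `callbacks: ["datadog"]` in the proxy config above.
litellm.success_callback = ["datadog"]
```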
```diff
@@ -1560,9 +1560,10 @@ class StandardLoggingPayload(TypedDict):
     total_tokens: int
     prompt_tokens: int
     completion_tokens: int
-    startTime: float
+    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
     endTime: float
     completionStartTime: float
+    response_time: float
     model_map_information: StandardLoggingModelInformation
     model: str
     model_id: Optional[str]
```
```diff
@@ -226,6 +226,7 @@ async def test_datadog_logging_http_request():
 
     # Parse the 'message' field as JSON and check its structure
     message = json.loads(body[0]["message"])
+    print("logged message", json.dumps(message, indent=4))
 
     expected_message_fields = StandardLoggingPayload.__annotations__.keys()
 
```
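This test checks the logged Datadog message against the TypedDict's declared keys, so the new `response_time` field is covered automatically. The introspection pattern it relies on, shown in isolation with a trimmed stand-in type:

```python
from typing import Optional, TypedDict


class MiniPayload(TypedDict):
    # Trimmed stand-in for StandardLoggingPayload, for illustration only.
    model: str
    response_time: float
    model_id: Optional[str]


# __annotations__ yields every declared field name on a TypedDict.
assert set(MiniPayload.__annotations__.keys()) == {"model", "response_time", "model_id"}
```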
@ -329,7 +329,6 @@ def test_get_final_response_obj():
|
||||||
litellm.turn_off_message_logging = False
|
litellm.turn_off_message_logging = False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def test_truncate_standard_logging_payload():
|
def test_truncate_standard_logging_payload():
|
||||||
"""
|
"""
|
||||||
1. original messages, response, and error_str should NOT BE MODIFIED, since these are from kwargs
|
1. original messages, response, and error_str should NOT BE MODIFIED, since these are from kwargs
|
||||||
|
@ -368,6 +367,7 @@ def test_truncate_standard_logging_payload():
|
||||||
# assert len of error_str is less than 10_500
|
# assert len of error_str is less than 10_500
|
||||||
assert len(str(standard_logging_payload["error_str"])) < 10_500
|
assert len(str(standard_logging_payload["error_str"])) < 10_500
|
||||||
|
|
||||||
|
|
||||||
def test_strip_trailing_slash():
|
def test_strip_trailing_slash():
|
||||||
common_api_base = "https://api.test.com"
|
common_api_base = "https://api.test.com"
|
||||||
assert (
|
assert (
|
||||||
|
@ -379,3 +379,37 @@ def test_strip_trailing_slash():
|
||||||
== common_api_base
|
== common_api_base
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_response_time():
|
||||||
|
"""Test get_response_time with different streaming scenarios"""
|
||||||
|
# Test case 1: Non-streaming response
|
||||||
|
start_time = 1000.0
|
||||||
|
end_time = 1005.0
|
||||||
|
completion_start_time = 1003.0
|
||||||
|
stream = False
|
||||||
|
|
||||||
|
response_time = StandardLoggingPayloadSetup.get_response_time(
|
||||||
|
start_time_float=start_time,
|
||||||
|
end_time_float=end_time,
|
||||||
|
completion_start_time_float=completion_start_time,
|
||||||
|
stream=stream,
|
||||||
|
)
|
||||||
|
|
||||||
|
# For non-streaming, should return end_time - start_time
|
||||||
|
assert response_time == 5.0
|
||||||
|
|
||||||
|
# Test case 2: Streaming response
|
||||||
|
start_time = 1000.0
|
||||||
|
end_time = 1010.0
|
||||||
|
completion_start_time = 1002.0
|
||||||
|
stream = True
|
||||||
|
|
||||||
|
response_time = StandardLoggingPayloadSetup.get_response_time(
|
||||||
|
start_time_float=start_time,
|
||||||
|
end_time_float=end_time,
|
||||||
|
completion_start_time_float=completion_start_time,
|
||||||
|
stream=stream,
|
||||||
|
)
|
||||||
|
|
||||||
|
# For streaming, should return completion_start_time - start_time
|
||||||
|
assert response_time == 2.0
|
||||||
|
|