(feat) add response_time to StandardLoggingPayload - logged on datadog, gcs_bucket, s3_bucket etc (#7199)

* feat - add response_time to slp

* test_get_response_time

* docs slp

* fix test_datadog_logging_http_request
Ishaan Jaff 2024-12-12 12:04:43 -08:00 committed by GitHub
parent aa7f416b7f
commit 153ab055d6
6 changed files with 73 additions and 40 deletions
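
The new response_time field flows through to every sink that consumes StandardLoggingPayload (Datadog, GCS bucket, S3 bucket, custom callbacks). As a rough sketch of how a consumer could read it, assuming litellm's CustomLogger interface and the standard_logging_object key in kwargs (names from the litellm docs, not from this diff):

from litellm.integrations.custom_logger import CustomLogger

class ResponseTimeLogger(CustomLogger):
    # Sketch: surface the new response_time field from the standard logging payload.
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        slp = kwargs.get("standard_logging_object") or {}
        # For streaming calls response_time is time-to-first-token;
        # otherwise it is the total wall-clock time of the call.
        print(f"model={slp.get('model')} response_time={slp.get('response_time')}s")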

View file

@@ -127,9 +127,10 @@ class StandardLoggingPayload(TypedDict):
    total_tokens: int
    prompt_tokens: int
    completion_tokens: int
    startTime: float
    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
    endTime: float
    completionStartTime: float
    completionStartTime: float  # time the first token of the LLM response is returned (for streaming responses)
    response_time: float  # time the LLM takes to respond (for streaming uses time to first token)
    model_map_information: StandardLoggingModelInformation
    model: str
    model_id: Optional[str]
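
To make the semantics concrete, a hypothetical streaming-call payload (values invented for illustration) would carry:

# Hypothetical timing fields on one logged payload for a streaming call
payload = {
    "startTime": 1700000000.0,  # request sent
    "completionStartTime": 1700000001.2,  # first token received
    "endTime": 1700000005.0,  # last token received
    "response_time": 1.2,  # streaming: completionStartTime - startTime
}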

View file

@@ -2729,6 +2729,30 @@ class StandardLoggingPayloadSetup:
            return api_base.rstrip("/")
        return api_base

    @staticmethod
    def get_response_time(
        start_time_float: float,
        end_time_float: float,
        completion_start_time_float: float,
        stream: bool,
    ) -> float:
        """
        Get the response time for the LLM response

        Args:
            start_time_float: float - start time of the LLM call
            end_time_float: float - end time of the LLM call
            completion_start_time_float: float - time to first token of the LLM response (for streaming responses)
            stream: bool - True when a stream response is returned

        Returns:
            float: The response time for the LLM response
        """
        if stream is True:
            return completion_start_time_float - start_time_float
        else:
            return end_time_float - start_time_float


def get_standard_logging_object_payload(
    kwargs: Optional[dict],
@@ -2802,6 +2826,12 @@ def get_standard_logging_object_payload(
            completion_start_time=completion_start_time,
        )
    )
    response_time = StandardLoggingPayloadSetup.get_response_time(
        start_time_float=start_time_float,
        end_time_float=end_time_float,
        completion_start_time_float=completion_start_time_float,
        stream=kwargs.get("stream", False),
    )

    # clean up litellm hidden params
    clean_hidden_params = StandardLoggingPayloadSetup.get_hidden_params(
        hidden_params
@@ -2850,6 +2880,7 @@ def get_standard_logging_object_payload(
        startTime=start_time_float,
        endTime=end_time_float,
        completionStartTime=completion_start_time_float,
        response_time=response_time,
        model=kwargs.get("model", "") or "",
        metadata=clean_metadata,
        cache_key=clean_hidden_params["cache_key"],

View file

@@ -2,40 +2,5 @@ model_list:
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: anthropic/*
    litellm_params:
      model: anthropic/fake
      api_base: https://exampleanthropicendpoint-production.up.railway.app/

litellm_settings:
  callbacks: ["datadog"]
  turn_off_message_logging: True

router_settings:
  provider_budget_config:
    openai:
      budget_limit: 0.000000000001  # float of $ value budget for time period
      time_period: 1d  # can be 1d, 2d, 30d, 1mo, 2mo
    azure:
      budget_limit: 100
      time_period: 1d
    anthropic:
      budget_limit: 100
      time_period: 10d
    vertex_ai:
      budget_limit: 100
      time_period: 12d
    gemini:
      budget_limit: 100
      time_period: 12d
  # OPTIONAL: Set Redis Host, Port, and Password if using multiple instance of LiteLLM
  redis_host: os.environ/REDIS_HOST
  redis_port: os.environ/REDIS_PORT
  redis_password: os.environ/REDIS_PASSWORD

View file

@@ -1560,9 +1560,10 @@ class StandardLoggingPayload(TypedDict):
    total_tokens: int
    prompt_tokens: int
    completion_tokens: int
    startTime: float
    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
    endTime: float
    completionStartTime: float
    response_time: float
    model_map_information: StandardLoggingModelInformation
    model: str
    model_id: Optional[str]

View file

@@ -226,6 +226,7 @@ async def test_datadog_logging_http_request():
    # Parse the 'message' field as JSON and check its structure
    message = json.loads(body[0]["message"])
    print("logged message", json.dumps(message, indent=4))

    expected_message_fields = StandardLoggingPayload.__annotations__.keys()
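
The added line collects every annotated field of StandardLoggingPayload so the test can verify the Datadog message is complete; a plausible follow-up assertion (not shown in this excerpt) would be:

# Plausible continuation: every payload field should appear in the logged message
for field in expected_message_fields:
    assert field in message, f"missing {field} in Datadog message"

Adding response_time to the TypedDict makes a check like this cover the new field automatically.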

View file

@@ -329,7 +329,6 @@ def test_get_final_response_obj():
    litellm.turn_off_message_logging = False


def test_truncate_standard_logging_payload():
    """
    1. original messages, response, and error_str should NOT BE MODIFIED, since these are from kwargs
@@ -368,6 +367,7 @@ def test_truncate_standard_logging_payload():
    # assert len of error_str is less than 10_500
    assert len(str(standard_logging_payload["error_str"])) < 10_500


def test_strip_trailing_slash():
    common_api_base = "https://api.test.com"
    assert (
@@ -379,3 +379,37 @@ def test_strip_trailing_slash():
        == common_api_base
    )


def test_get_response_time():
    """Test get_response_time with different streaming scenarios"""
    # Test case 1: Non-streaming response
    start_time = 1000.0
    end_time = 1005.0
    completion_start_time = 1003.0
    stream = False

    response_time = StandardLoggingPayloadSetup.get_response_time(
        start_time_float=start_time,
        end_time_float=end_time,
        completion_start_time_float=completion_start_time,
        stream=stream,
    )

    # For non-streaming, should return end_time - start_time
    assert response_time == 5.0

    # Test case 2: Streaming response
    start_time = 1000.0
    end_time = 1010.0
    completion_start_time = 1002.0
    stream = True

    response_time = StandardLoggingPayloadSetup.get_response_time(
        start_time_float=start_time,
        end_time_float=end_time,
        completion_start_time_float=completion_start_time,
        stream=stream,
    )

    # For streaming, should return completion_start_time - start_time
    assert response_time == 2.0