(feat) add response_time to StandardLoggingPayload - logged on datadog, gcs_bucket, s3_bucket etc (#7199)

* feat - add response_time to slp
* test_get_response_time
* docs slp
* fix test_datadog_logging_http_request

parent aa7f416b7f
commit 153ab055d6
6 changed files with 73 additions and 40 deletions
@@ -127,9 +127,10 @@ class StandardLoggingPayload(TypedDict):
     total_tokens: int
     prompt_tokens: int
     completion_tokens: int
-    startTime: float
+    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
     endTime: float
-    completionStartTime: float
+    completionStartTime: float  # time the first token of the LLM response is returned (for streaming responses)
+    response_time: float  # time the LLM takes to respond (for streaming uses time to first token)
     model_map_information: StandardLoggingModelInformation
     model: str
     model_id: Optional[str]
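In plain terms: for non-streaming calls response_time covers the whole request, while for streaming calls it measures time to first token. A minimal sketch of the relationship between the timing fields above; the values are made up for illustration:

# timing fields from the payload above, with made-up values for a streaming call
payload = {
    "startTime": 1000.0,            # request sent
    "completionStartTime": 1002.0,  # first token received
    "endTime": 1005.0,              # last token received
}

# streaming: response_time is time to first token ...
stream_response_time = payload["completionStartTime"] - payload["startTime"]  # 2.0
# ... while a non-streaming call reports the full duration
blocking_response_time = payload["endTime"] - payload["startTime"]  # 5.0

assert (stream_response_time, blocking_response_time) == (2.0, 5.0)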
@@ -2729,6 +2729,30 @@ class StandardLoggingPayloadSetup:
             return api_base.rstrip("/")
         return api_base
 
+    @staticmethod
+    def get_response_time(
+        start_time_float: float,
+        end_time_float: float,
+        completion_start_time_float: float,
+        stream: bool,
+    ) -> float:
+        """
+        Get the response time for the LLM response
+
+        Args:
+            start_time_float: float - start time of the LLM call
+            end_time_float: float - end time of the LLM call
+            completion_start_time_float: float - time to first token of the LLM response (for streaming responses)
+            stream: bool - True when a stream response is returned
+
+        Returns:
+            float: The response time for the LLM response
+        """
+        if stream is True:
+            return completion_start_time_float - start_time_float
+        else:
+            return end_time_float - start_time_float
+
 
 def get_standard_logging_object_payload(
     kwargs: Optional[dict],
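A quick usage sketch of the new helper exercising both branches, assuming StandardLoggingPayloadSetup is importable from litellm.litellm_core_utils.litellm_logging (the module patched here); the timing values are made up:

from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup

start, first_token, end = 1000.0, 1002.0, 1005.0

# stream=True measures time to first token
assert StandardLoggingPayloadSetup.get_response_time(
    start_time_float=start,
    end_time_float=end,
    completion_start_time_float=first_token,
    stream=True,
) == 2.0

# stream=False measures the full request duration
assert StandardLoggingPayloadSetup.get_response_time(
    start_time_float=start,
    end_time_float=end,
    completion_start_time_float=first_token,
    stream=False,
) == 5.0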
@@ -2802,6 +2826,12 @@
             completion_start_time=completion_start_time,
         )
     )
+    response_time = StandardLoggingPayloadSetup.get_response_time(
+        start_time_float=start_time_float,
+        end_time_float=end_time_float,
+        completion_start_time_float=completion_start_time_float,
+        stream=kwargs.get("stream", False),
+    )
     # clean up litellm hidden params
     clean_hidden_params = StandardLoggingPayloadSetup.get_hidden_params(
         hidden_params
@@ -2850,6 +2880,7 @@
             startTime=start_time_float,
             endTime=end_time_float,
             completionStartTime=completion_start_time_float,
+            response_time=response_time,
             model=kwargs.get("model", "") or "",
             metadata=clean_metadata,
             cache_key=clean_hidden_params["cache_key"],
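With the field wired into the payload here, every sink that receives the standard logging payload (datadog, gcs_bucket, s3_bucket, custom loggers) gets it automatically. A hedged sketch of reading it from a custom callback; the CustomLogger hook and the "standard_logging_object" kwargs key are assumptions drawn from litellm's callback interface, not shown in this commit:

from litellm.integrations.custom_logger import CustomLogger


class ResponseTimeLogger(CustomLogger):
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # "standard_logging_object" is the kwargs key litellm uses for the
        # standard logging payload (assumed here; verify for your version)
        slp = kwargs.get("standard_logging_object") or {}
        # total duration for regular calls; time-to-first-token for streams
        print("response_time:", slp.get("response_time"))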
@@ -2,40 +2,5 @@ model_list:
   - model_name: gpt-4o
     litellm_params:
       model: openai/gpt-4o
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-  - model_name: anthropic/*
-    litellm_params:
-      model: anthropic/fake
-      api_base: https://exampleanthropicendpoint-production.up.railway.app/
-
-
-
 litellm_settings:
   callbacks: ["datadog"]
-
-  turn_off_message_logging: True
-
-
-router_settings:
-  provider_budget_config:
-    openai:
-      budget_limit: 0.000000000001 # float of $ value budget for time period
-      time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
-    azure:
-      budget_limit: 100
-      time_period: 1d
-    anthropic:
-      budget_limit: 100
-      time_period: 10d
-    vertex_ai:
-      budget_limit: 100
-      time_period: 12d
-    gemini:
-      budget_limit: 100
-      time_period: 12d
-
-  # OPTIONAL: Set Redis Host, Port, and Password if using multiple instance of LiteLLM
-  redis_host: os.environ/REDIS_HOST
-  redis_port: os.environ/REDIS_PORT
-  redis_password: os.environ/REDIS_PASSWORD
-
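For the datadog callback in this config to actually ship logs, credentials must be present in the environment; DD_API_KEY and DD_SITE are the variables litellm's datadog integration documents. A hedged Python-side sketch of the same setup (placeholder values, and litellm.success_callback is assumed equivalent to the yaml callbacks setting):

import os

# assumed env vars read by litellm's datadog callback (verify for your version)
os.environ["DD_API_KEY"] = "<your-datadog-api-key>"
os.environ["DD_SITE"] = "us5.datadoghq.com"

import litellm

# rough Python-side equivalent of `litellm_settings: callbacks: ["datadog"]`
litellm.success_callback = ["datadog"]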
@@ -1560,9 +1560,10 @@ class StandardLoggingPayload(TypedDict):
     total_tokens: int
     prompt_tokens: int
     completion_tokens: int
-    startTime: float
+    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
     endTime: float
     completionStartTime: float
+    response_time: float
     model_map_information: StandardLoggingModelInformation
     model: str
     model_id: Optional[str]
@@ -226,6 +226,7 @@ async def test_datadog_logging_http_request():
 
         # Parse the 'message' field as JSON and check its structure
         message = json.loads(body[0]["message"])
         print("logged message", json.dumps(message, indent=4))
 
+        expected_message_fields = StandardLoggingPayload.__annotations__.keys()
 
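Deriving the expected fields from StandardLoggingPayload.__annotations__ keeps this test in sync with the TypedDict as fields like response_time are added. A standalone sketch of that pattern; the stand-in class and assertion are illustrative, not the test's exact code:

from typing import TypedDict


class Payload(TypedDict):  # illustrative stand-in for StandardLoggingPayload
    startTime: float
    response_time: float


# a logged message parsed from the callback body (values made up)
logged_message = {"startTime": 1000.0, "response_time": 5.0}

# every annotated field of the TypedDict must appear in the logged message
for field in Payload.__annotations__.keys():
    assert field in logged_message, f"missing field: {field}"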
@@ -329,7 +329,6 @@ def test_get_final_response_obj():
     litellm.turn_off_message_logging = False
 
 
-
 def test_truncate_standard_logging_payload():
     """
     1. original messages, response, and error_str should NOT BE MODIFIED, since these are from kwargs
@@ -368,6 +367,7 @@ def test_truncate_standard_logging_payload():
     # assert len of error_str is less than 10_500
     assert len(str(standard_logging_payload["error_str"])) < 10_500
 
+
 def test_strip_trailing_slash():
     common_api_base = "https://api.test.com"
     assert (
@@ -379,3 +379,37 @@ def test_strip_trailing_slash():
         == common_api_base
     )
 
+
+def test_get_response_time():
+    """Test get_response_time with different streaming scenarios"""
+    # Test case 1: Non-streaming response
+    start_time = 1000.0
+    end_time = 1005.0
+    completion_start_time = 1003.0
+    stream = False
+
+    response_time = StandardLoggingPayloadSetup.get_response_time(
+        start_time_float=start_time,
+        end_time_float=end_time,
+        completion_start_time_float=completion_start_time,
+        stream=stream,
+    )
+
+    # For non-streaming, should return end_time - start_time
+    assert response_time == 5.0
+
+    # Test case 2: Streaming response
+    start_time = 1000.0
+    end_time = 1010.0
+    completion_start_time = 1002.0
+    stream = True
+
+    response_time = StandardLoggingPayloadSetup.get_response_time(
+        start_time_float=start_time,
+        end_time_float=end_time,
+        completion_start_time_float=completion_start_time,
+        stream=stream,
+    )
+
+    # For streaming, should return completion_start_time - start_time
+    assert response_time == 2.0