Mirror of https://github.com/BerriAI/litellm.git
(feat) add response_time to StandardLoggingPayload - logged on datadog, gcs_bucket, s3_bucket, etc. (#7199)

* feat - add response_time to slp
* test_get_response_time
* docs slp
* fix test_datadog_logging_http_request

Commit 153ab055d6 (parent aa7f416b7f) - 6 changed files with 73 additions and 40 deletions.
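For orientation, the rule the commit implements, as a standalone sketch (the function and argument names here are illustrative, not from the repo):

```python
def derive_response_time(start: float, end: float, first_token: float, stream: bool) -> float:
    """Hypothetical restatement of the commit's rule, for orientation only."""
    # Streaming: report latency to first token; non-streaming: full round trip.
    return (first_token - start) if stream else (end - start)
```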
```diff
@@ -127,9 +127,10 @@ class StandardLoggingPayload(TypedDict):
     total_tokens: int
     prompt_tokens: int
     completion_tokens: int
-    startTime: float
+    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
     endTime: float
-    completionStartTime: float
+    completionStartTime: float  # time the first token of the LLM response is returned (for streaming responses)
+    response_time: float  # time the LLM takes to respond (for streaming uses time to first token)
     model_map_information: StandardLoggingModelInformation
     model: str
     model_id: Optional[str]
```
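For context on how the new field surfaces downstream: a minimal sketch of a custom callback reading `response_time` off the payload, assuming the documented `standard_logging_object` key in the callback kwargs (this consumer is hypothetical, not part of the commit):

```python
from litellm.integrations.custom_logger import CustomLogger


class ResponseTimeLogger(CustomLogger):
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # standard_logging_object carries the StandardLoggingPayload dict.
        slp = kwargs.get("standard_logging_object") or {}
        # Seconds; for streaming calls this is time to first token.
        print(f"model={slp.get('model')} response_time={slp.get('response_time')}s")
```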
```diff
@@ -2729,6 +2729,30 @@ class StandardLoggingPayloadSetup:
             return api_base.rstrip("/")
         return api_base
 
+    @staticmethod
+    def get_response_time(
+        start_time_float: float,
+        end_time_float: float,
+        completion_start_time_float: float,
+        stream: bool,
+    ) -> float:
+        """
+        Get the response time for the LLM response
+
+        Args:
+            start_time_float: float - start time of the LLM call
+            end_time_float: float - end time of the LLM call
+            completion_start_time_float: float - time to first token of the LLM response (for streaming responses)
+            stream: bool - True when a stream response is returned
+
+        Returns:
+            float: The response time for the LLM response
+        """
+        if stream is True:
+            return completion_start_time_float - start_time_float
+        else:
+            return end_time_float - start_time_float
+
 
 def get_standard_logging_object_payload(
     kwargs: Optional[dict],
```
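A quick usage sketch of the new helper, assuming `StandardLoggingPayloadSetup` is importable from `litellm.litellm_core_utils.litellm_logging` and using hypothetical timestamps:

```python
from datetime import datetime, timedelta

from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup

start_dt = datetime(2024, 12, 12, 10, 0, 0)
first_token_dt = start_dt + timedelta(seconds=0.8)  # first streamed chunk arrives
end_dt = start_dt + timedelta(seconds=4.5)          # stream fully drained

response_time = StandardLoggingPayloadSetup.get_response_time(
    start_time_float=start_dt.timestamp(),
    end_time_float=end_dt.timestamp(),
    completion_start_time_float=first_token_dt.timestamp(),
    stream=True,
)
assert abs(response_time - 0.8) < 1e-6  # streaming -> time to first token
```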
```diff
@@ -2802,6 +2826,12 @@ def get_standard_logging_object_payload(
             completion_start_time=completion_start_time,
         )
     )
+    response_time = StandardLoggingPayloadSetup.get_response_time(
+        start_time_float=start_time_float,
+        end_time_float=end_time_float,
+        completion_start_time_float=completion_start_time_float,
+        stream=kwargs.get("stream", False),
+    )
     # clean up litellm hidden params
     clean_hidden_params = StandardLoggingPayloadSetup.get_hidden_params(
         hidden_params
```
```diff
@@ -2850,6 +2880,7 @@ def get_standard_logging_object_payload(
             startTime=start_time_float,
             endTime=end_time_float,
             completionStartTime=completion_start_time_float,
+            response_time=response_time,
             model=kwargs.get("model", "") or "",
             metadata=clean_metadata,
             cache_key=clean_hidden_params["cache_key"],
```
```diff
@@ -2,40 +2,5 @@ model_list:
   - model_name: gpt-4o
     litellm_params:
       model: openai/gpt-4o
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-  - model_name: anthropic/*
-    litellm_params:
-      model: anthropic/fake
-      api_base: https://exampleanthropicendpoint-production.up.railway.app/
-
-
 litellm_settings:
   callbacks: ["datadog"]
-
-  turn_off_message_logging: True
-
-
-router_settings:
-  provider_budget_config:
-    openai:
-      budget_limit: 0.000000000001 # float of $ value budget for time period
-      time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
-    azure:
-      budget_limit: 100
-      time_period: 1d
-    anthropic:
-      budget_limit: 100
-      time_period: 10d
-    vertex_ai:
-      budget_limit: 100
-      time_period: 12d
-    gemini:
-      budget_limit: 100
-      time_period: 12d
-
-  # OPTIONAL: Set Redis Host, Port, and Password if using multiple instance of LiteLLM
-  redis_host: os.environ/REDIS_HOST
-  redis_port: os.environ/REDIS_PORT
-  redis_password: os.environ/REDIS_PASSWORD
-
```
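The same Datadog wiring is available outside the proxy via the Python SDK; a sketch using the documented success-callback setting (assumes DD_API_KEY / DD_SITE are set in the environment):

```python
import litellm

# Equivalent of `callbacks: ["datadog"]` in the proxy config above.
litellm.success_callback = ["datadog"]
```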
```diff
@@ -1560,9 +1560,10 @@ class StandardLoggingPayload(TypedDict):
     total_tokens: int
     prompt_tokens: int
     completion_tokens: int
-    startTime: float
+    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
     endTime: float
     completionStartTime: float
+    response_time: float
     model_map_information: StandardLoggingModelInformation
     model: str
     model_id: Optional[str]
```
```diff
@@ -226,6 +226,7 @@ async def test_datadog_logging_http_request():
 
     # Parse the 'message' field as JSON and check its structure
     message = json.loads(body[0]["message"])
+    print("logged message", json.dumps(message, indent=4))
 
     expected_message_fields = StandardLoggingPayload.__annotations__.keys()
 
```
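This test checks the logged Datadog message against the TypedDict's declared keys, so the new `response_time` field is covered automatically. The introspection pattern it relies on, shown in isolation with a trimmed stand-in type:

```python
from typing import Optional, TypedDict


class MiniPayload(TypedDict):
    # Trimmed stand-in for StandardLoggingPayload, for illustration only.
    model: str
    response_time: float
    model_id: Optional[str]


# __annotations__ yields every declared field name on a TypedDict.
assert set(MiniPayload.__annotations__.keys()) == {"model", "response_time", "model_id"}
```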
@ -329,7 +329,6 @@ def test_get_final_response_obj():
|
||||||
litellm.turn_off_message_logging = False
|
litellm.turn_off_message_logging = False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def test_truncate_standard_logging_payload():
|
def test_truncate_standard_logging_payload():
|
||||||
"""
|
"""
|
||||||
1. original messages, response, and error_str should NOT BE MODIFIED, since these are from kwargs
|
1. original messages, response, and error_str should NOT BE MODIFIED, since these are from kwargs
|
||||||
|
@ -368,6 +367,7 @@ def test_truncate_standard_logging_payload():
|
||||||
# assert len of error_str is less than 10_500
|
# assert len of error_str is less than 10_500
|
||||||
assert len(str(standard_logging_payload["error_str"])) < 10_500
|
assert len(str(standard_logging_payload["error_str"])) < 10_500
|
||||||
|
|
||||||
|
|
||||||
def test_strip_trailing_slash():
|
def test_strip_trailing_slash():
|
||||||
common_api_base = "https://api.test.com"
|
common_api_base = "https://api.test.com"
|
||||||
assert (
|
assert (
|
||||||
|
@ -379,3 +379,37 @@ def test_strip_trailing_slash():
|
||||||
== common_api_base
|
== common_api_base
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_response_time():
|
||||||
|
"""Test get_response_time with different streaming scenarios"""
|
||||||
|
# Test case 1: Non-streaming response
|
||||||
|
start_time = 1000.0
|
||||||
|
end_time = 1005.0
|
||||||
|
completion_start_time = 1003.0
|
||||||
|
stream = False
|
||||||
|
|
||||||
|
response_time = StandardLoggingPayloadSetup.get_response_time(
|
||||||
|
start_time_float=start_time,
|
||||||
|
end_time_float=end_time,
|
||||||
|
completion_start_time_float=completion_start_time,
|
||||||
|
stream=stream,
|
||||||
|
)
|
||||||
|
|
||||||
|
# For non-streaming, should return end_time - start_time
|
||||||
|
assert response_time == 5.0
|
||||||
|
|
||||||
|
# Test case 2: Streaming response
|
||||||
|
start_time = 1000.0
|
||||||
|
end_time = 1010.0
|
||||||
|
completion_start_time = 1002.0
|
||||||
|
stream = True
|
||||||
|
|
||||||
|
response_time = StandardLoggingPayloadSetup.get_response_time(
|
||||||
|
start_time_float=start_time,
|
||||||
|
end_time_float=end_time,
|
||||||
|
completion_start_time_float=completion_start_time,
|
||||||
|
stream=stream,
|
||||||
|
)
|
||||||
|
|
||||||
|
# For streaming, should return completion_start_time - start_time
|
||||||
|
assert response_time == 2.0
|
||||||
|
|