feat(realtime/): add token tracking + log usage object in spend logs metadata (#9843)

* feat(realtime/): add token tracking + log usage object in spend logs metadata

* test: fix test

* test: update tests

* test: update testing

* test: update test

* test: update test

* test: update test

* test: update test

* test: update test

* test: update test
Krish Dholakia 2025-04-09 22:11:00 -07:00 committed by GitHub
parent 87733c8193
commit 0c5b4aa96d
12 changed files with 313 additions and 41 deletions
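
In short: realtime API calls now aggregate token usage from every "response.done" event in the stream into a single Usage object, which drives both cost calculation and a new usage_object field in the spend-logs metadata. A minimal sketch of the new flow, assuming Responses-API-style usage keys on the mock events (shapes mirror the updated tests further down):

from litellm.cost_calculator import (
    RealtimeAPITokenUsageProcessor,
    handle_realtime_stream_cost_calculation,
)

# Illustrative stream results; only "response.done" events carry usage.
results = [
    {"type": "session.created", "session": {"model": "gpt-3.5-turbo"}},
    {
        "type": "response.done",
        "response": {
            "usage": {"input_tokens": 100, "output_tokens": 50, "total_tokens": 150}
        },
    },
]

# One Usage object, summed across all response.done events ...
combined = RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results(
    results=results
)
# ... feeds cost calculation and is logged under metadata["usage_object"].
cost = handle_realtime_stream_cost_calculation(
    results=results,
    combined_usage_object=combined,
    custom_llm_provider="openai",
    litellm_model_name="gpt-3.5-turbo",
)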


@@ -1149,8 +1149,128 @@ def batch_cost_calculator(
return total_prompt_cost, total_completion_cost
class RealtimeAPITokenUsageProcessor:
@staticmethod
def collect_usage_from_realtime_stream_results(
results: OpenAIRealtimeStreamList,
) -> List[Usage]:
"""
Collect usage from realtime stream results
"""
response_done_events: List[OpenAIRealtimeStreamResponseBaseObject] = cast(
List[OpenAIRealtimeStreamResponseBaseObject],
[result for result in results if result["type"] == "response.done"],
)
usage_objects: List[Usage] = []
for result in response_done_events:
usage_object = (
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
result["response"].get("usage", {})
)
)
usage_objects.append(usage_object)
return usage_objects
@staticmethod
def combine_usage_objects(usage_objects: List[Usage]) -> Usage:
"""
Combine multiple Usage objects into a single Usage object, checking model keys for nested values.
"""
from litellm.types.utils import (
CompletionTokensDetails,
PromptTokensDetailsWrapper,
Usage,
)
combined = Usage()
# Sum basic token counts
for usage in usage_objects:
# Handle direct attributes by checking what exists in the model
for attr in dir(usage):
if not attr.startswith("_") and not callable(getattr(usage, attr)):
current_val = getattr(combined, attr, 0)
new_val = getattr(usage, attr, 0)
if (
new_val is not None
and isinstance(new_val, (int, float))
and isinstance(current_val, (int, float))
):
setattr(combined, attr, current_val + new_val)
# Handle nested prompt_tokens_details
if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
if (
not hasattr(combined, "prompt_tokens_details")
or not combined.prompt_tokens_details
):
combined.prompt_tokens_details = PromptTokensDetailsWrapper()
# Check what keys exist in the model's prompt_tokens_details
for attr in dir(usage.prompt_tokens_details):
if not attr.startswith("_") and not callable(
getattr(usage.prompt_tokens_details, attr)
):
current_val = getattr(combined.prompt_tokens_details, attr, 0)
new_val = getattr(usage.prompt_tokens_details, attr, 0)
if new_val is not None:
setattr(
combined.prompt_tokens_details,
attr,
current_val + new_val,
)
# Handle nested completion_tokens_details
if (
hasattr(usage, "completion_tokens_details")
and usage.completion_tokens_details
):
if (
not hasattr(combined, "completion_tokens_details")
or not combined.completion_tokens_details
):
combined.completion_tokens_details = CompletionTokensDetails()
# Check what keys exist in the model's completion_tokens_details
for attr in dir(usage.completion_tokens_details):
if not attr.startswith("_") and not callable(
getattr(usage.completion_tokens_details, attr)
):
current_val = getattr(
combined.completion_tokens_details, attr, 0
)
new_val = getattr(usage.completion_tokens_details, attr, 0)
if new_val is not None:
setattr(
combined.completion_tokens_details,
attr,
current_val + new_val,
)
return combined
@staticmethod
def collect_and_combine_usage_from_realtime_stream_results(
results: OpenAIRealtimeStreamList,
) -> Usage:
"""
Collect and combine usage from realtime stream results
"""
collected_usage_objects = (
RealtimeAPITokenUsageProcessor.collect_usage_from_realtime_stream_results(
results
)
)
combined_usage_object = RealtimeAPITokenUsageProcessor.combine_usage_objects(
collected_usage_objects
)
return combined_usage_object
def handle_realtime_stream_cost_calculation(
- results: OpenAIRealtimeStreamList, custom_llm_provider: str, litellm_model_name: str
+ results: OpenAIRealtimeStreamList,
+ combined_usage_object: Usage,
+ custom_llm_provider: str,
+ litellm_model_name: str,
) -> float:
"""
Handles the cost calculation for realtime stream responses.
@@ -1160,10 +1280,6 @@ def handle_realtime_stream_cost_calculation(
Args:
results: A list of OpenAIRealtimeStreamBaseObject objects
"""
- response_done_events: List[OpenAIRealtimeStreamResponseBaseObject] = cast(
-     List[OpenAIRealtimeStreamResponseBaseObject],
-     [result for result in results if result["type"] == "response.done"],
- )
received_model = None
potential_model_names = []
for result in results:
@@ -1176,21 +1292,19 @@ def handle_realtime_stream_cost_calculation(
potential_model_names.append(litellm_model_name)
input_cost_per_token = 0.0
output_cost_per_token = 0.0
- for result in response_done_events:
-     usage_object = (
-         ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
-             result["response"].get("usage", {})
-         )
-     )
-     for model_name in potential_model_names:
+ for model_name in potential_model_names:
try:
_input_cost_per_token, _output_cost_per_token = generic_cost_per_token(
model=model_name,
- usage=usage_object,
+ usage=combined_usage_object,
custom_llm_provider=custom_llm_provider,
)
- input_cost_per_token += _input_cost_per_token
- output_cost_per_token += _output_cost_per_token
except Exception:
continue
+ input_cost_per_token += _input_cost_per_token
+ output_cost_per_token += _output_cost_per_token
+ break  # exit if we find a valid model
total_cost = input_cost_per_token + output_cost_per_token
return total_cost
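
Since combine_usage_objects sums every numeric attribute it finds (including the nested prompt/completion token details), combining behaves like element-wise addition over Usage objects. A small sketch of the expected semantics, not taken from the test suite:

from litellm.types.utils import Usage
from litellm.cost_calculator import RealtimeAPITokenUsageProcessor

u1 = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150)
u2 = Usage(prompt_tokens=20, completion_tokens=10, total_tokens=30)

combined = RealtimeAPITokenUsageProcessor.combine_usage_objects([u1, u2])
# combined.prompt_tokens == 120, combined.completion_tokens == 60,
# combined.total_tokens == 180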


@@ -33,6 +33,7 @@ from litellm.constants import (
DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT,
)
from litellm.cost_calculator import (
RealtimeAPITokenUsageProcessor,
_select_model_name_for_cost_calc,
handle_realtime_stream_cost_calculation,
)
@@ -1054,11 +1055,18 @@ class Logging(LiteLLMLoggingBaseClass):
## else set cost to None
if self.call_type == CallTypes.arealtime.value and isinstance(result, list):
+ combined_usage_object = RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results(
+     results=result
+ )
self.model_call_details[
"response_cost"
] = handle_realtime_stream_cost_calculation(
- result, self.custom_llm_provider, self.model
+ results=result,
+ combined_usage_object=combined_usage_object,
+ custom_llm_provider=self.custom_llm_provider,
+ litellm_model_name=self.model,
)
+ self.model_call_details["combined_usage_object"] = combined_usage_object
if (
standard_logging_object is None
and result is not None
@@ -3132,6 +3140,7 @@ class StandardLoggingPayloadSetup:
prompt_integration: Optional[str] = None,
applied_guardrails: Optional[List[str]] = None,
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall] = None,
usage_object: Optional[dict] = None,
) -> StandardLoggingMetadata:
"""
Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata.
@@ -3179,6 +3188,7 @@
prompt_management_metadata=prompt_management_metadata,
applied_guardrails=applied_guardrails,
mcp_tool_call_metadata=mcp_tool_call_metadata,
usage_object=usage_object,
)
if isinstance(metadata, dict):
# Filter the metadata dictionary to include only the specified keys
@@ -3204,8 +3214,12 @@
return clean_metadata
@staticmethod
- def get_usage_from_response_obj(response_obj: Optional[dict]) -> Usage:
+ def get_usage_from_response_obj(
+     response_obj: Optional[dict], combined_usage_object: Optional[Usage] = None
+ ) -> Usage:
## BASE CASE ##
+ if combined_usage_object is not None:
+     return combined_usage_object
if response_obj is None:
return Usage(
prompt_tokens=0,
@@ -3334,6 +3348,7 @@ class StandardLoggingPayloadSetup:
litellm_overhead_time_ms=None,
batch_models=None,
litellm_model_name=None,
usage_object=None,
)
if hidden_params is not None:
for key in StandardLoggingHiddenParams.__annotations__.keys():
@@ -3450,6 +3465,7 @@ def get_standard_logging_object_payload(
litellm_overhead_time_ms=None,
batch_models=None,
litellm_model_name=None,
usage_object=None,
)
)
@@ -3466,8 +3482,12 @@
call_type = kwargs.get("call_type")
cache_hit = kwargs.get("cache_hit", False)
usage = StandardLoggingPayloadSetup.get_usage_from_response_obj(
- response_obj=response_obj
+ response_obj=response_obj,
+ combined_usage_object=cast(
+     Optional[Usage], kwargs.get("combined_usage_object")
+ ),
)
id = response_obj.get("id", kwargs.get("litellm_call_id"))
_model_id = metadata.get("model_info", {}).get("id", "")
@@ -3506,6 +3526,7 @@
prompt_integration=kwargs.get("prompt_integration", None),
applied_guardrails=kwargs.get("applied_guardrails", None),
mcp_tool_call_metadata=kwargs.get("mcp_tool_call_metadata", None),
usage_object=usage.model_dump(),
)
_request_body = proxy_server_request.get("body", {})
@@ -3646,6 +3667,7 @@ def get_standard_logging_metadata(
prompt_management_metadata=None,
applied_guardrails=None,
mcp_tool_call_metadata=None,
usage_object=None,
)
if isinstance(metadata, dict):
# Filter the metadata dictionary to include only the specified keys
@@ -3740,6 +3762,7 @@ def create_dummy_standard_logging_payload() -> StandardLoggingPayload:
litellm_overhead_time_ms=None,
batch_models=None,
litellm_model_name=None,
usage_object=None,
)
# Convert numeric values to appropriate types
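
The combined usage object now short-circuits usage extraction in get_usage_from_response_obj: when present (the realtime case) it is returned as-is, otherwise usage is read off the response object as before. A minimal sketch of that precedence, assuming the class is importable from litellm.litellm_core_utils.litellm_logging:

from litellm.types.utils import Usage
from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup

realtime_usage = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150)

# Realtime path: the pre-combined object wins, even with no response dict.
usage = StandardLoggingPayloadSetup.get_usage_from_response_obj(
    response_obj=None, combined_usage_object=realtime_usage
)
assert usage.total_tokens == 150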


@@ -28,6 +28,7 @@ from litellm.types.utils import (
ProviderField,
StandardCallbackDynamicParams,
StandardLoggingMCPToolCall,
StandardLoggingModelInformation,
StandardLoggingPayloadErrorInformation,
StandardLoggingPayloadStatus,
StandardPassThroughResponseObject,
@@ -1936,6 +1937,8 @@ class SpendLogsMetadata(TypedDict):
proxy_server_request: Optional[str]
batch_models: Optional[List[str]]
error_information: Optional[StandardLoggingPayloadErrorInformation]
usage_object: Optional[dict]
model_map_information: Optional[StandardLoggingModelInformation]
class SpendLogsPayload(TypedDict):
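
For readers of the spend-logs table, a hedged fragment of what the two new metadata keys carry once serialized (values abridged from the updated test fixtures further down):

spend_logs_metadata_fragment = {
    "usage_object": {
        "prompt_tokens": 10,
        "completion_tokens": 20,
        "total_tokens": 30,
        "prompt_tokens_details": None,
        "completion_tokens_details": None,
    },
    "model_map_information": {
        "model_map_key": "gpt-4o",
        # Abridged: the real value is the full model-cost-map entry for the key.
        "model_map_value": {"max_tokens": 16384, "input_cost_per_token": 2.5e-06},
    },
}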


@@ -13,7 +13,11 @@ from litellm._logging import verbose_proxy_logger
from litellm.litellm_core_utils.core_helpers import get_litellm_metadata_from_kwargs
from litellm.proxy._types import SpendLogsMetadata, SpendLogsPayload
from litellm.proxy.utils import PrismaClient, hash_token
- from litellm.types.utils import StandardLoggingMCPToolCall, StandardLoggingPayload
+ from litellm.types.utils import (
+     StandardLoggingMCPToolCall,
+     StandardLoggingModelInformation,
+     StandardLoggingPayload,
+ )
from litellm.utils import get_end_user_id_for_cost_tracking
@@ -39,6 +43,8 @@ def _get_spend_logs_metadata(
applied_guardrails: Optional[List[str]] = None,
batch_models: Optional[List[str]] = None,
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall] = None,
usage_object: Optional[dict] = None,
model_map_information: Optional[StandardLoggingModelInformation] = None,
) -> SpendLogsMetadata:
if metadata is None:
return SpendLogsMetadata(
@@ -57,6 +63,8 @@
proxy_server_request=None,
batch_models=None,
mcp_tool_call_metadata=None,
model_map_information=None,
usage_object=None,
)
verbose_proxy_logger.debug(
"getting payload for SpendLogs, available keys in metadata: "
@@ -74,6 +82,8 @@
clean_metadata["applied_guardrails"] = applied_guardrails
clean_metadata["batch_models"] = batch_models
clean_metadata["mcp_tool_call_metadata"] = mcp_tool_call_metadata
clean_metadata["usage_object"] = usage_object
clean_metadata["model_map_information"] = model_map_information
return clean_metadata
@@ -153,6 +163,17 @@ def get_logging_payload( # noqa: PLR0915
api_key = metadata.get("user_api_key", "")
standard_logging_prompt_tokens: int = 0
standard_logging_completion_tokens: int = 0
standard_logging_total_tokens: int = 0
if standard_logging_payload is not None:
standard_logging_prompt_tokens = standard_logging_payload.get(
"prompt_tokens", 0
)
standard_logging_completion_tokens = standard_logging_payload.get(
"completion_tokens", 0
)
standard_logging_total_tokens = standard_logging_payload.get("total_tokens", 0)
if api_key is not None and isinstance(api_key, str):
if api_key.startswith("sk-"):
# hash the api_key
@@ -208,6 +229,16 @@ def get_logging_payload( # noqa: PLR0915
if standard_logging_payload is not None
else None
),
usage_object=(
standard_logging_payload["metadata"].get("usage_object", None)
if standard_logging_payload is not None
else None
),
model_map_information=(
standard_logging_payload["model_map_information"]
if standard_logging_payload is not None
else None
),
)
special_usage_fields = ["completion_tokens", "prompt_tokens", "total_tokens"]
@@ -227,6 +258,7 @@ def get_logging_payload( # noqa: PLR0915
import time
id = f"{id}_cache_hit{time.time()}" # SpendLogs does not allow duplicate request_id
try:
payload: SpendLogsPayload = SpendLogsPayload(
request_id=str(id),
@@ -242,9 +274,11 @@ def get_logging_payload( # noqa: PLR0915
metadata=json.dumps(clean_metadata),
cache_key=cache_key,
spend=kwargs.get("response_cost", 0),
- total_tokens=usage.get("total_tokens", 0),
- prompt_tokens=usage.get("prompt_tokens", 0),
- completion_tokens=usage.get("completion_tokens", 0),
+ total_tokens=usage.get("total_tokens", standard_logging_total_tokens),
+ prompt_tokens=usage.get("prompt_tokens", standard_logging_prompt_tokens),
+ completion_tokens=usage.get(
+     "completion_tokens", standard_logging_completion_tokens
+ ),
request_tags=request_tags,
end_user=end_user_id or "",
api_base=litellm_params.get("api_base", ""),
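
Realtime responses are lists rather than response dicts, so the usage mapping built from the raw response can be missing its token counts; the payload now falls back to the totals recorded on the standard logging payload. A minimal sketch of the fallback, assuming an empty usage dict:

usage: dict = {}  # e.g. a realtime call, where the raw response is a stream list
standard_logging_total_tokens = 150  # read off standard_logging_payload above

# dict.get only falls back when the key is absent, so populated usage wins.
total_tokens = usage.get("total_tokens", standard_logging_total_tokens)
assert total_tokens == 150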


@@ -1709,6 +1709,7 @@ class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
prompt_management_metadata: Optional[StandardLoggingPromptManagementMetadata]
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall]
applied_guardrails: Optional[List[str]]
usage_object: Optional[dict]
class StandardLoggingAdditionalHeaders(TypedDict, total=False):
@@ -1729,6 +1730,7 @@ class StandardLoggingHiddenParams(TypedDict):
additional_headers: Optional[StandardLoggingAdditionalHeaders]
batch_models: Optional[List[str]]
litellm_model_name: Optional[str] # the model name sent to the provider by litellm
usage_object: Optional[dict]
class StandardLoggingModelInformation(TypedDict):


@@ -457,7 +457,7 @@ class TestSpendLogsPayload:
"model": "gpt-4o",
"user": "",
"team_id": "",
- "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 20, "prompt_tokens": 10, "total_tokens": 30, "completion_tokens_details": null, "prompt_tokens_details": null}, "model_map_information": {"model_map_key": "gpt-4o", "model_map_value": {"key": "gpt-4o", "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, "input_cost_per_token": 2.5e-06, "cache_creation_input_token_cost": null, "cache_read_input_token_cost": 1.25e-06, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": 1.25e-06, "output_cost_per_token_batches": 5e-06, "output_cost_per_token": 1e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": false, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": false, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": true, "search_context_cost_per_query": {"search_context_size_low": 0.03, "search_context_size_medium": 0.035, "search_context_size_high": 0.05}, "tpm": null, "rpm": null, "supported_openai_params": ["frequency_penalty", "logit_bias", "logprobs", "top_logprobs", "max_tokens", "max_completion_tokens", "modalities", "prediction", "n", "presence_penalty", "seed", "stop", "stream", "stream_options", "temperature", "top_p", "tools", "tool_choice", "function_call", "functions", "max_retries", "extra_headers", "parallel_tool_calls", "audio", "response_format", "user"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}',
"cache_key": "Cache OFF",
"spend": 0.00022500000000000002,
"total_tokens": 30,
@@ -555,7 +555,7 @@ class TestSpendLogsPayload:
"model": "claude-3-7-sonnet-20250219",
"user": "",
"team_id": "",
- "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"cache_key": "Cache OFF",
"spend": 0.01383,
"total_tokens": 2598,
@@ -651,7 +651,7 @@ class TestSpendLogsPayload:
"model": "claude-3-7-sonnet-20250219",
"user": "",
"team_id": "",
- "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"cache_key": "Cache OFF",
"spend": 0.01383,
"total_tokens": 2598,


@@ -78,6 +78,8 @@ def test_cost_calculator_with_usage():
def test_handle_realtime_stream_cost_calculation():
from litellm.cost_calculator import RealtimeAPITokenUsageProcessor
# Setup test data
results: OpenAIRealtimeStreamList = [
{"type": "session.created", "session": {"model": "gpt-3.5-turbo"}},
@@ -99,9 +101,14 @@ def test_handle_realtime_stream_cost_calculation():
},
]
combined_usage_object = RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results(
results=results,
)
# Test with explicit model name
cost = handle_realtime_stream_cost_calculation(
results=results,
combined_usage_object=combined_usage_object,
custom_llm_provider="openai",
litellm_model_name="gpt-3.5-turbo",
)
@@ -117,8 +124,10 @@ def test_handle_realtime_stream_cost_calculation():
# Test with different model name in session
results[0]["session"]["model"] = "gpt-4"
cost = handle_realtime_stream_cost_calculation(
results=results,
combined_usage_object=combined_usage_object,
custom_llm_provider="openai",
litellm_model_name="gpt-3.5-turbo",
)
@@ -132,8 +141,12 @@ def test_handle_realtime_stream_cost_calculation():
# Test with no response.done events
results = [{"type": "session.created", "session": {"model": "gpt-3.5-turbo"}}]
combined_usage_object = RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results(
results=results,
)
cost = handle_realtime_stream_cost_calculation(
results=results,
combined_usage_object=combined_usage_object,
custom_llm_provider="openai",
litellm_model_name="gpt-3.5-turbo",
)


@@ -9,7 +9,7 @@
"model": "gpt-4o",
"user": "",
"team_id": "",
- "metadata": "{\"applied_guardrails\": [], \"batch_models\": null, \"mcp_tool_call_metadata\": null, \"additional_usage_values\": {\"completion_tokens_details\": null, \"prompt_tokens_details\": null}}",
"metadata": "{\"applied_guardrails\": [], \"batch_models\": null, \"mcp_tool_call_metadata\": null, \"usage_object\": {\"completion_tokens\": 20, \"prompt_tokens\": 10, \"total_tokens\": 30, \"completion_tokens_details\": null, \"prompt_tokens_details\": null}, \"model_map_information\": {\"model_map_key\": \"gpt-4o\", \"model_map_value\": {\"key\": \"gpt-4o\", \"max_tokens\": 16384, \"max_input_tokens\": 128000, \"max_output_tokens\": 16384, \"input_cost_per_token\": 2.5e-06, \"cache_creation_input_token_cost\": null, \"cache_read_input_token_cost\": 1.25e-06, \"input_cost_per_character\": null, \"input_cost_per_token_above_128k_tokens\": null, \"input_cost_per_query\": null, \"input_cost_per_second\": null, \"input_cost_per_audio_token\": null, \"input_cost_per_token_batches\": 1.25e-06, \"output_cost_per_token_batches\": 5e-06, \"output_cost_per_token\": 1e-05, \"output_cost_per_audio_token\": null, \"output_cost_per_character\": null, \"output_cost_per_token_above_128k_tokens\": null, \"output_cost_per_character_above_128k_tokens\": null, \"output_cost_per_second\": null, \"output_cost_per_image\": null, \"output_vector_size\": null, \"litellm_provider\": \"openai\", \"mode\": \"chat\", \"supports_system_messages\": true, \"supports_response_schema\": true, \"supports_vision\": true, \"supports_function_calling\": true, \"supports_tool_choice\": true, \"supports_assistant_prefill\": false, \"supports_prompt_caching\": true, \"supports_audio_input\": false, \"supports_audio_output\": false, \"supports_pdf_input\": false, \"supports_embedding_image_input\": false, \"supports_native_streaming\": null, \"supports_web_search\": true, \"search_context_cost_per_query\": {\"search_context_size_low\": 0.03, \"search_context_size_medium\": 0.035, \"search_context_size_high\": 0.05}, \"tpm\": null, \"rpm\": null, \"supported_openai_params\": [\"frequency_penalty\", \"logit_bias\", \"logprobs\", \"top_logprobs\", \"max_tokens\", \"max_completion_tokens\", \"modalities\", \"prediction\", \"n\", \"presence_penalty\", \"seed\", \"stop\", \"stream\", \"stream_options\", \"temperature\", \"top_p\", \"tools\", \"tool_choice\", \"function_call\", \"functions\", \"max_retries\", \"extra_headers\", \"parallel_tool_calls\", \"audio\", \"response_format\", \"user\"]}}, \"additional_usage_values\": {\"completion_tokens_details\": null, \"prompt_tokens_details\": null}}",
"cache_key": "Cache OFF",
"spend": 0.00022500000000000002,
"total_tokens": 30,


@@ -6,10 +6,10 @@ plugins: snapshot-0.9.0, cov-5.0.0, timeout-2.2.0, postgresql-7.0.1, respx-0.21.
asyncio: mode=Mode.STRICT
collecting ... collected 4 items
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config0-search_context_size_low-True] PASSED [ 25%]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config1-search_context_size_low-False] PASSED [ 50%]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config2-search_context_size_medium-True] PASSED [ 75%]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config3-search_context_size_medium-False] PASSED [100%]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False] PASSED [ 25%]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-True] PASSED [ 50%]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[True-False] PASSED [ 75%]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[True-True] PASSED [100%]
=============================== warnings summary ===============================
../../../../../../Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pydantic/_internal/_config.py:295
@@ -17,10 +17,10 @@ test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_
warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)
../../litellm/litellm_core_utils/get_model_cost_map.py:24
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config0-search_context_size_low-True]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config1-search_context_size_low-False]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config2-search_context_size_medium-True]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config3-search_context_size_medium-False]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-True]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[True-False]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[True-True]
/Users/krrishdholakia/Documents/litellm/litellm/litellm_core_utils/get_model_cost_map.py:24: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
with importlib.resources.open_text(
@@ -28,12 +28,85 @@ test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_
/Users/krrishdholakia/Documents/litellm/litellm/utils.py:183: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
with resources.open_text(
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config0-search_context_size_low-True]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config1-search_context_size_low-False]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config2-search_context_size_medium-True]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config3-search_context_size_medium-False]
- /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/httpx/_content.py:204: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content.
- warnings.warn(message, DeprecationWarning)
test_otel_logging.py:145
/Users/krrishdholakia/Documents/litellm/tests/logging_callback_tests/test_otel_logging.py:145: PytestUnknownMarkWarning: Unknown pytest.mark.flaky - is this a typo? You can register custom marks to avoid this warning - for details, see https://docs.pytest.org/en/stable/how-to/mark.html
@pytest.mark.flaky(retries=6, delay=2)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/google/rpc/__init__.py:18: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html
import pkg_resources
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.cloud')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2348: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(parent)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.logging')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.iam')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('mpl_toolkits')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('sphinxcontrib')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('testing')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('testing.common')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2348: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('testing')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(parent)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/google/rpc/__init__.py:20: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.rpc')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
pkg_resources.declare_namespace(__name__)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
- ======================= 4 passed, 11 warnings in 18.95s ========================
+ ======================== 4 passed, 37 warnings in 5.63s ========================


@@ -62,6 +62,7 @@ def assert_gcs_pubsub_request_matches_expected(
actual_request_body[field] = expected_request_body[field]
# Assert the entire request body matches
print("actual_request_body", actual_request_body)
assert (
actual_request_body == expected_request_body
), f"Difference in request bodies: {json.dumps(actual_request_body, indent=2)} != {json.dumps(expected_request_body, indent=2)}"


@@ -268,6 +268,7 @@ def validate_redacted_message_span_attributes(span):
"metadata.requester_metadata",
"metadata.user_api_key_team_id",
"metadata.spend_logs_metadata",
"metadata.usage_object",
"metadata.user_api_key_alias",
"metadata.user_api_key_user_id",
"metadata.user_api_key_org_id",


@@ -178,6 +178,10 @@ def test_spend_logs_payload(model_id: Optional[str]):
"metadata": {
"user_api_key_end_user_id": "test-user",
},
"model_map_information": {
"tpm": 1000,
"rpm": 1000,
},
},
},
"response_obj": litellm.ModelResponse(
@@ -357,6 +361,10 @@ def test_spend_logs_payload_with_prompts_enabled(monkeypatch):
"user_api_key_end_user_id": "test-user",
},
"request_tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"],
"model_map_information": {
"tpm": 1000,
"rpm": 1000,
},
}
litellm_params = {
"proxy_server_request": {