feat(realtime/): add token tracking + log usage object in spend logs metadata (#9843)

* feat(realtime/): add token tracking + log usage object in spend logs metadata

* test: fix test

* test: update tests

* test: update testing

* test: update test

* test: update test

* test: update test

* test: update test

* test: update test

* test: update test
Krish Dholakia 2025-04-09 22:11:00 -07:00 committed by GitHub
parent 87733c8193
commit 0c5b4aa96d
12 changed files with 313 additions and 41 deletions
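
In short: realtime API calls now aggregate token usage from every "response.done" event in the stream into a single Usage object, which drives both cost calculation and a new usage_object field in the spend-logs metadata. A minimal sketch of the new flow, assuming Responses-API-style usage keys on the mock events (shapes mirror the updated tests further down):

from litellm.cost_calculator import (
    RealtimeAPITokenUsageProcessor,
    handle_realtime_stream_cost_calculation,
)

# Illustrative stream results; only "response.done" events carry usage.
results = [
    {"type": "session.created", "session": {"model": "gpt-3.5-turbo"}},
    {
        "type": "response.done",
        "response": {
            "usage": {"input_tokens": 100, "output_tokens": 50, "total_tokens": 150}
        },
    },
]

# One Usage object, summed across all response.done events ...
combined = RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results(
    results=results
)
# ... feeds cost calculation and is logged under metadata["usage_object"].
cost = handle_realtime_stream_cost_calculation(
    results=results,
    combined_usage_object=combined,
    custom_llm_provider="openai",
    litellm_model_name="gpt-3.5-turbo",
)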


@@ -1149,8 +1149,128 @@ def batch_cost_calculator(
return total_prompt_cost, total_completion_cost
class RealtimeAPITokenUsageProcessor:
@staticmethod
def collect_usage_from_realtime_stream_results(
results: OpenAIRealtimeStreamList,
) -> List[Usage]:
"""
Collect usage from realtime stream results
"""
response_done_events: List[OpenAIRealtimeStreamResponseBaseObject] = cast(
List[OpenAIRealtimeStreamResponseBaseObject],
[result for result in results if result["type"] == "response.done"],
)
usage_objects: List[Usage] = []
for result in response_done_events:
usage_object = (
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
result["response"].get("usage", {})
)
)
usage_objects.append(usage_object)
return usage_objects
@staticmethod
def combine_usage_objects(usage_objects: List[Usage]) -> Usage:
"""
Combine multiple Usage objects into a single Usage object, checking model keys for nested values.
"""
from litellm.types.utils import (
CompletionTokensDetails,
PromptTokensDetailsWrapper,
Usage,
)
combined = Usage()
# Sum basic token counts
for usage in usage_objects:
# Handle direct attributes by checking what exists in the model
for attr in dir(usage):
if not attr.startswith("_") and not callable(getattr(usage, attr)):
current_val = getattr(combined, attr, 0)
new_val = getattr(usage, attr, 0)
if (
new_val is not None
and isinstance(new_val, (int, float))
and isinstance(current_val, (int, float))
):
setattr(combined, attr, current_val + new_val)
# Handle nested prompt_tokens_details
if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
if (
not hasattr(combined, "prompt_tokens_details")
or not combined.prompt_tokens_details
):
combined.prompt_tokens_details = PromptTokensDetailsWrapper()
# Check what keys exist in the model's prompt_tokens_details
for attr in dir(usage.prompt_tokens_details):
if not attr.startswith("_") and not callable(
getattr(usage.prompt_tokens_details, attr)
):
current_val = getattr(combined.prompt_tokens_details, attr, 0)
new_val = getattr(usage.prompt_tokens_details, attr, 0)
if new_val is not None:
setattr(
combined.prompt_tokens_details,
attr,
current_val + new_val,
)
# Handle nested completion_tokens_details
if (
hasattr(usage, "completion_tokens_details")
and usage.completion_tokens_details
):
if (
not hasattr(combined, "completion_tokens_details")
or not combined.completion_tokens_details
):
combined.completion_tokens_details = CompletionTokensDetails()
# Check what keys exist in the model's completion_tokens_details
for attr in dir(usage.completion_tokens_details):
if not attr.startswith("_") and not callable(
getattr(usage.completion_tokens_details, attr)
):
current_val = getattr(
combined.completion_tokens_details, attr, 0
)
new_val = getattr(usage.completion_tokens_details, attr, 0)
if new_val is not None:
setattr(
combined.completion_tokens_details,
attr,
current_val + new_val,
)
return combined
@staticmethod
def collect_and_combine_usage_from_realtime_stream_results(
results: OpenAIRealtimeStreamList,
) -> Usage:
"""
Collect and combine usage from realtime stream results
"""
collected_usage_objects = (
RealtimeAPITokenUsageProcessor.collect_usage_from_realtime_stream_results(
results
)
)
combined_usage_object = RealtimeAPITokenUsageProcessor.combine_usage_objects(
collected_usage_objects
)
return combined_usage_object
def handle_realtime_stream_cost_calculation(
- results: OpenAIRealtimeStreamList, custom_llm_provider: str, litellm_model_name: str
+ results: OpenAIRealtimeStreamList,
+ combined_usage_object: Usage,
+ custom_llm_provider: str,
+ litellm_model_name: str,
) -> float:
"""
Handles the cost calculation for realtime stream responses.
@@ -1160,10 +1280,6 @@ def handle_realtime_stream_cost_calculation(
Args:
results: A list of OpenAIRealtimeStreamBaseObject objects
"""
- response_done_events: List[OpenAIRealtimeStreamResponseBaseObject] = cast(
-     List[OpenAIRealtimeStreamResponseBaseObject],
-     [result for result in results if result["type"] == "response.done"],
- )
received_model = None
potential_model_names = []
for result in results:
@@ -1176,21 +1292,19 @@ def handle_realtime_stream_cost_calculation(
potential_model_names.append(litellm_model_name)
input_cost_per_token = 0.0
output_cost_per_token = 0.0
- for result in response_done_events:
-     usage_object = (
-         ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
-             result["response"].get("usage", {})
-         )
-     )
-     for model_name in potential_model_names:
+ for model_name in potential_model_names:
try:
_input_cost_per_token, _output_cost_per_token = generic_cost_per_token(
model=model_name,
- usage=usage_object,
+ usage=combined_usage_object,
custom_llm_provider=custom_llm_provider,
)
- input_cost_per_token += _input_cost_per_token
- output_cost_per_token += _output_cost_per_token
except Exception:
continue
+ input_cost_per_token += _input_cost_per_token
+ output_cost_per_token += _output_cost_per_token
+ break  # exit if we find a valid model
total_cost = input_cost_per_token + output_cost_per_token
return total_cost
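
Since combine_usage_objects sums every numeric attribute it finds (including the nested prompt/completion token details), combining behaves like element-wise addition over Usage objects. A small sketch of the expected semantics, not taken from the test suite:

from litellm.types.utils import Usage
from litellm.cost_calculator import RealtimeAPITokenUsageProcessor

u1 = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150)
u2 = Usage(prompt_tokens=20, completion_tokens=10, total_tokens=30)

combined = RealtimeAPITokenUsageProcessor.combine_usage_objects([u1, u2])
# combined.prompt_tokens == 120, combined.completion_tokens == 60,
# combined.total_tokens == 180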


@@ -33,6 +33,7 @@ from litellm.constants import (
DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT,
)
from litellm.cost_calculator import (
RealtimeAPITokenUsageProcessor,
_select_model_name_for_cost_calc,
handle_realtime_stream_cost_calculation,
)
@@ -1054,11 +1055,18 @@ class Logging(LiteLLMLoggingBaseClass):
## else set cost to None
if self.call_type == CallTypes.arealtime.value and isinstance(result, list):
+ combined_usage_object = RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results(
+     results=result
+ )
self.model_call_details[
"response_cost"
] = handle_realtime_stream_cost_calculation(
- result, self.custom_llm_provider, self.model
+ results=result,
+ combined_usage_object=combined_usage_object,
+ custom_llm_provider=self.custom_llm_provider,
+ litellm_model_name=self.model,
)
+ self.model_call_details["combined_usage_object"] = combined_usage_object
if (
standard_logging_object is None
and result is not None
@@ -3132,6 +3140,7 @@ class StandardLoggingPayloadSetup:
prompt_integration: Optional[str] = None,
applied_guardrails: Optional[List[str]] = None,
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall] = None,
usage_object: Optional[dict] = None,
) -> StandardLoggingMetadata:
"""
Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata.
@@ -3179,6 +3188,7 @@
prompt_management_metadata=prompt_management_metadata,
applied_guardrails=applied_guardrails,
mcp_tool_call_metadata=mcp_tool_call_metadata,
usage_object=usage_object,
)
if isinstance(metadata, dict):
# Filter the metadata dictionary to include only the specified keys
@@ -3204,8 +3214,12 @@
return clean_metadata
@staticmethod
- def get_usage_from_response_obj(response_obj: Optional[dict]) -> Usage:
+ def get_usage_from_response_obj(
+     response_obj: Optional[dict], combined_usage_object: Optional[Usage] = None
+ ) -> Usage:
## BASE CASE ##
+ if combined_usage_object is not None:
+     return combined_usage_object
if response_obj is None:
return Usage(
prompt_tokens=0,
@@ -3334,6 +3348,7 @@ class StandardLoggingPayloadSetup:
litellm_overhead_time_ms=None,
batch_models=None,
litellm_model_name=None,
usage_object=None,
)
if hidden_params is not None:
for key in StandardLoggingHiddenParams.__annotations__.keys():
@@ -3450,6 +3465,7 @@ def get_standard_logging_object_payload(
litellm_overhead_time_ms=None,
batch_models=None,
litellm_model_name=None,
usage_object=None,
)
)
@@ -3466,8 +3482,12 @@
call_type = kwargs.get("call_type")
cache_hit = kwargs.get("cache_hit", False)
usage = StandardLoggingPayloadSetup.get_usage_from_response_obj(
- response_obj=response_obj
+ response_obj=response_obj,
+ combined_usage_object=cast(
+     Optional[Usage], kwargs.get("combined_usage_object")
+ ),
)
id = response_obj.get("id", kwargs.get("litellm_call_id"))
_model_id = metadata.get("model_info", {}).get("id", "")
@@ -3506,6 +3526,7 @@
prompt_integration=kwargs.get("prompt_integration", None),
applied_guardrails=kwargs.get("applied_guardrails", None),
mcp_tool_call_metadata=kwargs.get("mcp_tool_call_metadata", None),
usage_object=usage.model_dump(),
)
_request_body = proxy_server_request.get("body", {})
@@ -3646,6 +3667,7 @@ def get_standard_logging_metadata(
prompt_management_metadata=None,
applied_guardrails=None,
mcp_tool_call_metadata=None,
usage_object=None,
)
if isinstance(metadata, dict):
# Filter the metadata dictionary to include only the specified keys
@@ -3740,6 +3762,7 @@ def create_dummy_standard_logging_payload() -> StandardLoggingPayload:
litellm_overhead_time_ms=None,
batch_models=None,
litellm_model_name=None,
usage_object=None,
)
# Convert numeric values to appropriate types
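
The combined usage object now short-circuits usage extraction in get_usage_from_response_obj: when present (the realtime case) it is returned as-is, otherwise usage is read off the response object as before. A minimal sketch of that precedence, assuming the class is importable from litellm.litellm_core_utils.litellm_logging:

from litellm.types.utils import Usage
from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup

realtime_usage = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150)

# Realtime path: the pre-combined object wins, even with no response dict.
usage = StandardLoggingPayloadSetup.get_usage_from_response_obj(
    response_obj=None, combined_usage_object=realtime_usage
)
assert usage.total_tokens == 150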


@@ -28,6 +28,7 @@ from litellm.types.utils import (
ProviderField,
StandardCallbackDynamicParams,
StandardLoggingMCPToolCall,
StandardLoggingModelInformation,
StandardLoggingPayloadErrorInformation,
StandardLoggingPayloadStatus,
StandardPassThroughResponseObject,
@@ -1936,6 +1937,8 @@ class SpendLogsMetadata(TypedDict):
proxy_server_request: Optional[str]
batch_models: Optional[List[str]]
error_information: Optional[StandardLoggingPayloadErrorInformation]
usage_object: Optional[dict]
model_map_information: Optional[StandardLoggingModelInformation]
class SpendLogsPayload(TypedDict):
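
For readers of the spend-logs table, a hedged fragment of what the two new metadata keys carry once serialized (values abridged from the updated test fixtures further down):

spend_logs_metadata_fragment = {
    "usage_object": {
        "prompt_tokens": 10,
        "completion_tokens": 20,
        "total_tokens": 30,
        "prompt_tokens_details": None,
        "completion_tokens_details": None,
    },
    "model_map_information": {
        "model_map_key": "gpt-4o",
        # Abridged: the real value is the full model-cost-map entry for the key.
        "model_map_value": {"max_tokens": 16384, "input_cost_per_token": 2.5e-06},
    },
}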


@@ -13,7 +13,11 @@ from litellm._logging import verbose_proxy_logger
from litellm.litellm_core_utils.core_helpers import get_litellm_metadata_from_kwargs
from litellm.proxy._types import SpendLogsMetadata, SpendLogsPayload
from litellm.proxy.utils import PrismaClient, hash_token
- from litellm.types.utils import StandardLoggingMCPToolCall, StandardLoggingPayload
+ from litellm.types.utils import (
+     StandardLoggingMCPToolCall,
+     StandardLoggingModelInformation,
+     StandardLoggingPayload,
+ )
from litellm.utils import get_end_user_id_for_cost_tracking
@@ -39,6 +43,8 @@ def _get_spend_logs_metadata(
applied_guardrails: Optional[List[str]] = None,
batch_models: Optional[List[str]] = None,
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall] = None,
usage_object: Optional[dict] = None,
model_map_information: Optional[StandardLoggingModelInformation] = None,
) -> SpendLogsMetadata:
if metadata is None:
return SpendLogsMetadata(
@@ -57,6 +63,8 @@
proxy_server_request=None,
batch_models=None,
mcp_tool_call_metadata=None,
model_map_information=None,
usage_object=None,
)
verbose_proxy_logger.debug(
"getting payload for SpendLogs, available keys in metadata: "
@@ -74,6 +82,8 @@
clean_metadata["applied_guardrails"] = applied_guardrails
clean_metadata["batch_models"] = batch_models
clean_metadata["mcp_tool_call_metadata"] = mcp_tool_call_metadata
clean_metadata["usage_object"] = usage_object
clean_metadata["model_map_information"] = model_map_information
return clean_metadata
@@ -153,6 +163,17 @@ def get_logging_payload( # noqa: PLR0915
api_key = metadata.get("user_api_key", "")
standard_logging_prompt_tokens: int = 0
standard_logging_completion_tokens: int = 0
standard_logging_total_tokens: int = 0
if standard_logging_payload is not None:
standard_logging_prompt_tokens = standard_logging_payload.get(
"prompt_tokens", 0
)
standard_logging_completion_tokens = standard_logging_payload.get(
"completion_tokens", 0
)
standard_logging_total_tokens = standard_logging_payload.get("total_tokens", 0)
if api_key is not None and isinstance(api_key, str):
if api_key.startswith("sk-"):
# hash the api_key
@@ -208,6 +229,16 @@ def get_logging_payload( # noqa: PLR0915
if standard_logging_payload is not None
else None
),
usage_object=(
standard_logging_payload["metadata"].get("usage_object", None)
if standard_logging_payload is not None
else None
),
model_map_information=(
standard_logging_payload["model_map_information"]
if standard_logging_payload is not None
else None
),
)
special_usage_fields = ["completion_tokens", "prompt_tokens", "total_tokens"]
@@ -227,6 +258,7 @@ def get_logging_payload( # noqa: PLR0915
import time
id = f"{id}_cache_hit{time.time()}" # SpendLogs does not allow duplicate request_id
try:
payload: SpendLogsPayload = SpendLogsPayload(
request_id=str(id),
@@ -242,9 +274,11 @@ def get_logging_payload( # noqa: PLR0915
metadata=json.dumps(clean_metadata),
cache_key=cache_key,
spend=kwargs.get("response_cost", 0),
- total_tokens=usage.get("total_tokens", 0),
- prompt_tokens=usage.get("prompt_tokens", 0),
- completion_tokens=usage.get("completion_tokens", 0),
+ total_tokens=usage.get("total_tokens", standard_logging_total_tokens),
+ prompt_tokens=usage.get("prompt_tokens", standard_logging_prompt_tokens),
+ completion_tokens=usage.get(
+     "completion_tokens", standard_logging_completion_tokens
+ ),
request_tags=request_tags,
end_user=end_user_id or "",
api_base=litellm_params.get("api_base", ""),
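
Realtime responses are lists rather than response dicts, so the usage mapping built from the raw response can be missing its token counts; the payload now falls back to the totals recorded on the standard logging payload. A minimal sketch of the fallback, assuming an empty usage dict:

usage: dict = {}  # e.g. a realtime call, where the raw response is a stream list
standard_logging_total_tokens = 150  # read off standard_logging_payload above

# dict.get only falls back when the key is absent, so populated usage wins.
total_tokens = usage.get("total_tokens", standard_logging_total_tokens)
assert total_tokens == 150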


@@ -1709,6 +1709,7 @@ class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
prompt_management_metadata: Optional[StandardLoggingPromptManagementMetadata]
mcp_tool_call_metadata: Optional[StandardLoggingMCPToolCall]
applied_guardrails: Optional[List[str]]
usage_object: Optional[dict]
class StandardLoggingAdditionalHeaders(TypedDict, total=False):
@@ -1729,6 +1730,7 @@ class StandardLoggingHiddenParams(TypedDict):
additional_headers: Optional[StandardLoggingAdditionalHeaders]
batch_models: Optional[List[str]]
litellm_model_name: Optional[str] # the model name sent to the provider by litellm
usage_object: Optional[dict]
class StandardLoggingModelInformation(TypedDict):


@@ -457,7 +457,7 @@ class TestSpendLogsPayload:
"model": "gpt-4o",
"user": "",
"team_id": "",
- "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 20, "prompt_tokens": 10, "total_tokens": 30, "completion_tokens_details": null, "prompt_tokens_details": null}, "model_map_information": {"model_map_key": "gpt-4o", "model_map_value": {"key": "gpt-4o", "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, "input_cost_per_token": 2.5e-06, "cache_creation_input_token_cost": null, "cache_read_input_token_cost": 1.25e-06, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": 1.25e-06, "output_cost_per_token_batches": 5e-06, "output_cost_per_token": 1e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": false, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": false, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": true, "search_context_cost_per_query": {"search_context_size_low": 0.03, "search_context_size_medium": 0.035, "search_context_size_high": 0.05}, "tpm": null, "rpm": null, "supported_openai_params": ["frequency_penalty", "logit_bias", "logprobs", "top_logprobs", "max_tokens", "max_completion_tokens", "modalities", "prediction", "n", "presence_penalty", "seed", "stop", "stream", "stream_options", "temperature", "top_p", "tools", "tool_choice", "function_call", "functions", "max_retries", "extra_headers", "parallel_tool_calls", "audio", "response_format", "user"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}',
"cache_key": "Cache OFF",
"spend": 0.00022500000000000002,
"total_tokens": 30,
@@ -555,7 +555,7 @@ class TestSpendLogsPayload:
"model": "claude-3-7-sonnet-20250219",
"user": "",
"team_id": "",
- "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"cache_key": "Cache OFF",
"spend": 0.01383,
"total_tokens": 2598,
@@ -651,7 +651,7 @@ class TestSpendLogsPayload:
"model": "claude-3-7-sonnet-20250219",
"user": "",
"team_id": "",
- "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 503, "prompt_tokens": 2095, "total_tokens": 2598, "completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}, "model_map_information": {"model_map_key": "claude-3-7-sonnet-20250219", "model_map_value": {"key": "claude-3-7-sonnet-20250219", "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, "input_cost_per_token": 3e-06, "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": null, "output_cost_per_token_batches": null, "output_cost_per_token": 1.5e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "anthropic", "mode": "chat", "supports_system_messages": null, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": true, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": false, "search_context_cost_per_query": null, "tpm": null, "rpm": null, "supported_openai_params": ["stream", "stop", "temperature", "top_p", "max_tokens", "max_completion_tokens", "tools", "tool_choice", "extra_headers", "parallel_tool_calls", "response_format", "user", "reasoning_effort", "thinking"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": {"audio_tokens": null, "cached_tokens": 0, "text_tokens": null, "image_tokens": null}, "cache_creation_input_tokens": 0, "cache_read_input_tokens": 0}}',
"cache_key": "Cache OFF",
"spend": 0.01383,
"total_tokens": 2598,


@@ -78,6 +78,8 @@ def test_cost_calculator_with_usage():
def test_handle_realtime_stream_cost_calculation():
from litellm.cost_calculator import RealtimeAPITokenUsageProcessor
# Setup test data
results: OpenAIRealtimeStreamList = [
{"type": "session.created", "session": {"model": "gpt-3.5-turbo"}},
@@ -99,9 +101,14 @@ def test_handle_realtime_stream_cost_calculation():
},
]
combined_usage_object = RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results(
results=results,
)
# Test with explicit model name
cost = handle_realtime_stream_cost_calculation(
results=results,
combined_usage_object=combined_usage_object,
custom_llm_provider="openai",
litellm_model_name="gpt-3.5-turbo",
)
@@ -117,8 +124,10 @@ def test_handle_realtime_stream_cost_calculation():
# Test with different model name in session
results[0]["session"]["model"] = "gpt-4"
cost = handle_realtime_stream_cost_calculation(
results=results,
combined_usage_object=combined_usage_object,
custom_llm_provider="openai",
litellm_model_name="gpt-3.5-turbo",
)
@@ -132,8 +141,12 @@ def test_handle_realtime_stream_cost_calculation():
# Test with no response.done events
results = [{"type": "session.created", "session": {"model": "gpt-3.5-turbo"}}]
combined_usage_object = RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results(
results=results,
)
cost = handle_realtime_stream_cost_calculation(
results=results,
combined_usage_object=combined_usage_object,
custom_llm_provider="openai",
litellm_model_name="gpt-3.5-turbo",
)


@@ -9,7 +9,7 @@
"model": "gpt-4o",
"user": "",
"team_id": "",
- "metadata": "{\"applied_guardrails\": [], \"batch_models\": null, \"mcp_tool_call_metadata\": null, \"additional_usage_values\": {\"completion_tokens_details\": null, \"prompt_tokens_details\": null}}",
"metadata": "{\"applied_guardrails\": [], \"batch_models\": null, \"mcp_tool_call_metadata\": null, \"usage_object\": {\"completion_tokens\": 20, \"prompt_tokens\": 10, \"total_tokens\": 30, \"completion_tokens_details\": null, \"prompt_tokens_details\": null}, \"model_map_information\": {\"model_map_key\": \"gpt-4o\", \"model_map_value\": {\"key\": \"gpt-4o\", \"max_tokens\": 16384, \"max_input_tokens\": 128000, \"max_output_tokens\": 16384, \"input_cost_per_token\": 2.5e-06, \"cache_creation_input_token_cost\": null, \"cache_read_input_token_cost\": 1.25e-06, \"input_cost_per_character\": null, \"input_cost_per_token_above_128k_tokens\": null, \"input_cost_per_query\": null, \"input_cost_per_second\": null, \"input_cost_per_audio_token\": null, \"input_cost_per_token_batches\": 1.25e-06, \"output_cost_per_token_batches\": 5e-06, \"output_cost_per_token\": 1e-05, \"output_cost_per_audio_token\": null, \"output_cost_per_character\": null, \"output_cost_per_token_above_128k_tokens\": null, \"output_cost_per_character_above_128k_tokens\": null, \"output_cost_per_second\": null, \"output_cost_per_image\": null, \"output_vector_size\": null, \"litellm_provider\": \"openai\", \"mode\": \"chat\", \"supports_system_messages\": true, \"supports_response_schema\": true, \"supports_vision\": true, \"supports_function_calling\": true, \"supports_tool_choice\": true, \"supports_assistant_prefill\": false, \"supports_prompt_caching\": true, \"supports_audio_input\": false, \"supports_audio_output\": false, \"supports_pdf_input\": false, \"supports_embedding_image_input\": false, \"supports_native_streaming\": null, \"supports_web_search\": true, \"search_context_cost_per_query\": {\"search_context_size_low\": 0.03, \"search_context_size_medium\": 0.035, \"search_context_size_high\": 0.05}, \"tpm\": null, \"rpm\": null, \"supported_openai_params\": [\"frequency_penalty\", \"logit_bias\", \"logprobs\", \"top_logprobs\", \"max_tokens\", \"max_completion_tokens\", \"modalities\", \"prediction\", \"n\", \"presence_penalty\", \"seed\", \"stop\", \"stream\", \"stream_options\", \"temperature\", \"top_p\", \"tools\", \"tool_choice\", \"function_call\", \"functions\", \"max_retries\", \"extra_headers\", \"parallel_tool_calls\", \"audio\", \"response_format\", \"user\"]}}, \"additional_usage_values\": {\"completion_tokens_details\": null, \"prompt_tokens_details\": null}}",
"cache_key": "Cache OFF",
"spend": 0.00022500000000000002,
"total_tokens": 30,


@@ -6,10 +6,10 @@ plugins: snapshot-0.9.0, cov-5.0.0, timeout-2.2.0, postgresql-7.0.1, respx-0.21.
asyncio: mode=Mode.STRICT
collecting ... collected 4 items
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config0-search_context_size_low-True] PASSED [ 25%]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config1-search_context_size_low-False] PASSED [ 50%]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config2-search_context_size_medium-True] PASSED [ 75%]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config3-search_context_size_medium-False] PASSED [100%]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False] PASSED [ 25%]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-True] PASSED [ 50%]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[True-False] PASSED [ 75%]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[True-True] PASSED [100%]
=============================== warnings summary ===============================
../../../../../../Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pydantic/_internal/_config.py:295
@@ -17,10 +17,10 @@ test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_
warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)
../../litellm/litellm_core_utils/get_model_cost_map.py:24
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config0-search_context_size_low-True]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config1-search_context_size_low-False]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config2-search_context_size_medium-True]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config3-search_context_size_medium-False]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-True]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[True-False]
+ test_otel_logging.py::test_awesome_otel_with_message_logging_off[True-True]
/Users/krrishdholakia/Documents/litellm/litellm/litellm_core_utils/get_model_cost_map.py:24: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
with importlib.resources.open_text(
@@ -28,12 +28,85 @@ test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_
/Users/krrishdholakia/Documents/litellm/litellm/utils.py:183: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
with resources.open_text(
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config0-search_context_size_low-True]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config1-search_context_size_low-False]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config2-search_context_size_medium-True]
- test_built_in_tools_cost_tracking.py::test_openai_responses_api_web_search_cost_tracking[tools_config3-search_context_size_medium-False]
- /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/httpx/_content.py:204: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content.
- warnings.warn(message, DeprecationWarning)
test_otel_logging.py:145
/Users/krrishdholakia/Documents/litellm/tests/logging_callback_tests/test_otel_logging.py:145: PytestUnknownMarkWarning: Unknown pytest.mark.flaky - is this a typo? You can register custom marks to avoid this warning - for details, see https://docs.pytest.org/en/stable/how-to/mark.html
@pytest.mark.flaky(retries=6, delay=2)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/google/rpc/__init__.py:18: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html
import pkg_resources
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.cloud')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2348: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(parent)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.logging')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.iam')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('mpl_toolkits')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('sphinxcontrib')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('testing')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2868: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('testing.common')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(pkg)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pkg_resources/__init__.py:2348: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('testing')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
declare_namespace(parent)
test_otel_logging.py::test_awesome_otel_with_message_logging_off[False-False]
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/google/rpc/__init__.py:20: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.rpc')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
pkg_resources.declare_namespace(__name__)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
- ======================= 4 passed, 11 warnings in 18.95s ========================
+ ======================== 4 passed, 37 warnings in 5.63s ========================


@@ -62,6 +62,7 @@ def assert_gcs_pubsub_request_matches_expected(
actual_request_body[field] = expected_request_body[field]
# Assert the entire request body matches
print("actual_request_body", actual_request_body)
assert (
actual_request_body == expected_request_body
), f"Difference in request bodies: {json.dumps(actual_request_body, indent=2)} != {json.dumps(expected_request_body, indent=2)}"


@@ -268,6 +268,7 @@ def validate_redacted_message_span_attributes(span):
"metadata.requester_metadata",
"metadata.user_api_key_team_id",
"metadata.spend_logs_metadata",
"metadata.usage_object",
"metadata.user_api_key_alias",
"metadata.user_api_key_user_id",
"metadata.user_api_key_org_id",


@@ -178,6 +178,10 @@ def test_spend_logs_payload(model_id: Optional[str]):
"metadata": {
"user_api_key_end_user_id": "test-user",
},
"model_map_information": {
"tpm": 1000,
"rpm": 1000,
},
},
},
"response_obj": litellm.ModelResponse(
@@ -357,6 +361,10 @@ def test_spend_logs_payload_with_prompts_enabled(monkeypatch):
"user_api_key_end_user_id": "test-user",
},
"request_tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"],
"model_map_information": {
"tpm": 1000,
"rpm": 1000,
},
}
litellm_params = {
"proxy_server_request": {