diff --git a/tests/litellm/proxy/spend_tracking/test_spend_management_endpoints.py b/tests/litellm/proxy/spend_tracking/test_spend_management_endpoints.py
index 080aa3bd16..b2e597fb7c 100644
--- a/tests/litellm/proxy/spend_tracking/test_spend_management_endpoints.py
+++ b/tests/litellm/proxy/spend_tracking/test_spend_management_endpoints.py
@@ -20,6 +20,16 @@ from litellm.proxy.hooks.proxy_track_cost_callback import _ProxyDBLogger
 from litellm.proxy.proxy_server import app, prisma_client
 from litellm.router import Router
 
+ignored_keys = [
+    "request_id",
+    "startTime",
+    "endTime",
+    "completionStartTime",
+    "endTime",
+    "metadata.model_map_information",
+    "metadata.usage_object",
+]
+
 
 @pytest.fixture
 def client():
@@ -457,7 +467,7 @@ class TestSpendLogsPayload:
                     "model": "gpt-4o",
                     "user": "",
                     "team_id": "",
-                    "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 20, "prompt_tokens": 10, "total_tokens": 30, "completion_tokens_details": null, "prompt_tokens_details": null}, "model_map_information": {"model_map_key": "gpt-4o", "model_map_value": {"key": "gpt-4o", "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, "input_cost_per_token": 2.5e-06, "cache_creation_input_token_cost": null, "cache_read_input_token_cost": 1.25e-06, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": 1.25e-06, "output_cost_per_token_batches": 5e-06, "output_cost_per_token": 1e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": false, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": false, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": true, "supports_reasoning": false, "search_context_cost_per_query": {"search_context_size_low": 0.03, "search_context_size_medium": 0.035, "search_context_size_high": 0.05}, "tpm": null, "rpm": null, "supported_openai_params": ["frequency_penalty", "logit_bias", "logprobs", "top_logprobs", "max_tokens", "max_completion_tokens", "modalities", "prediction", "n", "presence_penalty", "seed", "stop", "stream", "stream_options", "temperature", "top_p", "tools", "tool_choice", "function_call", "functions", "max_retries", "extra_headers", "parallel_tool_calls", "audio", "response_format", "user"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}',
+                    "metadata": '{"applied_guardrails": [], "batch_models": null, "mcp_tool_call_metadata": null, "usage_object": {"completion_tokens": 20, "prompt_tokens": 10, "total_tokens": 30, "completion_tokens_details": null, "prompt_tokens_details": null}, "model_map_information": {"model_map_key": "gpt-4o", "model_map_value": {"key": "gpt-4o", "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, "input_cost_per_token": 2.5e-06, "cache_creation_input_token_cost": null, "cache_read_input_token_cost": 1.25e-06, "input_cost_per_character": null, "input_cost_per_token_above_128k_tokens": null, "input_cost_per_token_above_200k_tokens": null, "input_cost_per_query": null, "input_cost_per_second": null, "input_cost_per_audio_token": null, "input_cost_per_token_batches": 1.25e-06, "output_cost_per_token_batches": 5e-06, "output_cost_per_token": 1e-05, "output_cost_per_audio_token": null, "output_cost_per_character": null, "output_cost_per_token_above_128k_tokens": null, "output_cost_per_character_above_128k_tokens": null, "output_cost_per_token_above_200k_tokens": null, "output_cost_per_second": null, "output_cost_per_reasoning_token": null, "output_cost_per_image": null, "output_vector_size": null, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, "supports_response_schema": true, "supports_vision": true, "supports_function_calling": true, "supports_tool_choice": true, "supports_assistant_prefill": false, "supports_prompt_caching": true, "supports_audio_input": false, "supports_audio_output": false, "supports_pdf_input": false, "supports_embedding_image_input": false, "supports_native_streaming": null, "supports_web_search": true, "supports_reasoning": false, "search_context_cost_per_query": {"search_context_size_low": 0.03, "search_context_size_medium": 0.035, "search_context_size_high": 0.05}, "tpm": null, "rpm": null, "supported_openai_params": ["frequency_penalty", "logit_bias", "logprobs", "top_logprobs", "max_tokens", "max_completion_tokens", "modalities", "prediction", "n", "presence_penalty", "seed", "stop", "stream", "stream_options", "temperature", "top_p", "tools", "tool_choice", "function_call", "functions", "max_retries", "extra_headers", "parallel_tool_calls", "audio", "response_format", "user"]}}, "additional_usage_values": {"completion_tokens_details": null, "prompt_tokens_details": null}}',
                     "cache_key": "Cache OFF",
                     "spend": 0.00022500000000000002,
                     "total_tokens": 30,
@@ -475,19 +485,11 @@ class TestSpendLogsPayload:
                 }
             )
 
-            for key, value in expected_payload.items():
-                if key in [
-                    "request_id",
-                    "startTime",
-                    "endTime",
-                    "completionStartTime",
-                    "endTime",
-                ]:
-                    assert payload[key] is not None
-                else:
-                    assert (
-                        payload[key] == value
-                    ), f"Expected {key} to be {value}, but got {payload[key]}"
+            differences = _compare_nested_dicts(
+                payload, expected_payload, ignore_keys=ignored_keys
+            )
+            if differences:
+                assert False, f"Dictionary mismatch: {differences}"
 
     def mock_anthropic_response(*args, **kwargs):
         mock_response = MagicMock()
@@ -573,19 +575,11 @@ class TestSpendLogsPayload:
                 }
             )
 
-            for key, value in expected_payload.items():
-                if key in [
-                    "request_id",
-                    "startTime",
-                    "endTime",
-                    "completionStartTime",
-                    "endTime",
-                ]:
-                    assert payload[key] is not None
-                else:
-                    assert (
-                        payload[key] == value
-                    ), f"Expected {key} to be {value}, but got {payload[key]}"
+            differences = _compare_nested_dicts(
+                payload, expected_payload, ignore_keys=ignored_keys
+            )
+            if differences:
+                assert False, f"Dictionary mismatch: {differences}"
 
     @pytest.mark.asyncio
     async def test_spend_logs_payload_success_log_with_router(self):
@@ -669,16 +663,71 @@ class TestSpendLogsPayload:
                 }
             )
 
-            for key, value in expected_payload.items():
-                if key in [
-                    "request_id",
-                    "startTime",
-                    "endTime",
-                    "completionStartTime",
-                    "endTime",
-                ]:
-                    assert payload[key] is not None
-                else:
-                    assert (
-                        payload[key] == value
-                    ), f"Expected {key} to be {value}, but got {payload[key]}"
+            differences = _compare_nested_dicts(
+                payload, expected_payload, ignore_keys=ignored_keys
+            )
+            if differences:
+                assert False, f"Dictionary mismatch: {differences}"
+
+
+def _compare_nested_dicts(
+    actual: dict, expected: dict, path: str = "", ignore_keys: list[str] = []
+) -> list[str]:
+    """Compare nested dictionaries and return a list of differences in a human-friendly format."""
+    differences = []
+
+    # Check if current path should be ignored
+    if path in ignore_keys:
+        return differences
+
+    # Check for keys in actual but not in expected
+    for key in actual.keys():
+        current_path = f"{path}.{key}" if path else key
+        if current_path not in ignore_keys and key not in expected:
+            differences.append(f"Extra key in actual: {current_path}")
+
+    for key, expected_value in expected.items():
+        current_path = f"{path}.{key}" if path else key
+        if current_path in ignore_keys:
+            continue
+        if key not in actual:
+            differences.append(f"Missing key: {current_path}")
+            continue
+
+        actual_value = actual[key]
+
+        # Try to parse JSON strings
+        if isinstance(expected_value, str):
+            try:
+                expected_value = json.loads(expected_value)
+            except json.JSONDecodeError:
+                pass
+        if isinstance(actual_value, str):
+            try:
+                actual_value = json.loads(actual_value)
+            except json.JSONDecodeError:
+                pass
+
+        if isinstance(expected_value, dict) and isinstance(actual_value, dict):
+            differences.extend(
+                _compare_nested_dicts(
+                    actual_value, expected_value, current_path, ignore_keys
+                )
+            )
+        elif isinstance(expected_value, dict) or isinstance(actual_value, dict):
+            differences.append(
+                f"Type mismatch at {current_path}: expected dict, got {type(actual_value).__name__}"
+            )
+        else:
+            # For non-dict values, only report if they're different
+            if actual_value != expected_value:
+                # Format the values to be more readable
+                actual_str = str(actual_value)
+                expected_str = str(expected_value)
+                if len(actual_str) > 50 or len(expected_str) > 50:
+                    actual_str = f"{actual_str[:50]}..."
+                    expected_str = f"{expected_str[:50]}..."
+                differences.append(
+                    f"Value mismatch at {current_path}:\n expected: {expected_str}\n got: {actual_str}"
+                )
+    return differences
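
As a rough illustration of what the new helper reports, a minimal usage sketch follows. It is not part of the patch above: the import path and the toy payloads are invented for the example, and it assumes the repository root is on sys.path so the test module is importable.

    # Hypothetical, minimal demo of _compare_nested_dicts on toy data.
    from tests.litellm.proxy.spend_tracking.test_spend_management_endpoints import (
        _compare_nested_dicts,
    )

    actual = {"model": "gpt-4o", "metadata": '{"usage_object": {"total_tokens": 31}}'}
    expected = {"model": "gpt-4o", "metadata": '{"usage_object": {"total_tokens": 30}}'}

    # JSON strings are parsed before comparison, so the mismatch is reported with a
    # dotted path, e.g. "Value mismatch at metadata.usage_object.total_tokens".
    print(_compare_nested_dicts(actual, expected))

    # Dotted paths in ignore_keys suppress whole subtrees (this is how the tests use
    # the module-level ignored_keys list), so this call returns an empty list.
    print(_compare_nested_dicts(actual, expected, ignore_keys=["metadata.usage_object"]))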