import json
import os
import sys

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

from unittest.mock import MagicMock, patch

from pydantic import BaseModel

import litellm
from litellm.cost_calculator import (
    handle_realtime_stream_cost_calculation,
    response_cost_calculator,
)
from litellm.types.llms.openai import OpenAIRealtimeStreamList
from litellm.types.utils import ModelResponse, PromptTokensDetailsWrapper, Usage


def test_cost_calculator_with_response_cost_in_additional_headers():
    """A response cost returned in provider headers should be used as-is."""

    class MockResponse(BaseModel):
        _hidden_params = {
            "additional_headers": {"llm_provider-x-litellm-response-cost": 1000}
        }

    result = response_cost_calculator(
        response_object=MockResponse(),
        model="",
        custom_llm_provider=None,
        call_type="",
        optional_params={},
        cache_hit=None,
        base_model=None,
    )

    assert result == 1000


def test_cost_calculator_with_usage():
    """Audio and text prompt tokens should each be priced at their own rate."""
    from litellm import get_model_info

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    usage = Usage(
        prompt_tokens=100,
        completion_tokens=100,
        prompt_tokens_details=PromptTokensDetailsWrapper(
            text_tokens=10, audio_tokens=90
        ),
    )
    mr = ModelResponse(usage=usage, model="gemini-2.0-flash-001")

    result = response_cost_calculator(
        response_object=mr,
        model="",
        custom_llm_provider="vertex_ai",
        call_type="acompletion",
        optional_params={},
        cache_hit=None,
        base_model=None,
    )

    model_info = litellm.model_cost["gemini-2.0-flash-001"]
    expected_cost = (
        usage.prompt_tokens_details.audio_tokens
        * model_info["input_cost_per_audio_token"]
        + usage.prompt_tokens_details.text_tokens
        * model_info["input_cost_per_token"]
        + usage.completion_tokens * model_info["output_cost_per_token"]
    )

    assert result == expected_cost, f"Got {result}, Expected {expected_cost}"


def test_handle_realtime_stream_cost_calculation():
    """Usage from realtime `response.done` events should be combined and priced."""
    from litellm.cost_calculator import RealtimeAPITokenUsageProcessor

    # Setup test data
    results: OpenAIRealtimeStreamList = [
        {"type": "session.created", "session": {"model": "gpt-3.5-turbo"}},
        {
            "type": "response.done",
            "response": {
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50,
                    "total_tokens": 150,
                }
            },
        },
        {
            "type": "response.done",
            "response": {
                "usage": {
                    "input_tokens": 200,
                    "output_tokens": 100,
                    "total_tokens": 300,
                }
            },
        },
    ]

    combined_usage_object = RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results(
        results=results,
    )

    # Test with explicit model name
    cost = handle_realtime_stream_cost_calculation(
        results=results,
        combined_usage_object=combined_usage_object,
        custom_llm_provider="openai",
        litellm_model_name="gpt-3.5-turbo",
    )

    # Calculate expected cost
    # gpt-3.5-turbo costs: $0.0015/1K tokens input, $0.002/1K tokens output
    expected_cost = (300 * 0.0015 / 1000) + (  # input tokens (100 + 200)
        150 * 0.002 / 1000
    )  # output tokens (50 + 100)
    assert (
        abs(cost - expected_cost) <= 0.00075
    )  # Allow small floating point differences

    # Test with different model name in session
    results[0]["session"]["model"] = "gpt-4"
    cost = handle_realtime_stream_cost_calculation(
        results=results,
        combined_usage_object=combined_usage_object,
        custom_llm_provider="openai",
        litellm_model_name="gpt-3.5-turbo",
    )

    # Calculate expected cost using gpt-4 rates
    # gpt-4 costs: $0.03/1K tokens input, $0.06/1K tokens output
    expected_cost = (300 * 0.03 / 1000) + (  # input tokens
        150 * 0.06 / 1000
    )  # output tokens
    assert abs(cost - expected_cost) < 0.00076

    # Test with no response.done events
    results = [{"type": "session.created", "session": {"model": "gpt-3.5-turbo"}}]
    combined_usage_object = RealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_results(
        results=results,
    )
    cost = handle_realtime_stream_cost_calculation(
        results=results,
        combined_usage_object=combined_usage_object,
        custom_llm_provider="openai",
        litellm_model_name="gpt-3.5-turbo",
    )
    assert cost == 0.0  # No usage, no cost


def test_custom_pricing_with_router_model_id():
    """Per-deployment pricing set via `model_info` should override defaults."""
    from litellm import Router

    router = Router(
        model_list=[
            {
                "model_name": "prod/claude-3-5-sonnet-20240620",
                "litellm_params": {
                    "model": "anthropic/claude-3-5-sonnet-20240620",
                    "api_key": "test_api_key",
                },
                "model_info": {
                    "id": "my-unique-model-id",
                    "input_cost_per_token": 0.000006,
                    "output_cost_per_token": 0.00003,
                    "cache_creation_input_token_cost": 0.0000075,
                    "cache_read_input_token_cost": 0.0000006,
                },
            },
            {
                "model_name": "claude-3-5-sonnet-20240620",
                "litellm_params": {
                    "model": "anthropic/claude-3-5-sonnet-20240620",
                    "api_key": "test_api_key",
                },
                "model_info": {
                    "input_cost_per_token": 100,
                    "output_cost_per_token": 200,
                },
            },
        ]
    )

    result = router.completion(
        model="claude-3-5-sonnet-20240620",
        messages=[{"role": "user", "content": "Hello, world!"}],
        mock_response=True,
    )

    result_2 = router.completion(
        model="prod/claude-3-5-sonnet-20240620",
        messages=[{"role": "user", "content": "Hello, world!"}],
        mock_response=True,
    )

    assert (
        result._hidden_params["response_cost"]
        > result_2._hidden_params["response_cost"]
    )

    model_info = router.get_deployment_model_info(
        model_id="my-unique-model-id",
        model_name="anthropic/claude-3-5-sonnet-20240620",
    )
    assert model_info is not None
    assert model_info["input_cost_per_token"] == 0.000006
    assert model_info["output_cost_per_token"] == 0.00003
    assert model_info["cache_creation_input_token_cost"] == 0.0000075
    assert model_info["cache_read_input_token_cost"] == 0.0000006


def test_azure_realtime_cost_calculator():
    """Cost should resolve from the session's model name, not the Azure deployment name."""
    from litellm import get_model_info

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    cost = handle_realtime_stream_cost_calculation(
        results=[
            {
                "type": "session.created",
                "session": {"model": "gpt-4o-realtime-preview-2024-12-17"},
            },
        ],
        combined_usage_object=Usage(
            prompt_tokens=100,
            completion_tokens=100,
            prompt_tokens_details=PromptTokensDetailsWrapper(
                text_tokens=10, audio_tokens=90
            ),
        ),
        custom_llm_provider="azure",
        litellm_model_name="my-custom-azure-deployment",
    )

    assert cost > 0