diff --git a/litellm/__init__.py b/litellm/__init__.py
index 60b8cf81a0..a3756251d1 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -308,7 +308,7 @@ _key_management_settings: KeyManagementSettings = KeyManagementSettings()
 #### PII MASKING ####
 output_parse_pii: bool = False
 #############################################
-from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map
+from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map, get_locally_cached_model_cost_map
 model_cost = get_model_cost_map(url=model_cost_map_url)
 custom_prompt_dict: Dict[str, dict] = {}
diff --git a/litellm/litellm_core_utils/get_model_cost_map.py b/litellm/litellm_core_utils/get_model_cost_map.py
index b8bdaee19c..0e14457b2a 100644
--- a/litellm/litellm_core_utils/get_model_cost_map.py
+++ b/litellm/litellm_core_utils/get_model_cost_map.py
@@ -8,24 +8,29 @@
 export LITELLM_LOCAL_MODEL_COST_MAP=True
 ```
 """
+from functools import cache
 import os
 
 import httpx
 
 
+@cache
+def get_locally_cached_model_cost_map():
+    import importlib.resources
+    import json
+
+    with importlib.resources.open_text(
+        "litellm", "model_prices_and_context_window_backup.json"
+    ) as f:
+        content = json.load(f)
+    return content
+
+
 def get_model_cost_map(url: str):
     if (
         os.getenv("LITELLM_LOCAL_MODEL_COST_MAP", False)
         or os.getenv("LITELLM_LOCAL_MODEL_COST_MAP", False) == "True"
     ):
-        import importlib.resources
-        import json
-
-        with importlib.resources.open_text(
-            "litellm", "model_prices_and_context_window_backup.json"
-        ) as f:
-            content = json.load(f)
-        return content
+        return get_locally_cached_model_cost_map()
 
     try:
         response = httpx.get(
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 96076fa3b8..961b55f49b 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -6,7 +6,7 @@
         "input_cost_per_token": 0.0000,
         "output_cost_per_token": 0.000,
         "litellm_provider": "one of https://docs.litellm.ai/docs/providers",
-        "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
+        "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, moderation, moderations, rerank",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
diff --git a/tests/code_coverage_tests/bedrock_pricing.py b/tests/code_coverage_tests/bedrock_pricing.py
index b2c9e78b06..9984cb8b0e 100644
--- a/tests/code_coverage_tests/bedrock_pricing.py
+++ b/tests/code_coverage_tests/bedrock_pricing.py
@@ -191,8 +191,7 @@ def _check_if_model_name_in_pricing(
     input_cost_per_1k_tokens: str,
     output_cost_per_1k_tokens: str,
 ):
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     for model, value in litellm.model_cost.items():
         if model.startswith(bedrock_model_name):
diff --git a/tests/litellm_utils_tests/test_utils.py b/tests/litellm_utils_tests/test_utils.py
index 2b1e78a681..fd8ad01c8b 100644
--- a/tests/litellm_utils_tests/test_utils.py
+++ b/tests/litellm_utils_tests/test_utils.py
@@ -907,8 +907,7 @@ def test_supports_response_schema(model, expected_bool):
     Should be true for gemini-1.5-pro on google ai studio / vertex ai AND predibase models
     Should be false otherwise
     """
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     from litellm.utils import supports_response_schema
 
@@ -1066,8 +1065,7 @@ def test_async_http_handler_force_ipv4(mock_async_client):
     "model, expected_bool", [("gpt-3.5-turbo", False), ("gpt-4o-audio-preview", True)]
 )
 def test_supports_audio_input(model, expected_bool):
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     from litellm.utils import supports_audio_input, supports_audio_output
 
@@ -1165,8 +1163,7 @@ def test_models_by_provider():
     """
     Make sure all providers from model map are in the valid providers list
     """
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     from litellm import models_by_provider
 
@@ -1484,8 +1481,7 @@ def test_get_valid_models_default(monkeypatch):
 
 
 def test_supports_vision_gemini():
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     from litellm.utils import supports_vision
 
     assert supports_vision("gemini-1.5-pro") is True
diff --git a/tests/llm_translation/base_embedding_unit_tests.py b/tests/llm_translation/base_embedding_unit_tests.py
index 30a9dcc0da..1fcc825481 100644
--- a/tests/llm_translation/base_embedding_unit_tests.py
+++ b/tests/llm_translation/base_embedding_unit_tests.py
@@ -84,8 +84,7 @@ class BaseLLMEmbeddingTest(ABC):
         litellm.set_verbose = True
         from litellm.utils import supports_embedding_image_input
 
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
         base_embedding_call_args = self.get_base_embedding_call_args()
         if not supports_embedding_image_input(base_embedding_call_args["model"], None):
diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py
index f91ef0eae9..eb18cbce90 100644
--- a/tests/llm_translation/base_llm_unit_tests.py
+++ b/tests/llm_translation/base_llm_unit_tests.py
@@ -342,8 +342,7 @@ class BaseLLMChatTest(ABC):
         from pydantic import BaseModel
         from litellm.utils import supports_response_schema
 
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
         class TestModel(BaseModel):
             first_response: str
 
@@ -382,16 +381,14 @@ class BaseLLMChatTest(ABC):
         from pydantic import BaseModel
         from litellm.utils import supports_response_schema
 
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     @pytest.mark.flaky(retries=6, delay=1)
     def test_json_response_nested_pydantic_obj(self):
         from pydantic import BaseModel
         from litellm.utils import supports_response_schema
 
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
         class CalendarEvent(BaseModel):
             name: str
 
@@ -438,8 +435,7 @@ class BaseLLMChatTest(ABC):
         from litellm.utils import supports_response_schema
         from litellm.llms.base_llm.base_utils import type_to_response_format_param
 
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
         class CalendarEvent(BaseModel):
             name: str
 
@@ -560,8 +556,7 @@ class BaseLLMChatTest(ABC):
         litellm.set_verbose = True
         from litellm.utils import supports_vision
 
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
         base_completion_call_args = self.get_base_completion_call_args()
         if not supports_vision(base_completion_call_args["model"], None):
@@ -615,8 +610,7 @@ class BaseLLMChatTest(ABC):
         litellm.set_verbose = True
         from litellm.utils import supports_vision
 
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
         image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
 
@@ -656,8 +650,7 @@ class BaseLLMChatTest(ABC):
         litellm.set_verbose = True
         from litellm.utils import supports_prompt_caching
 
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
         base_completion_call_args = self.get_base_completion_call_args()
         if not supports_prompt_caching(base_completion_call_args["model"], None):
@@ -773,8 +766,7 @@ class BaseLLMChatTest(ABC):
         litellm._turn_on_debug()
         from litellm.utils import supports_function_calling
 
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
         base_completion_call_args = self.get_base_completion_call_args()
         if not supports_function_calling(base_completion_call_args["model"], None):
@@ -872,8 +864,7 @@ class BaseLLMChatTest(ABC):
     async def test_completion_cost(self):
         from litellm import completion_cost
 
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
         litellm.set_verbose = True
         response = await self.async_completion_function(
diff --git a/tests/llm_translation/base_rerank_unit_tests.py b/tests/llm_translation/base_rerank_unit_tests.py
index cff4a02753..b3f56f7c64 100644
--- a/tests/llm_translation/base_rerank_unit_tests.py
+++ b/tests/llm_translation/base_rerank_unit_tests.py
@@ -87,8 +87,7 @@ class BaseLLMRerankTest(ABC):
     @pytest.mark.parametrize("sync_mode", [True, False])
     async def test_basic_rerank(self, sync_mode):
         litellm._turn_on_debug()
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
         rerank_call_args = self.get_base_rerank_call_args()
         custom_llm_provider = self.get_custom_llm_provider()
         if sync_mode is True:
diff --git a/tests/llm_translation/test_anthropic_completion.py b/tests/llm_translation/test_anthropic_completion.py
index 37253a37e6..04158b4ab4 100644
--- a/tests/llm_translation/test_anthropic_completion.py
+++ b/tests/llm_translation/test_anthropic_completion.py
@@ -693,8 +693,7 @@ class TestAnthropicCompletion(BaseLLMChatTest):
         from pydantic import BaseModel
         from litellm.utils import supports_response_schema
 
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
         class RFormat(BaseModel):
             question: str
diff --git a/tests/llm_translation/test_bedrock_completion.py b/tests/llm_translation/test_bedrock_completion.py
index 2fb0ffb9e5..99e4e7ed1a 100644
--- a/tests/llm_translation/test_bedrock_completion.py
+++ b/tests/llm_translation/test_bedrock_completion.py
@@ -1975,8 +1975,7 @@ def test_bedrock_converse_route():
 
 def test_bedrock_mapped_converse_models():
     litellm.set_verbose = True
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     litellm.add_known_models()
     litellm.completion(
         model="bedrock/us.amazon.nova-pro-v1:0",
@@ -2108,8 +2107,7 @@ def test_bedrock_supports_tool_call(model, expected_supports_tool_call):
 
 class TestBedrockConverseChatCrossRegion(BaseLLMChatTest):
     def get_base_completion_call_args(self) -> dict:
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
         litellm.add_known_models()
         return {
             "model": "bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0",
@@ -2137,8 +2135,7 @@ class TestBedrockConverseChatCrossRegion(BaseLLMChatTest):
         """
         Test if region models info is correctly used for cost calculation. Using the base model info for cost calculation.
         """
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
         bedrock_model = "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
         litellm.model_cost.pop(bedrock_model, None)
         model = f"bedrock/{bedrock_model}"
@@ -2155,8 +2152,7 @@ class TestBedrockConverseChatCrossRegion(BaseLLMChatTest):
 
 class TestBedrockConverseChatNormal(BaseLLMChatTest):
     def get_base_completion_call_args(self) -> dict:
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
         litellm.add_known_models()
         return {
             "model": "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
@@ -2325,8 +2321,7 @@ def test_bedrock_nova_topk(top_k_param):
 
 def test_bedrock_cross_region_inference(monkeypatch):
     from litellm.llms.custom_httpx.http_handler import HTTPHandler
 
-    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     litellm.add_known_models()
     litellm.set_verbose = True
 
diff --git a/tests/llm_translation/test_openai_o1.py b/tests/llm_translation/test_openai_o1.py
index 4208f1ae38..bcd7648f2a 100644
--- a/tests/llm_translation/test_openai_o1.py
+++ b/tests/llm_translation/test_openai_o1.py
@@ -29,8 +29,7 @@ async def test_o1_handle_system_role(model):
     from openai import AsyncOpenAI
     from litellm.utils import supports_system_messages
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     litellm.set_verbose = True
 
@@ -83,8 +82,7 @@ async def test_o1_handle_tool_calling_optional_params(
     from litellm.utils import ProviderConfigManager
     from litellm.types.utils import LlmProviders
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     config = ProviderConfigManager.get_provider_chat_config(
         model=model, provider=LlmProviders.OPENAI
     )
@@ -190,8 +188,7 @@ class TestOpenAIO3(BaseOSeriesModelsTest, BaseLLMChatTest):
 
 def test_o1_supports_vision():
     """Test that o1 supports vision"""
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     for k, v in litellm.model_cost.items():
         if k.startswith("o1") and v.get("litellm_provider") == "openai":
             assert v.get("supports_vision") is True, f"{k} does not support vision"
diff --git a/tests/llm_translation/test_rerank.py b/tests/llm_translation/test_rerank.py
index d2cb2b6fea..ef5df795ab 100644
--- a/tests/llm_translation/test_rerank.py
+++ b/tests/llm_translation/test_rerank.py
@@ -274,8 +274,7 @@ class TestLogger(CustomLogger):
 
 @pytest.mark.asyncio()
 async def test_rerank_custom_callbacks():
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     custom_logger = TestLogger()
     litellm.callbacks = [custom_logger]
diff --git a/tests/llm_translation/test_together_ai.py b/tests/llm_translation/test_together_ai.py
index b83a700002..f275500817 100644
--- a/tests/llm_translation/test_together_ai.py
+++ b/tests/llm_translation/test_together_ai.py
@@ -42,8 +42,7 @@ class TestTogetherAI(BaseLLMChatTest):
     def test_get_supported_response_format_together_ai(
         self, model: str, expected_bool: bool
     ) -> None:
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
         optional_params = litellm.get_supported_openai_params(
             model, custom_llm_provider="together_ai"
         )
diff --git a/tests/local_testing/test_amazing_vertex_completion.py b/tests/local_testing/test_amazing_vertex_completion.py
index 02e0c9b2f1..d59df956be 100644
--- a/tests/local_testing/test_amazing_vertex_completion.py
+++ b/tests/local_testing/test_amazing_vertex_completion.py
@@ -1433,8 +1433,7 @@ async def test_gemini_pro_json_schema_args_sent_httpx(
     enforce_validation,
 ):
     load_vertex_ai_credentials()
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     litellm.set_verbose = True
 
     messages = [{"role": "user", "content": "List 5 cookie recipes"}]
@@ -1554,8 +1553,7 @@ async def test_gemini_pro_json_schema_args_sent_httpx_openai_schema(
     from pydantic import BaseModel
 
     load_vertex_ai_credentials()
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     litellm.set_verbose = True
diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py
index 200f2c012e..77d49961bd 100644
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@@ -634,8 +634,7 @@ def test_gemini_completion_cost(above_128k, provider):
     """
    Check if cost correctly calculated for gemini models based on context window
     """
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     if provider == "gemini":
         model_name = "gemini-1.5-flash-latest"
     else:
@@ -690,8 +689,7 @@ def _count_characters(text):
 
 
 def test_vertex_ai_completion_cost():
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     text = "The quick brown fox jumps over the lazy dog."
     characters = _count_characters(text=text)
@@ -726,8 +724,7 @@ def test_vertex_ai_medlm_completion_cost():
             model=model, messages=messages, custom_llm_provider="vertex_ai"
         )
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     model = "vertex_ai/medlm-medium"
     messages = [{"role": "user", "content": "Test MedLM completion cost."}]
@@ -746,8 +743,7 @@ def test_vertex_ai_claude_completion_cost():
     from litellm import Choices, Message, ModelResponse
     from litellm.utils import Usage
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     litellm.set_verbose = True
     input_tokens = litellm.token_counter(
@@ -796,8 +792,7 @@ def test_vertex_ai_embedding_completion_cost(caplog):
     """
     Relevant issue - https://github.com/BerriAI/litellm/issues/4630
     """
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     text = "The quick brown fox jumps over the lazy dog."
     input_tokens = litellm.token_counter(
@@ -839,8 +834,7 @@ def test_vertex_ai_embedding_completion_cost(caplog):
 #     from test_amazing_vertex_completion import load_vertex_ai_credentials
 
 #     load_vertex_ai_credentials()
-#     os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-#     litellm.model_cost = litellm.get_model_cost_map(url="")
+#     litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
 #     text = "The quick brown fox jumps over the lazy dog."
 #     input_tokens = litellm.token_counter(
@@ -867,8 +861,7 @@ def test_vertex_ai_embedding_completion_cost(caplog):
 
 def test_completion_azure_ai():
     try:
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
         litellm.set_verbose = True
         response = litellm.completion(
@@ -974,8 +967,7 @@ def test_vertex_ai_mistral_predict_cost(usage):
 
 @pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"])
 def test_completion_cost_tts(model):
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     cost = completion_cost(
         model=model,
@@ -1171,8 +1163,7 @@ def test_completion_cost_azure_common_deployment_name():
     ],
 )
 def test_completion_cost_prompt_caching(model, custom_llm_provider):
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     from litellm.utils import Choices, Message, ModelResponse, Usage
 
@@ -1273,8 +1264,7 @@ def test_completion_cost_prompt_caching(model, custom_llm_provider):
     ],
 )
 def test_completion_cost_databricks(model):
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     model, messages = model, [{"role": "user", "content": "What is 2+2?"}]
 
     resp = litellm.completion(model=model, messages=messages)  # works fine
@@ -1291,8 +1281,7 @@ def test_completion_cost_databricks(model):
     ],
 )
 def test_completion_cost_databricks_embedding(model):
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     resp = litellm.embedding(model=model, input=["hey, how's it going?"])  # works fine
 
     print(resp)
@@ -1319,8 +1308,7 @@ def test_get_model_params_fireworks_ai(model, base_model):
     ["fireworks_ai/llama-v3p1-405b-instruct", "fireworks_ai/mixtral-8x7b-instruct"],
 )
 def test_completion_cost_fireworks_ai(model):
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     messages = [{"role": "user", "content": "Hey, how's it going?"}]
 
     resp = litellm.completion(model=model, messages=messages)  # works fine
@@ -1337,8 +1325,7 @@ def test_cost_azure_openai_prompt_caching():
     )
     from litellm import get_model_info
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     model = "azure/o1-mini"
 
@@ -1427,8 +1414,7 @@ def test_cost_azure_openai_prompt_caching():
 
 
 def test_completion_cost_vertex_llama3():
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     from litellm.utils import Choices, Message, ModelResponse, Usage
 
@@ -1468,8 +1454,7 @@ def test_cost_openai_prompt_caching():
     from litellm.utils import Choices, Message, ModelResponse, Usage
     from litellm import get_model_info
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     model = "gpt-4o-mini-2024-07-18"
 
@@ -1559,8 +1544,7 @@ def test_cost_openai_prompt_caching():
 def test_completion_cost_azure_ai_rerank(model):
     from litellm import RerankResponse, rerank
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     response = RerankResponse(
         id="b01dbf2e-63c8-4981-9e69-32241da559ed",
@@ -1591,8 +1575,7 @@ def test_completion_cost_azure_ai_rerank(model):
 def test_together_ai_embedding_completion_cost():
     from litellm.utils import Choices, EmbeddingResponse, Message, ModelResponse, Usage
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     response = EmbeddingResponse(
         model="togethercomputer/m2-bert-80M-8k-retrieval",
         data=[
@@ -2449,8 +2432,7 @@ def test_completion_cost_params_gemini_3():
 
     from litellm.llms.vertex_ai.cost_calculator import cost_per_character
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     response = ModelResponse(
         id="chatcmpl-61043504-4439-48be-9996-e29bdee24dc3",
@@ -2519,8 +2501,7 @@ def test_completion_cost_params_gemini_3():
 # @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.parametrize("stream", [False])  # True,
 async def test_test_completion_cost_gpt4o_audio_output_from_model(stream):
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     from litellm.types.utils import (
         Choices,
         Message,
@@ -2617,8 +2598,7 @@ def test_completion_cost_model_response_cost(response_model, custom_llm_provider):
     """
     from litellm import ModelResponse
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     litellm.set_verbose = True
     response = {
@@ -2718,8 +2698,7 @@ def test_select_model_name_for_cost_calc():
 
 def test_moderations():
     from litellm import moderation
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     litellm.add_known_models()
 
     assert "omni-moderation-latest" in litellm.model_cost
@@ -2772,8 +2751,7 @@ def test_bedrock_cost_calc_with_region():
 
     from litellm import ModelResponse
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
 
     litellm.add_known_models()
 
@@ -2972,9 +2950,7 @@ async def test_cost_calculator_with_custom_pricing_router(model_item, custom_pri
 def test_json_valid_model_cost_map():
     import json
 
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-
-    model_cost = litellm.get_model_cost_map(url="")
+    model_cost = litellm.get_locally_cached_model_cost_map()
 
     try:
         # Attempt to serialize and deserialize the JSON
diff --git a/tests/local_testing/test_embedding.py b/tests/local_testing/test_embedding.py
index c85a830e5f..c369dd73eb 100644
--- a/tests/local_testing/test_embedding.py
+++ b/tests/local_testing/test_embedding.py
@@ -115,8 +115,7 @@ def test_openai_embedding_3():
 @pytest.mark.asyncio
 async def test_openai_azure_embedding_simple(model, api_base, api_key, sync_mode):
     try:
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
         # litellm.set_verbose = True
         if sync_mode:
             response = embedding(
@@ -198,8 +197,7 @@ def _azure_ai_image_mock_response(*args, **kwargs):
 @pytest.mark.asyncio
 async def test_azure_ai_embedding_image(model, api_base, api_key, sync_mode):
     try:
-        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
         input = base64_image
         if sync_mode:
             client = HTTPHandler()
diff --git a/tests/local_testing/test_get_model_info.py b/tests/local_testing/test_get_model_info.py
index c879332c7b..c40ac41be2 100644
--- a/tests/local_testing/test_get_model_info.py
+++ b/tests/local_testing/test_get_model_info.py
@@ -58,16 +58,14 @@ def test_get_model_info_shows_correct_supports_vision():
 
 
 def test_get_model_info_shows_assistant_prefill():
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     info = litellm.get_model_info("deepseek/deepseek-chat")
     print("info", info)
     assert info.get("supports_assistant_prefill") is True
 
 
 def test_get_model_info_shows_supports_prompt_caching():
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     info = litellm.get_model_info("deepseek/deepseek-chat")
     print("info", info)
     assert info.get("supports_prompt_caching") is True
@@ -116,8 +114,7 @@ def test_get_model_info_gemini():
     """
     Tests if ALL gemini models have 'tpm' and 'rpm' in the model info
     """
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     model_map = litellm.model_cost
 
     for model, info in model_map.items():
@@ -127,8 +124,7 @@
 
 
 def test_get_model_info_bedrock_region():
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     args = {
         "model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
         "custom_llm_provider": "bedrock",
@@ -212,8 +208,7 @@ def test_model_info_bedrock_converse(monkeypatch):
 
     This ensures they are automatically routed to the converse endpoint.
     """
-    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
-    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
     try:
         # Load whitelist models from file
         with open("whitelisted_bedrock_models.txt", "r") as file:
@@ -231,8 +226,7 @@ def test_model_info_bedrock_converse_enforcement(monkeypatch):
     """
     Test the enforcement of the whitelist by adding a fake model and ensuring the test fails.
""" - monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True") - litellm.model_cost = litellm.get_model_cost_map(url="") + litellm.model_cost = litellm.get_locally_cached_model_cost_map() # Add a fake unwhitelisted model litellm.model_cost["fake.bedrock-chat-model"] = { @@ -323,8 +317,7 @@ def test_get_model_info_bedrock_models(): """ from litellm.llms.bedrock.common_utils import BedrockModelInfo - os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" - litellm.model_cost = litellm.get_model_cost_map(url="") + litellm.model_cost = litellm.get_locally_cached_model_cost_map() for k, v in litellm.model_cost.items(): if v["litellm_provider"] == "bedrock": diff --git a/tests/local_testing/test_router_utils.py b/tests/local_testing/test_router_utils.py index 7de9707579..d0afc440d9 100644 --- a/tests/local_testing/test_router_utils.py +++ b/tests/local_testing/test_router_utils.py @@ -178,8 +178,7 @@ async def test_update_kwargs_before_fallbacks(call_type): def test_router_get_model_info_wildcard_routes(): - os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" - litellm.model_cost = litellm.get_model_cost_map(url="") + litellm.model_cost = litellm.get_locally_cached_model_cost_map() router = Router( model_list=[ { @@ -200,8 +199,7 @@ def test_router_get_model_info_wildcard_routes(): @pytest.mark.asyncio async def test_router_get_model_group_usage_wildcard_routes(): - os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" - litellm.model_cost = litellm.get_model_cost_map(url="") + litellm.model_cost = litellm.get_locally_cached_model_cost_map() router = Router( model_list=[ { @@ -297,8 +295,7 @@ async def test_call_router_callbacks_on_failure(): @pytest.mark.asyncio async def test_router_model_group_headers(): - os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" - litellm.model_cost = litellm.get_model_cost_map(url="") + litellm.model_cost = litellm.get_locally_cached_model_cost_map() from litellm.types.utils import OPENAI_RESPONSE_HEADERS router = Router( @@ -330,8 +327,7 @@ async def test_router_model_group_headers(): @pytest.mark.asyncio async def test_get_remaining_model_group_usage(): - os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" - litellm.model_cost = litellm.get_model_cost_map(url="") + litellm.model_cost = litellm.get_locally_cached_model_cost_map() from litellm.types.utils import OPENAI_RESPONSE_HEADERS router = Router( diff --git a/tests/test_model_prices_and_context_window_schema.py b/tests/test_model_prices_and_context_window_schema.py new file mode 100644 index 0000000000..80d35f84b4 --- /dev/null +++ b/tests/test_model_prices_and_context_window_schema.py @@ -0,0 +1,111 @@ +import litellm +from jsonschema import validate + +def test_model_prices_and_context_window_json_is_valid(): + ''' + Validates the `model_prices_and_context_window.json` file. + + If this test fails after you update the json, you need to update the schema or correct the change you made. 
+    '''
+
+    INTENDED_SCHEMA = {
+        "type": "object",
+        "additionalProperties": {
+            "type": "object",
+            "properties": {
+                "cache_creation_input_audio_token_cost": {"type": "number"},
+                "cache_creation_input_token_cost": {"type": "number"},
+                "cache_read_input_token_cost": {"type": "number"},
+                "deprecation_date": {"type": "string"},
+                "input_cost_per_audio_per_second": {"type": "number"},
+                "input_cost_per_audio_per_second_above_128k_tokens": {"type": "number"},
+                "input_cost_per_audio_token": {"type": "number"},
+                "input_cost_per_character": {"type": "number"},
+                "input_cost_per_character_above_128k_tokens": {"type": "number"},
+                "input_cost_per_image": {"type": "number"},
+                "input_cost_per_image_above_128k_tokens": {"type": "number"},
+                "input_cost_per_pixel": {"type": "number"},
+                "input_cost_per_query": {"type": "number"},
+                "input_cost_per_request": {"type": "number"},
+                "input_cost_per_second": {"type": "number"},
+                "input_cost_per_token": {"type": "number"},
+                "input_cost_per_token_above_128k_tokens": {"type": "number"},
+                "input_cost_per_token_batch_requests": {"type": "number"},
+                "input_cost_per_token_batches": {"type": "number"},
+                "input_cost_per_token_cache_hit": {"type": "number"},
+                "input_cost_per_video_per_second": {"type": "number"},
+                "input_cost_per_video_per_second_above_128k_tokens": {"type": "number"},
+                "input_dbu_cost_per_token": {"type": "number"},
+                "litellm_provider": {"type": "string"},
+                "max_audio_length_hours": {"type": "number"},
+                "max_audio_per_prompt": {"type": "number"},
+                "max_document_chunks_per_query": {"type": "number"},
+                "max_images_per_prompt": {"type": "number"},
+                "max_input_tokens": {"type": "number"},
+                "max_output_tokens": {"type": "number"},
+                "max_pdf_size_mb": {"type": "number"},
+                "max_query_tokens": {"type": "number"},
+                "max_tokens": {"type": "number"},
+                "max_tokens_per_document_chunk": {"type": "number"},
+                "max_video_length": {"type": "number"},
+                "max_videos_per_prompt": {"type": "number"},
+                "metadata": {"type": "object"},
+                "mode": {
+                    "type": "string",
+                    "enum": [
+                        "audio_speech",
+                        "audio_transcription",
+                        "chat",
+                        "completion",
+                        "embedding",
+                        "image_generation",
+                        "moderation",
+                        "moderations",
+                        "rerank"
+                    ],
+                },
+                "output_cost_per_audio_token": {"type": "number"},
+                "output_cost_per_character": {"type": "number"},
+                "output_cost_per_character_above_128k_tokens": {"type": "number"},
+                "output_cost_per_image": {"type": "number"},
+                "output_cost_per_pixel": {"type": "number"},
+                "output_cost_per_second": {"type": "number"},
+                "output_cost_per_token": {"type": "number"},
+                "output_cost_per_token_above_128k_tokens": {"type": "number"},
+                "output_cost_per_token_batches": {"type": "number"},
+                "output_db_cost_per_token": {"type": "number"},
+                "output_dbu_cost_per_token": {"type": "number"},
+                "output_vector_size": {"type": "number"},
+                "rpd": {"type": "number"},
+                "rpm": {"type": "number"},
+                "source": {"type": "string"},
+                "supports_assistant_prefill": {"type": "boolean"},
+                "supports_audio_input": {"type": "boolean"},
+                "supports_audio_output": {"type": "boolean"},
+                "supports_embedding_image_input": {"type": "boolean"},
+                "supports_function_calling": {"type": "boolean"},
+                "supports_image_input": {"type": "boolean"},
+                "supports_parallel_function_calling": {"type": "boolean"},
+                "supports_pdf_input": {"type": "boolean"},
+                "supports_prompt_caching": {"type": "boolean"},
+                "supports_response_schema": {"type": "boolean"},
+                "supports_system_messages": {"type": "boolean"},
+                "supports_tool_choice": {"type": "boolean"},
"supports_video_input": {"type": "boolean"}, + "supports_vision": {"type": "boolean"}, + "tool_use_system_prompt_tokens": {"type": "number"}, + "tpm": {"type": "number"}, + }, + "additionalProperties": False, + }, + } + + actual_json = litellm.get_locally_cached_model_cost_map() + assert isinstance(actual_json, dict) + temporarily_removed = actual_json.pop('sample_spec', None) # remove the sample, whose schema is inconsistent with the real data + + validate(actual_json, INTENDED_SCHEMA) + + if temporarily_removed is not None: + # put back the sample spec that we removed + actual_json.update({'sample_spec': temporarily_removed})