Revert "changes"

This reverts commit fa88bc9632.
Utkash Dubey 2025-03-10 14:46:21 -07:00
parent 1f17daf52c
commit c7a04140e9
19 changed files with 141 additions and 191 deletions
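In effect, the revert drops the cached `get_locally_cached_model_cost_map()` helper and restores the environment-variable-driven loading that the hunks below switch back to. A minimal sketch of the restored pattern, as it appears throughout the test changes (file paths and surrounding code are not shown in this view):

import os

import litellm

# Force the locally bundled cost map instead of fetching the hosted JSON over HTTP.
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")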

View file

@@ -306,7 +306,7 @@ _key_management_settings: KeyManagementSettings = KeyManagementSettings()
 #### PII MASKING ####
 output_parse_pii: bool = False
 #############################################
-from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map, get_locally_cached_model_cost_map
+from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map
 model_cost = get_model_cost_map(url=model_cost_map_url)
 custom_prompt_dict: Dict[str, dict] = {}

View file

@@ -8,13 +8,16 @@ export LITELLM_LOCAL_MODEL_COST_MAP=True
 ```
 """
-from functools import cache
 import os
 import httpx
-@cache
-def get_locally_cached_model_cost_map():
+def get_model_cost_map(url: str):
+    if (
+        os.getenv("LITELLM_LOCAL_MODEL_COST_MAP", False)
+        or os.getenv("LITELLM_LOCAL_MODEL_COST_MAP", False) == "True"
+    ):
 import importlib.resources
 import json
@@ -24,14 +27,6 @@ def get_locally_cached_model_cost_map():
 content = json.load(f)
 return content
-def get_model_cost_map(url: str):
-    if (
-        os.getenv("LITELLM_LOCAL_MODEL_COST_MAP", False)
-        or os.getenv("LITELLM_LOCAL_MODEL_COST_MAP", False) == "True"
-    ):
-        return get_locally_cached_model_cost_map()
 try:
 response = httpx.get(
 url, timeout=5
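For orientation, a condensed sketch of the control flow the reverted get_model_cost_map ends up with, based on the two hunks above; the packaged backup filename and the final response handling are assumptions, since they fall outside the visible context:

import importlib.resources
import json
import os

import httpx


def get_model_cost_map_sketch(url: str) -> dict:
    # When the env var is set, read the JSON bundled with the package instead of the network.
    if os.getenv("LITELLM_LOCAL_MODEL_COST_MAP", False) or (
        os.getenv("LITELLM_LOCAL_MODEL_COST_MAP", False) == "True"
    ):
        # Filename assumed; the hunk only shows the importlib.resources/json imports and json.load(f).
        with importlib.resources.open_text(
            "litellm", "model_prices_and_context_window_backup.json"
        ) as f:
            return json.load(f)
    # Otherwise fetch the hosted map with the 5-second timeout shown above.
    response = httpx.get(url, timeout=5)
    response.raise_for_status()  # assumed error handling
    return response.json()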

View file

@@ -6,7 +6,7 @@
 "input_cost_per_token": 0.0000,
 "output_cost_per_token": 0.000,
 "litellm_provider": "one of https://docs.litellm.ai/docs/providers",
-"mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, moderations, rerank",
+"mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
 "supports_function_calling": true,
 "supports_parallel_function_calling": true,
 "supports_vision": true,

View file

@@ -191,7 +191,8 @@ def _check_if_model_name_in_pricing(
 input_cost_per_1k_tokens: str,
 output_cost_per_1k_tokens: str,
 ):
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 for model, value in litellm.model_cost.items():
 if model.startswith(bedrock_model_name):

View file

@@ -907,7 +907,8 @@ def test_supports_response_schema(model, expected_bool):
 Should be true for gemini-1.5-pro on google ai studio / vertex ai AND predibase models
 Should be false otherwise
 """
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 from litellm.utils import supports_response_schema
@@ -1065,7 +1066,8 @@ def test_async_http_handler_force_ipv4(mock_async_client):
 "model, expected_bool", [("gpt-3.5-turbo", False), ("gpt-4o-audio-preview", True)]
 )
 def test_supports_audio_input(model, expected_bool):
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 from litellm.utils import supports_audio_input, supports_audio_output
@@ -1163,7 +1165,8 @@ def test_models_by_provider():
 """
 Make sure all providers from model map are in the valid providers list
 """
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 from litellm import models_by_provider
@@ -1481,7 +1484,8 @@ def test_get_valid_models_default(monkeypatch):
 def test_supports_vision_gemini():
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 from litellm.utils import supports_vision
 assert supports_vision("gemini-1.5-pro") is True

View file

@@ -84,7 +84,8 @@ class BaseLLMEmbeddingTest(ABC):
 litellm.set_verbose = True
 from litellm.utils import supports_embedding_image_input
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 base_embedding_call_args = self.get_base_embedding_call_args()
 if not supports_embedding_image_input(base_embedding_call_args["model"], None):

View file

@@ -342,7 +342,8 @@ class BaseLLMChatTest(ABC):
 from pydantic import BaseModel
 from litellm.utils import supports_response_schema
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 class TestModel(BaseModel):
 first_response: str
@@ -381,14 +382,16 @@ class BaseLLMChatTest(ABC):
 from pydantic import BaseModel
 from litellm.utils import supports_response_schema
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 @pytest.mark.flaky(retries=6, delay=1)
 def test_json_response_nested_pydantic_obj(self):
 from pydantic import BaseModel
 from litellm.utils import supports_response_schema
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 class CalendarEvent(BaseModel):
 name: str
@@ -435,7 +438,8 @@ class BaseLLMChatTest(ABC):
 from litellm.utils import supports_response_schema
 from litellm.llms.base_llm.base_utils import type_to_response_format_param
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 class CalendarEvent(BaseModel):
 name: str
@@ -556,7 +560,8 @@ class BaseLLMChatTest(ABC):
 litellm.set_verbose = True
 from litellm.utils import supports_vision
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 base_completion_call_args = self.get_base_completion_call_args()
 if not supports_vision(base_completion_call_args["model"], None):
@@ -610,7 +615,8 @@ class BaseLLMChatTest(ABC):
 litellm.set_verbose = True
 from litellm.utils import supports_vision
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
@@ -650,7 +656,8 @@ class BaseLLMChatTest(ABC):
 litellm.set_verbose = True
 from litellm.utils import supports_prompt_caching
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 base_completion_call_args = self.get_base_completion_call_args()
 if not supports_prompt_caching(base_completion_call_args["model"], None):
@@ -766,7 +773,8 @@ class BaseLLMChatTest(ABC):
 litellm._turn_on_debug()
 from litellm.utils import supports_function_calling
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 base_completion_call_args = self.get_base_completion_call_args()
 if not supports_function_calling(base_completion_call_args["model"], None):
@@ -864,7 +872,8 @@ class BaseLLMChatTest(ABC):
 async def test_completion_cost(self):
 from litellm import completion_cost
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.set_verbose = True
 response = await self.async_completion_function(

View file

@@ -87,7 +87,8 @@ class BaseLLMRerankTest(ABC):
 @pytest.mark.parametrize("sync_mode", [True, False])
 async def test_basic_rerank(self, sync_mode):
 litellm._turn_on_debug()
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 rerank_call_args = self.get_base_rerank_call_args()
 custom_llm_provider = self.get_custom_llm_provider()
 if sync_mode is True:

View file

@@ -491,7 +491,8 @@ class TestAnthropicCompletion(BaseLLMChatTest):
 from pydantic import BaseModel
 from litellm.utils import supports_response_schema
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 class RFormat(BaseModel):
 question: str

View file

@@ -1975,7 +1975,8 @@ def test_bedrock_converse_route():
 def test_bedrock_mapped_converse_models():
 litellm.set_verbose = True
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.add_known_models()
 litellm.completion(
 model="bedrock/us.amazon.nova-pro-v1:0",
@@ -2107,7 +2108,8 @@ def test_bedrock_supports_tool_call(model, expected_supports_tool_call):
 class TestBedrockConverseChatCrossRegion(BaseLLMChatTest):
 def get_base_completion_call_args(self) -> dict:
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.add_known_models()
 return {
 "model": "bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0",
@@ -2135,7 +2137,8 @@ class TestBedrockConverseChatCrossRegion(BaseLLMChatTest):
 """
 Test if region models info is correctly used for cost calculation. Using the base model info for cost calculation.
 """
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 bedrock_model = "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
 litellm.model_cost.pop(bedrock_model, None)
 model = f"bedrock/{bedrock_model}"
@@ -2152,7 +2155,8 @@ class TestBedrockConverseChatCrossRegion(BaseLLMChatTest):
 class TestBedrockConverseChatNormal(BaseLLMChatTest):
 def get_base_completion_call_args(self) -> dict:
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.add_known_models()
 return {
 "model": "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
@@ -2321,7 +2325,8 @@ def test_bedrock_nova_topk(top_k_param):
 def test_bedrock_cross_region_inference(monkeypatch):
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.add_known_models()
 litellm.set_verbose = True

View file

@@ -29,7 +29,8 @@ async def test_o1_handle_system_role(model):
 from openai import AsyncOpenAI
 from litellm.utils import supports_system_messages
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.set_verbose = True
@@ -82,7 +83,8 @@ async def test_o1_handle_tool_calling_optional_params(
 from litellm.utils import ProviderConfigManager
 from litellm.types.utils import LlmProviders
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 config = ProviderConfigManager.get_provider_chat_config(
 model=model, provider=LlmProviders.OPENAI
@@ -188,7 +190,8 @@ class TestOpenAIO3(BaseOSeriesModelsTest, BaseLLMChatTest):
 def test_o1_supports_vision():
 """Test that o1 supports vision"""
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 for k, v in litellm.model_cost.items():
 if k.startswith("o1") and v.get("litellm_provider") == "openai":
 assert v.get("supports_vision") is True, f"{k} does not support vision"

View file

@@ -274,7 +274,8 @@ class TestLogger(CustomLogger):
 @pytest.mark.asyncio()
 async def test_rerank_custom_callbacks():
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 custom_logger = TestLogger()
 litellm.callbacks = [custom_logger]

View file

@@ -42,7 +42,8 @@ class TestTogetherAI(BaseLLMChatTest):
 def test_get_supported_response_format_together_ai(
 self, model: str, expected_bool: bool
 ) -> None:
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 optional_params = litellm.get_supported_openai_params(
 model, custom_llm_provider="together_ai"
 )

View file

@@ -1433,7 +1433,8 @@ async def test_gemini_pro_json_schema_args_sent_httpx(
 enforce_validation,
 ):
 load_vertex_ai_credentials()
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.set_verbose = True
 messages = [{"role": "user", "content": "List 5 cookie recipes"}]
@@ -1553,7 +1554,8 @@ async def test_gemini_pro_json_schema_args_sent_httpx_openai_schema(
 from pydantic import BaseModel
 load_vertex_ai_credentials()
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.set_verbose = True

View file

@@ -634,7 +634,8 @@ def test_gemini_completion_cost(above_128k, provider):
 """
 Check if cost correctly calculated for gemini models based on context window
 """
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 if provider == "gemini":
 model_name = "gemini-1.5-flash-latest"
 else:
@@ -689,7 +690,8 @@ def _count_characters(text):
 def test_vertex_ai_completion_cost():
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 text = "The quick brown fox jumps over the lazy dog."
 characters = _count_characters(text=text)
@@ -724,7 +726,8 @@ def test_vertex_ai_medlm_completion_cost():
 model=model, messages=messages, custom_llm_provider="vertex_ai"
 )
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 model = "vertex_ai/medlm-medium"
 messages = [{"role": "user", "content": "Test MedLM completion cost."}]
@@ -743,7 +746,8 @@ def test_vertex_ai_claude_completion_cost():
 from litellm import Choices, Message, ModelResponse
 from litellm.utils import Usage
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.set_verbose = True
 input_tokens = litellm.token_counter(
@@ -792,7 +796,8 @@ def test_vertex_ai_embedding_completion_cost(caplog):
 """
 Relevant issue - https://github.com/BerriAI/litellm/issues/4630
 """
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 text = "The quick brown fox jumps over the lazy dog."
 input_tokens = litellm.token_counter(
@@ -834,7 +839,8 @@ def test_vertex_ai_embedding_completion_cost(caplog):
 # from test_amazing_vertex_completion import load_vertex_ai_credentials
 # load_vertex_ai_credentials()
-# litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+# os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+# litellm.model_cost = litellm.get_model_cost_map(url="")
 # text = "The quick brown fox jumps over the lazy dog."
 # input_tokens = litellm.token_counter(
@@ -861,7 +867,8 @@ def test_vertex_ai_embedding_completion_cost(caplog):
 def test_completion_azure_ai():
 try:
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.set_verbose = True
 response = litellm.completion(
@@ -967,7 +974,8 @@ def test_vertex_ai_mistral_predict_cost(usage):
 @pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"])
 def test_completion_cost_tts(model):
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 cost = completion_cost(
 model=model,
@@ -1163,7 +1171,8 @@ def test_completion_cost_azure_common_deployment_name():
 ],
 )
 def test_completion_cost_prompt_caching(model, custom_llm_provider):
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 from litellm.utils import Choices, Message, ModelResponse, Usage
@@ -1264,7 +1273,8 @@ def test_completion_cost_prompt_caching(model, custom_llm_provider):
 ],
 )
 def test_completion_cost_databricks(model):
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 model, messages = model, [{"role": "user", "content": "What is 2+2?"}]
 resp = litellm.completion(model=model, messages=messages) # works fine
@@ -1281,7 +1291,8 @@ def test_completion_cost_databricks(model):
 ],
 )
 def test_completion_cost_databricks_embedding(model):
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 resp = litellm.embedding(model=model, input=["hey, how's it going?"]) # works fine
 print(resp)
@@ -1308,7 +1319,8 @@ def test_get_model_params_fireworks_ai(model, base_model):
 ["fireworks_ai/llama-v3p1-405b-instruct", "fireworks_ai/mixtral-8x7b-instruct"],
 )
 def test_completion_cost_fireworks_ai(model):
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 messages = [{"role": "user", "content": "Hey, how's it going?"}]
 resp = litellm.completion(model=model, messages=messages) # works fine
@@ -1325,7 +1337,8 @@ def test_cost_azure_openai_prompt_caching():
 )
 from litellm import get_model_info
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 model = "azure/o1-mini"
@@ -1416,7 +1429,8 @@ def test_cost_azure_openai_prompt_caching():
 def test_completion_cost_vertex_llama3():
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 from litellm.utils import Choices, Message, ModelResponse, Usage
@@ -1456,7 +1470,8 @@ def test_cost_openai_prompt_caching():
 from litellm.utils import Choices, Message, ModelResponse, Usage
 from litellm import get_model_info
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 model = "gpt-4o-mini-2024-07-18"
@@ -1546,7 +1561,8 @@ def test_cost_openai_prompt_caching():
 def test_completion_cost_azure_ai_rerank(model):
 from litellm import RerankResponse, rerank
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 response = RerankResponse(
 id="b01dbf2e-63c8-4981-9e69-32241da559ed",
@@ -1577,7 +1593,8 @@ def test_completion_cost_azure_ai_rerank(model):
 def test_together_ai_embedding_completion_cost():
 from litellm.utils import Choices, EmbeddingResponse, Message, ModelResponse, Usage
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 response = EmbeddingResponse(
 model="togethercomputer/m2-bert-80M-8k-retrieval",
 data=[
@@ -2434,7 +2451,8 @@ def test_completion_cost_params_gemini_3():
 from litellm.llms.vertex_ai.cost_calculator import cost_per_character
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 response = ModelResponse(
 id="chatcmpl-61043504-4439-48be-9996-e29bdee24dc3",
@@ -2503,7 +2521,8 @@ def test_completion_cost_params_gemini_3():
 # @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.parametrize("stream", [False]) # True,
 async def test_test_completion_cost_gpt4o_audio_output_from_model(stream):
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 from litellm.types.utils import (
 Choices,
 Message,
@@ -2600,7 +2619,8 @@ def test_completion_cost_model_response_cost(response_model, custom_llm_provider
 """
 from litellm import ModelResponse
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.set_verbose = True
 response = {
@@ -2700,7 +2720,8 @@ def test_select_model_name_for_cost_calc():
 def test_moderations():
 from litellm import moderation
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.add_known_models()
 assert "omni-moderation-latest" in litellm.model_cost
@@ -2753,7 +2774,8 @@ def test_bedrock_cost_calc_with_region():
 from litellm import ModelResponse
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 litellm.add_known_models()
@@ -2952,7 +2974,9 @@ async def test_cost_calculator_with_custom_pricing_router(model_item, custom_pri
 def test_json_valid_model_cost_map():
 import json
-model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+model_cost = litellm.get_model_cost_map(url="")
 try:
 # Attempt to serialize and deserialize the JSON

View file

@@ -115,7 +115,8 @@ def test_openai_embedding_3():
 @pytest.mark.asyncio
 async def test_openai_azure_embedding_simple(model, api_base, api_key, sync_mode):
 try:
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 # litellm.set_verbose = True
 if sync_mode:
 response = embedding(
@@ -197,7 +198,8 @@ def _azure_ai_image_mock_response(*args, **kwargs):
 @pytest.mark.asyncio
 async def test_azure_ai_embedding_image(model, api_base, api_key, sync_mode):
 try:
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 input = base64_image
 if sync_mode:
 client = HTTPHandler()

View file

@@ -58,14 +58,16 @@ def test_get_model_info_shows_correct_supports_vision():
 def test_get_model_info_shows_assistant_prefill():
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 info = litellm.get_model_info("deepseek/deepseek-chat")
 print("info", info)
 assert info.get("supports_assistant_prefill") is True
 def test_get_model_info_shows_supports_prompt_caching():
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 info = litellm.get_model_info("deepseek/deepseek-chat")
 print("info", info)
 assert info.get("supports_prompt_caching") is True
@@ -114,7 +116,8 @@ def test_get_model_info_gemini():
 """
 Tests if ALL gemini models have 'tpm' and 'rpm' in the model info
 """
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 model_map = litellm.model_cost
 for model, info in model_map.items():
@@ -124,7 +127,8 @@ def test_get_model_info_gemini():
 def test_get_model_info_bedrock_region():
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 args = {
 "model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
 "custom_llm_provider": "bedrock",
@@ -208,7 +212,8 @@ def test_model_info_bedrock_converse(monkeypatch):
 This ensures they are automatically routed to the converse endpoint.
 """
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
+litellm.model_cost = litellm.get_model_cost_map(url="")
 try:
 # Load whitelist models from file
 with open("whitelisted_bedrock_models.txt", "r") as file:
@@ -226,7 +231,8 @@ def test_model_info_bedrock_converse_enforcement(monkeypatch):
 """
 Test the enforcement of the whitelist by adding a fake model and ensuring the test fails.
 """
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
+litellm.model_cost = litellm.get_model_cost_map(url="")
 # Add a fake unwhitelisted model
 litellm.model_cost["fake.bedrock-chat-model"] = {
@@ -317,7 +323,8 @@ def test_get_model_info_bedrock_models():
 """
 from litellm.llms.bedrock.common_utils import BedrockModelInfo
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 for k, v in litellm.model_cost.items():
 if v["litellm_provider"] == "bedrock":

View file

@@ -178,7 +178,8 @@ async def test_update_kwargs_before_fallbacks(call_type):
 def test_router_get_model_info_wildcard_routes():
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 router = Router(
 model_list=[
 {
@@ -199,7 +200,8 @@ def test_router_get_model_info_wildcard_routes():
 @pytest.mark.asyncio
 async def test_router_get_model_group_usage_wildcard_routes():
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 router = Router(
 model_list=[
 {
@@ -295,7 +297,8 @@ async def test_call_router_callbacks_on_failure():
 @pytest.mark.asyncio
 async def test_router_model_group_headers():
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 from litellm.types.utils import OPENAI_RESPONSE_HEADERS
 router = Router(
@@ -327,7 +330,8 @@ async def test_router_model_group_headers():
 @pytest.mark.asyncio
 async def test_get_remaining_model_group_usage():
-litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+litellm.model_cost = litellm.get_model_cost_map(url="")
 from litellm.types.utils import OPENAI_RESPONSE_HEADERS
 router = Router(

View file

@@ -1,111 +0,0 @@ (entire file removed by this revert)
import litellm
from jsonschema import validate
def test_model_prices_and_context_window_json_is_valid():
'''
Validates the `model_prices_and_context_window.json` file.
If this test fails after you update the json, you need to update the schema or correct the change you made.
'''
INTENDED_SCHEMA = {
"type": "object",
"additionalProperties": {
"type": "object",
"properties": {
"cache_creation_input_audio_token_cost": {"type": "number"},
"cache_creation_input_token_cost": {"type": "number"},
"cache_read_input_token_cost": {"type": "number"},
"deprecation_date": {"type": "string"},
"input_cost_per_audio_per_second": {"type": "number"},
"input_cost_per_audio_per_second_above_128k_tokens": {"type": "number"},
"input_cost_per_audio_token": {"type": "number"},
"input_cost_per_character": {"type": "number"},
"input_cost_per_character_above_128k_tokens": {"type": "number"},
"input_cost_per_image": {"type": "number"},
"input_cost_per_image_above_128k_tokens": {"type": "number"},
"input_cost_per_pixel": {"type": "number"},
"input_cost_per_query": {"type": "number"},
"input_cost_per_request": {"type": "number"},
"input_cost_per_second": {"type": "number"},
"input_cost_per_token": {"type": "number"},
"input_cost_per_token_above_128k_tokens": {"type": "number"},
"input_cost_per_token_batch_requests": {"type": "number"},
"input_cost_per_token_batches": {"type": "number"},
"input_cost_per_token_cache_hit": {"type": "number"},
"input_cost_per_video_per_second": {"type": "number"},
"input_cost_per_video_per_second_above_128k_tokens": {"type": "number"},
"input_dbu_cost_per_token": {"type": "number"},
"litellm_provider": {"type": "string"},
"max_audio_length_hours": {"type": "number"},
"max_audio_per_prompt": {"type": "number"},
"max_document_chunks_per_query": {"type": "number"},
"max_images_per_prompt": {"type": "number"},
"max_input_tokens": {"type": "number"},
"max_output_tokens": {"type": "number"},
"max_pdf_size_mb": {"type": "number"},
"max_query_tokens": {"type": "number"},
"max_tokens": {"type": "number"},
"max_tokens_per_document_chunk": {"type": "number"},
"max_video_length": {"type": "number"},
"max_videos_per_prompt": {"type": "number"},
"metadata": {"type": "object"},
"mode": {
"type": "string",
"enum": [
"audio_speech",
"audio_transcription",
"chat",
"completion",
"embedding",
"image_generation",
"moderation",
"moderations",
"rerank"
],
},
"output_cost_per_audio_token": {"type": "number"},
"output_cost_per_character": {"type": "number"},
"output_cost_per_character_above_128k_tokens": {"type": "number"},
"output_cost_per_image": {"type": "number"},
"output_cost_per_pixel": {"type": "number"},
"output_cost_per_second": {"type": "number"},
"output_cost_per_token": {"type": "number"},
"output_cost_per_token_above_128k_tokens": {"type": "number"},
"output_cost_per_token_batches": {"type": "number"},
"output_db_cost_per_token": {"type": "number"},
"output_dbu_cost_per_token": {"type": "number"},
"output_vector_size": {"type": "number"},
"rpd": {"type": "number"},
"rpm": {"type": "number"},
"source": {"type": "string"},
"supports_assistant_prefill": {"type": "boolean"},
"supports_audio_input": {"type": "boolean"},
"supports_audio_output": {"type": "boolean"},
"supports_embedding_image_input": {"type": "boolean"},
"supports_function_calling": {"type": "boolean"},
"supports_image_input": {"type": "boolean"},
"supports_parallel_function_calling": {"type": "boolean"},
"supports_pdf_input": {"type": "boolean"},
"supports_prompt_caching": {"type": "boolean"},
"supports_response_schema": {"type": "boolean"},
"supports_system_messages": {"type": "boolean"},
"supports_tool_choice": {"type": "boolean"},
"supports_video_input": {"type": "boolean"},
"supports_vision": {"type": "boolean"},
"tool_use_system_prompt_tokens": {"type": "number"},
"tpm": {"type": "number"},
},
"additionalProperties": False,
},
}
actual_json = litellm.get_locally_cached_model_cost_map()
assert isinstance(actual_json, dict)
temporarily_removed = actual_json.pop('sample_spec', None) # remove the sample, whose schema is inconsistent with the real data
validate(actual_json, INTENDED_SCHEMA)
if temporarily_removed is not None:
# put back the sample spec that we removed
actual_json.update({'sample_spec': temporarily_removed})
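If the schema check is still wanted after this revert, the same validation can be driven through the restored loader; a hypothetical sketch reusing the INTENDED_SCHEMA dict defined in the removed test above:

import os

import litellm
from jsonschema import validate

# Hypothetical replacement for the removed loading call: force the local map, then validate.
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
actual_json = litellm.get_model_cost_map(url="")

sample_spec = actual_json.pop("sample_spec", None)  # sample entry's schema differs from real entries
validate(actual_json, INTENDED_SCHEMA)  # INTENDED_SCHEMA as defined in the removed test
if sample_spec is not None:
    actual_json["sample_spec"] = sample_spec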