mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 02:34:29 +00:00)
parent 1f17daf52c
commit c7a04140e9
19 changed files with 141 additions and 191 deletions
@@ -306,7 +306,7 @@ _key_management_settings: KeyManagementSettings = KeyManagementSettings()
 #### PII MASKING ####
 output_parse_pii: bool = False
 #############################################
-from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map, get_locally_cached_model_cost_map
+from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map

 model_cost = get_model_cost_map(url=model_cost_map_url)
 custom_prompt_dict: Dict[str, dict] = {}
@@ -8,29 +8,24 @@ export LITELLM_LOCAL_MODEL_COST_MAP=True
 ```
 """

-from functools import cache
 import os

 import httpx

-@cache
-def get_locally_cached_model_cost_map():
-    import importlib.resources
-    import json
-
-    with importlib.resources.open_text(
-        "litellm", "model_prices_and_context_window_backup.json"
-    ) as f:
-        content = json.load(f)
-    return content
-

 def get_model_cost_map(url: str):
     if (
         os.getenv("LITELLM_LOCAL_MODEL_COST_MAP", False)
         or os.getenv("LITELLM_LOCAL_MODEL_COST_MAP", False) == "True"
     ):
-        return get_locally_cached_model_cost_map()
+        import importlib.resources
+        import json
+
+        with importlib.resources.open_text(
+            "litellm", "model_prices_and_context_window_backup.json"
+        ) as f:
+            content = json.load(f)
+        return content

     try:
         response = httpx.get(
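For orientation, a minimal sketch of the offline loading path this hunk restores: when LITELLM_LOCAL_MODEL_COST_MAP is set, get_model_cost_map() reads the bundled backup JSON instead of fetching the URL. This is a sketch based on the hunk above; the empty url value and the print are illustrative only.

```python
import os

# Opt into the offline path; get_model_cost_map() checks this env var first.
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"

import litellm

# With the env var set, the url argument is ignored and the bundled
# model_prices_and_context_window_backup.json is loaded instead.
litellm.model_cost = litellm.get_model_cost_map(url="")
print(len(litellm.model_cost), "models loaded from the local backup map")
```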
@@ -6,7 +6,7 @@
         "input_cost_per_token": 0.0000,
         "output_cost_per_token": 0.000,
         "litellm_provider": "one of https://docs.litellm.ai/docs/providers",
-        "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, moderations, rerank",
+        "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
@@ -191,7 +191,8 @@ def _check_if_model_name_in_pricing(
     input_cost_per_1k_tokens: str,
     output_cost_per_1k_tokens: str,
 ):
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     for model, value in litellm.model_cost.items():
         if model.startswith(bedrock_model_name):
@@ -907,7 +907,8 @@ def test_supports_response_schema(model, expected_bool):
     Should be true for gemini-1.5-pro on google ai studio / vertex ai AND predibase models
     Should be false otherwise
     """
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     from litellm.utils import supports_response_schema

@@ -1065,7 +1066,8 @@ def test_async_http_handler_force_ipv4(mock_async_client):
     "model, expected_bool", [("gpt-3.5-turbo", False), ("gpt-4o-audio-preview", True)]
 )
 def test_supports_audio_input(model, expected_bool):
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     from litellm.utils import supports_audio_input, supports_audio_output
@@ -1163,7 +1165,8 @@ def test_models_by_provider():
     """
     Make sure all providers from model map are in the valid providers list
     """
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     from litellm import models_by_provider

@@ -1481,7 +1484,8 @@ def test_get_valid_models_default(monkeypatch):


 def test_supports_vision_gemini():
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     from litellm.utils import supports_vision

     assert supports_vision("gemini-1.5-pro") is True
@@ -84,7 +84,8 @@ class BaseLLMEmbeddingTest(ABC):
         litellm.set_verbose = True
         from litellm.utils import supports_embedding_image_input

-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")

         base_embedding_call_args = self.get_base_embedding_call_args()
         if not supports_embedding_image_input(base_embedding_call_args["model"], None):
@@ -342,7 +342,8 @@ class BaseLLMChatTest(ABC):
         from pydantic import BaseModel
         from litellm.utils import supports_response_schema

-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")

         class TestModel(BaseModel):
             first_response: str

@@ -381,14 +382,16 @@ class BaseLLMChatTest(ABC):
         from pydantic import BaseModel
         from litellm.utils import supports_response_schema

-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")

     @pytest.mark.flaky(retries=6, delay=1)
     def test_json_response_nested_pydantic_obj(self):
         from pydantic import BaseModel
         from litellm.utils import supports_response_schema

-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")

         class CalendarEvent(BaseModel):
             name: str
@@ -435,7 +438,8 @@ class BaseLLMChatTest(ABC):
         from litellm.utils import supports_response_schema
         from litellm.llms.base_llm.base_utils import type_to_response_format_param

-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")

         class CalendarEvent(BaseModel):
             name: str

@@ -556,7 +560,8 @@ class BaseLLMChatTest(ABC):
         litellm.set_verbose = True
         from litellm.utils import supports_vision

-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")

         base_completion_call_args = self.get_base_completion_call_args()
         if not supports_vision(base_completion_call_args["model"], None):
@@ -610,7 +615,8 @@ class BaseLLMChatTest(ABC):
         litellm.set_verbose = True
         from litellm.utils import supports_vision

-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")

         image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"

@@ -650,7 +656,8 @@ class BaseLLMChatTest(ABC):
         litellm.set_verbose = True
         from litellm.utils import supports_prompt_caching

-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")

         base_completion_call_args = self.get_base_completion_call_args()
         if not supports_prompt_caching(base_completion_call_args["model"], None):
@@ -766,7 +773,8 @@ class BaseLLMChatTest(ABC):
         litellm._turn_on_debug()
         from litellm.utils import supports_function_calling

-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")

         base_completion_call_args = self.get_base_completion_call_args()
         if not supports_function_calling(base_completion_call_args["model"], None):

@@ -864,7 +872,8 @@ class BaseLLMChatTest(ABC):
     async def test_completion_cost(self):
         from litellm import completion_cost

-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")

         litellm.set_verbose = True
         response = await self.async_completion_function(
@@ -87,7 +87,8 @@ class BaseLLMRerankTest(ABC):
     @pytest.mark.parametrize("sync_mode", [True, False])
     async def test_basic_rerank(self, sync_mode):
         litellm._turn_on_debug()
-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
         rerank_call_args = self.get_base_rerank_call_args()
         custom_llm_provider = self.get_custom_llm_provider()
         if sync_mode is True:
@@ -491,7 +491,8 @@ class TestAnthropicCompletion(BaseLLMChatTest):
         from pydantic import BaseModel
         from litellm.utils import supports_response_schema

-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")

         class RFormat(BaseModel):
             question: str
@@ -1975,7 +1975,8 @@ def test_bedrock_converse_route():

 def test_bedrock_mapped_converse_models():
     litellm.set_verbose = True
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     litellm.add_known_models()
     litellm.completion(
         model="bedrock/us.amazon.nova-pro-v1:0",

@@ -2107,7 +2108,8 @@ def test_bedrock_supports_tool_call(model, expected_supports_tool_call):

 class TestBedrockConverseChatCrossRegion(BaseLLMChatTest):
     def get_base_completion_call_args(self) -> dict:
-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
         litellm.add_known_models()
         return {
             "model": "bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0",
@@ -2135,7 +2137,8 @@ class TestBedrockConverseChatCrossRegion(BaseLLMChatTest):
         """
         Test if region models info is correctly used for cost calculation. Using the base model info for cost calculation.
         """
-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
         bedrock_model = "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
         litellm.model_cost.pop(bedrock_model, None)
         model = f"bedrock/{bedrock_model}"

@@ -2152,7 +2155,8 @@ class TestBedrockConverseChatCrossRegion(BaseLLMChatTest):

 class TestBedrockConverseChatNormal(BaseLLMChatTest):
     def get_base_completion_call_args(self) -> dict:
-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
         litellm.add_known_models()
         return {
             "model": "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
@@ -2321,7 +2325,8 @@ def test_bedrock_nova_topk(top_k_param):
 def test_bedrock_cross_region_inference(monkeypatch):
     from litellm.llms.custom_httpx.http_handler import HTTPHandler

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     litellm.add_known_models()

     litellm.set_verbose = True
@@ -29,7 +29,8 @@ async def test_o1_handle_system_role(model):
     from openai import AsyncOpenAI
     from litellm.utils import supports_system_messages

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     litellm.set_verbose = True

@@ -82,7 +83,8 @@ async def test_o1_handle_tool_calling_optional_params(
     from litellm.utils import ProviderConfigManager
     from litellm.types.utils import LlmProviders

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     config = ProviderConfigManager.get_provider_chat_config(
         model=model, provider=LlmProviders.OPENAI
@@ -188,7 +190,8 @@ class TestOpenAIO3(BaseOSeriesModelsTest, BaseLLMChatTest):

 def test_o1_supports_vision():
     """Test that o1 supports vision"""
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     for k, v in litellm.model_cost.items():
         if k.startswith("o1") and v.get("litellm_provider") == "openai":
             assert v.get("supports_vision") is True, f"{k} does not support vision"

@@ -274,7 +274,8 @@ class TestLogger(CustomLogger):

 @pytest.mark.asyncio()
 async def test_rerank_custom_callbacks():
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     custom_logger = TestLogger()
     litellm.callbacks = [custom_logger]
@@ -42,7 +42,8 @@ class TestTogetherAI(BaseLLMChatTest):
     def test_get_supported_response_format_together_ai(
         self, model: str, expected_bool: bool
     ) -> None:
-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
         optional_params = litellm.get_supported_openai_params(
             model, custom_llm_provider="together_ai"
         )
@@ -1433,7 +1433,8 @@ async def test_gemini_pro_json_schema_args_sent_httpx(
     enforce_validation,
 ):
     load_vertex_ai_credentials()
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     litellm.set_verbose = True
     messages = [{"role": "user", "content": "List 5 cookie recipes"}]

@@ -1553,7 +1554,8 @@ async def test_gemini_pro_json_schema_args_sent_httpx_openai_schema(
     from pydantic import BaseModel

     load_vertex_ai_credentials()
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     litellm.set_verbose = True
@@ -634,7 +634,8 @@ def test_gemini_completion_cost(above_128k, provider):
     """
     Check if cost correctly calculated for gemini models based on context window
     """
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     if provider == "gemini":
         model_name = "gemini-1.5-flash-latest"
     else:

@@ -689,7 +690,8 @@ def _count_characters(text):


 def test_vertex_ai_completion_cost():
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     text = "The quick brown fox jumps over the lazy dog."
     characters = _count_characters(text=text)
@@ -724,7 +726,8 @@ def test_vertex_ai_medlm_completion_cost():
         model=model, messages=messages, custom_llm_provider="vertex_ai"
     )

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     model = "vertex_ai/medlm-medium"
     messages = [{"role": "user", "content": "Test MedLM completion cost."}]

@@ -743,7 +746,8 @@ def test_vertex_ai_claude_completion_cost():
     from litellm import Choices, Message, ModelResponse
     from litellm.utils import Usage

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     litellm.set_verbose = True
     input_tokens = litellm.token_counter(
@@ -792,7 +796,8 @@ def test_vertex_ai_embedding_completion_cost(caplog):
     """
     Relevant issue - https://github.com/BerriAI/litellm/issues/4630
     """
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     text = "The quick brown fox jumps over the lazy dog."
     input_tokens = litellm.token_counter(

@@ -834,7 +839,8 @@ def test_vertex_ai_embedding_completion_cost(caplog):
 # from test_amazing_vertex_completion import load_vertex_ai_credentials

 # load_vertex_ai_credentials()
-# litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+# os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+# litellm.model_cost = litellm.get_model_cost_map(url="")

 # text = "The quick brown fox jumps over the lazy dog."
 # input_tokens = litellm.token_counter(
@@ -861,7 +867,8 @@ def test_vertex_ai_embedding_completion_cost(caplog):

 def test_completion_azure_ai():
     try:
-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")

         litellm.set_verbose = True
         response = litellm.completion(

@@ -967,7 +974,8 @@ def test_vertex_ai_mistral_predict_cost(usage):

 @pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"])
 def test_completion_cost_tts(model):
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     cost = completion_cost(
         model=model,
@@ -1163,7 +1171,8 @@ def test_completion_cost_azure_common_deployment_name():
     ],
 )
 def test_completion_cost_prompt_caching(model, custom_llm_provider):
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     from litellm.utils import Choices, Message, ModelResponse, Usage

@@ -1264,7 +1273,8 @@ def test_completion_cost_prompt_caching(model, custom_llm_provider):
     ],
 )
 def test_completion_cost_databricks(model):
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     model, messages = model, [{"role": "user", "content": "What is 2+2?"}]

     resp = litellm.completion(model=model, messages=messages) # works fine
@@ -1281,7 +1291,8 @@ def test_completion_cost_databricks(model):
     ],
 )
 def test_completion_cost_databricks_embedding(model):
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     resp = litellm.embedding(model=model, input=["hey, how's it going?"]) # works fine

     print(resp)

@@ -1308,7 +1319,8 @@ def test_get_model_params_fireworks_ai(model, base_model):
     ["fireworks_ai/llama-v3p1-405b-instruct", "fireworks_ai/mixtral-8x7b-instruct"],
 )
 def test_completion_cost_fireworks_ai(model):
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     messages = [{"role": "user", "content": "Hey, how's it going?"}]
     resp = litellm.completion(model=model, messages=messages) # works fine
@@ -1325,7 +1337,8 @@ def test_cost_azure_openai_prompt_caching():
     )
     from litellm import get_model_info

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     model = "azure/o1-mini"

@@ -1416,7 +1429,8 @@ def test_cost_azure_openai_prompt_caching():


 def test_completion_cost_vertex_llama3():
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     from litellm.utils import Choices, Message, ModelResponse, Usage
@@ -1456,7 +1470,8 @@ def test_cost_openai_prompt_caching():
     from litellm.utils import Choices, Message, ModelResponse, Usage
     from litellm import get_model_info

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     model = "gpt-4o-mini-2024-07-18"

@@ -1546,7 +1561,8 @@ def test_cost_openai_prompt_caching():
 def test_completion_cost_azure_ai_rerank(model):
     from litellm import RerankResponse, rerank

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     response = RerankResponse(
         id="b01dbf2e-63c8-4981-9e69-32241da559ed",
@@ -1577,7 +1593,8 @@ def test_completion_cost_azure_ai_rerank(model):
 def test_together_ai_embedding_completion_cost():
     from litellm.utils import Choices, EmbeddingResponse, Message, ModelResponse, Usage

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     response = EmbeddingResponse(
         model="togethercomputer/m2-bert-80M-8k-retrieval",
         data=[

@@ -2434,7 +2451,8 @@ def test_completion_cost_params_gemini_3():

     from litellm.llms.vertex_ai.cost_calculator import cost_per_character

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     response = ModelResponse(
         id="chatcmpl-61043504-4439-48be-9996-e29bdee24dc3",
@@ -2503,7 +2521,8 @@ def test_completion_cost_params_gemini_3():
 # @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.parametrize("stream", [False]) # True,
 async def test_test_completion_cost_gpt4o_audio_output_from_model(stream):
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     from litellm.types.utils import (
         Choices,
         Message,

@@ -2600,7 +2619,8 @@ def test_completion_cost_model_response_cost(response_model, custom_llm_provider
     """
     from litellm import ModelResponse

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     litellm.set_verbose = True
     response = {
@@ -2700,7 +2720,8 @@ def test_select_model_name_for_cost_calc():
 def test_moderations():
     from litellm import moderation

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     litellm.add_known_models()

     assert "omni-moderation-latest" in litellm.model_cost

@@ -2753,7 +2774,8 @@ def test_bedrock_cost_calc_with_region():

     from litellm import ModelResponse

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     litellm.add_known_models()
@@ -2952,7 +2974,9 @@ async def test_cost_calculator_with_custom_pricing_router(model_item, custom_pri
 def test_json_valid_model_cost_map():
     import json

-    model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+
+    model_cost = litellm.get_model_cost_map(url="")

     try:
         # Attempt to serialize and deserialize the JSON
@@ -115,7 +115,8 @@ def test_openai_embedding_3():
 @pytest.mark.asyncio
 async def test_openai_azure_embedding_simple(model, api_base, api_key, sync_mode):
     try:
-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
         # litellm.set_verbose = True
         if sync_mode:
             response = embedding(

@@ -197,7 +198,8 @@ def _azure_ai_image_mock_response(*args, **kwargs):
 @pytest.mark.asyncio
 async def test_azure_ai_embedding_image(model, api_base, api_key, sync_mode):
     try:
-        litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
         input = base64_image
         if sync_mode:
             client = HTTPHandler()
@@ -58,14 +58,16 @@ def test_get_model_info_shows_correct_supports_vision():


 def test_get_model_info_shows_assistant_prefill():
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     info = litellm.get_model_info("deepseek/deepseek-chat")
     print("info", info)
     assert info.get("supports_assistant_prefill") is True


 def test_get_model_info_shows_supports_prompt_caching():
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     info = litellm.get_model_info("deepseek/deepseek-chat")
     print("info", info)
     assert info.get("supports_prompt_caching") is True
@@ -114,7 +116,8 @@ def test_get_model_info_gemini():
     """
     Tests if ALL gemini models have 'tpm' and 'rpm' in the model info
     """
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     model_map = litellm.model_cost
     for model, info in model_map.items():

@@ -124,7 +127,8 @@ def test_get_model_info_gemini():


 def test_get_model_info_bedrock_region():
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     args = {
         "model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
         "custom_llm_provider": "bedrock",
@@ -208,7 +212,8 @@ def test_model_info_bedrock_converse(monkeypatch):

     This ensures they are automatically routed to the converse endpoint.
     """
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     try:
         # Load whitelist models from file
         with open("whitelisted_bedrock_models.txt", "r") as file:

@@ -226,7 +231,8 @@ def test_model_info_bedrock_converse_enforcement(monkeypatch):
     """
     Test the enforcement of the whitelist by adding a fake model and ensuring the test fails.
     """
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     # Add a fake unwhitelisted model
     litellm.model_cost["fake.bedrock-chat-model"] = {
@@ -317,7 +323,8 @@ def test_get_model_info_bedrock_models():
     """
     from litellm.llms.bedrock.common_utils import BedrockModelInfo

-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")

     for k, v in litellm.model_cost.items():
         if v["litellm_provider"] == "bedrock":
@@ -178,7 +178,8 @@ async def test_update_kwargs_before_fallbacks(call_type):


 def test_router_get_model_info_wildcard_routes():
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     router = Router(
         model_list=[
             {

@@ -199,7 +200,8 @@ def test_router_get_model_info_wildcard_routes():

 @pytest.mark.asyncio
 async def test_router_get_model_group_usage_wildcard_routes():
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     router = Router(
         model_list=[
             {
@@ -295,7 +297,8 @@ async def test_call_router_callbacks_on_failure():

 @pytest.mark.asyncio
 async def test_router_model_group_headers():
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     from litellm.types.utils import OPENAI_RESPONSE_HEADERS

     router = Router(

@@ -327,7 +330,8 @@ async def test_router_model_group_headers():

 @pytest.mark.asyncio
 async def test_get_remaining_model_group_usage():
-    litellm.model_cost = litellm.get_locally_cached_model_cost_map()
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
     from litellm.types.utils import OPENAI_RESPONSE_HEADERS

     router = Router(
@@ -1,111 +0,0 @@
-import litellm
-from jsonschema import validate
-
-def test_model_prices_and_context_window_json_is_valid():
-    '''
-    Validates the `model_prices_and_context_window.json` file.
-
-    If this test fails after you update the json, you need to update the schema or correct the change you made.
-    '''
-
-    INTENDED_SCHEMA = {
-        "type": "object",
-        "additionalProperties": {
-            "type": "object",
-            "properties": {
-                "cache_creation_input_audio_token_cost": {"type": "number"},
-                "cache_creation_input_token_cost": {"type": "number"},
-                "cache_read_input_token_cost": {"type": "number"},
-                "deprecation_date": {"type": "string"},
-                "input_cost_per_audio_per_second": {"type": "number"},
-                "input_cost_per_audio_per_second_above_128k_tokens": {"type": "number"},
-                "input_cost_per_audio_token": {"type": "number"},
-                "input_cost_per_character": {"type": "number"},
-                "input_cost_per_character_above_128k_tokens": {"type": "number"},
-                "input_cost_per_image": {"type": "number"},
-                "input_cost_per_image_above_128k_tokens": {"type": "number"},
-                "input_cost_per_pixel": {"type": "number"},
-                "input_cost_per_query": {"type": "number"},
-                "input_cost_per_request": {"type": "number"},
-                "input_cost_per_second": {"type": "number"},
-                "input_cost_per_token": {"type": "number"},
-                "input_cost_per_token_above_128k_tokens": {"type": "number"},
-                "input_cost_per_token_batch_requests": {"type": "number"},
-                "input_cost_per_token_batches": {"type": "number"},
-                "input_cost_per_token_cache_hit": {"type": "number"},
-                "input_cost_per_video_per_second": {"type": "number"},
-                "input_cost_per_video_per_second_above_128k_tokens": {"type": "number"},
-                "input_dbu_cost_per_token": {"type": "number"},
-                "litellm_provider": {"type": "string"},
-                "max_audio_length_hours": {"type": "number"},
-                "max_audio_per_prompt": {"type": "number"},
-                "max_document_chunks_per_query": {"type": "number"},
-                "max_images_per_prompt": {"type": "number"},
-                "max_input_tokens": {"type": "number"},
-                "max_output_tokens": {"type": "number"},
-                "max_pdf_size_mb": {"type": "number"},
-                "max_query_tokens": {"type": "number"},
-                "max_tokens": {"type": "number"},
-                "max_tokens_per_document_chunk": {"type": "number"},
-                "max_video_length": {"type": "number"},
-                "max_videos_per_prompt": {"type": "number"},
-                "metadata": {"type": "object"},
-                "mode": {
-                    "type": "string",
-                    "enum": [
-                        "audio_speech",
-                        "audio_transcription",
-                        "chat",
-                        "completion",
-                        "embedding",
-                        "image_generation",
-                        "moderation",
-                        "moderations",
-                        "rerank"
-                    ],
-                },
-                "output_cost_per_audio_token": {"type": "number"},
-                "output_cost_per_character": {"type": "number"},
-                "output_cost_per_character_above_128k_tokens": {"type": "number"},
-                "output_cost_per_image": {"type": "number"},
-                "output_cost_per_pixel": {"type": "number"},
-                "output_cost_per_second": {"type": "number"},
-                "output_cost_per_token": {"type": "number"},
-                "output_cost_per_token_above_128k_tokens": {"type": "number"},
-                "output_cost_per_token_batches": {"type": "number"},
-                "output_db_cost_per_token": {"type": "number"},
-                "output_dbu_cost_per_token": {"type": "number"},
-                "output_vector_size": {"type": "number"},
-                "rpd": {"type": "number"},
-                "rpm": {"type": "number"},
-                "source": {"type": "string"},
-                "supports_assistant_prefill": {"type": "boolean"},
-                "supports_audio_input": {"type": "boolean"},
-                "supports_audio_output": {"type": "boolean"},
-                "supports_embedding_image_input": {"type": "boolean"},
-                "supports_function_calling": {"type": "boolean"},
-                "supports_image_input": {"type": "boolean"},
-                "supports_parallel_function_calling": {"type": "boolean"},
-                "supports_pdf_input": {"type": "boolean"},
-                "supports_prompt_caching": {"type": "boolean"},
-                "supports_response_schema": {"type": "boolean"},
-                "supports_system_messages": {"type": "boolean"},
-                "supports_tool_choice": {"type": "boolean"},
-                "supports_video_input": {"type": "boolean"},
-                "supports_vision": {"type": "boolean"},
-                "tool_use_system_prompt_tokens": {"type": "number"},
-                "tpm": {"type": "number"},
-            },
-            "additionalProperties": False,
-        },
-    }
-
-    actual_json = litellm.get_locally_cached_model_cost_map()
-    assert isinstance(actual_json, dict)
-    temporarily_removed = actual_json.pop('sample_spec', None) # remove the sample, whose schema is inconsistent with the real data
-
-    validate(actual_json, INTENDED_SCHEMA)
-
-    if temporarily_removed is not None:
-        # put back the sample spec that we removed
-        actual_json.update({'sample_spec': temporarily_removed})
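For context, the file removed above wraps a single jsonschema.validate call over the model cost map. Below is a minimal standalone sketch of that pattern; the trimmed-down schema and the one-entry sample map are hypothetical stand-ins, not the project's actual schema or data.

```python
import json
from jsonschema import validate  # raises jsonschema.ValidationError on mismatch

# Hypothetical, trimmed-down schema in the same shape as the removed test's
# INTENDED_SCHEMA: every top-level entry must be an object with known keys only.
SCHEMA = {
    "type": "object",
    "additionalProperties": {
        "type": "object",
        "properties": {
            "litellm_provider": {"type": "string"},
            "mode": {"type": "string"},
            "input_cost_per_token": {"type": "number"},
            "output_cost_per_token": {"type": "number"},
        },
        "additionalProperties": False,
    },
}

# Illustrative instance; in the removed test this came from the packaged cost map.
model_map = json.loads(
    '{"some-model": {"litellm_provider": "openai", "mode": "chat", '
    '"input_cost_per_token": 0.0, "output_cost_per_token": 0.0}}'
)
validate(model_map, SCHEMA)  # passes silently when the map matches the schema
print("model map matches the schema")
```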