mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 02:34:29 +00:00
572 lines
21 KiB
Python
572 lines
21 KiB
Python
# What is this?
|
|
## Unit testing for the 'get_model_info()' function
|
|
import os
|
|
import sys
|
|
import traceback
|
|
import json
|
|
|
|
from jsonschema import validate
|
|
|
|
from typing import List, Dict, Any
|
|
|
|
sys.path.insert(
|
|
0, os.path.abspath("../..")
|
|
) # Adds the parent directory to the system-path
|
|
import pytest
|
|
|
|
import litellm
|
|
from litellm import get_model_info
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
|
|
def test_get_model_info_simple_model_name():
    """
    A bare model name (no provider prefix) is looked up via `litellm.get_model_info`.

    The test has no explicit assertion: it passes as long as the lookup does not raise.
    """
    litellm.get_model_info("claude-3-opus-20240229")
|
|
|
|
|
|
def test_get_model_info_custom_llm_with_model_name():
    """
    A `{custom_llm_provider}/{model_name}` style name is looked up via `litellm.get_model_info`.

    The test has no explicit assertion: it passes as long as the lookup does not raise.
    """
    litellm.get_model_info("anthropic/claude-3-opus-20240229")
|
|
|
|
|
|
def test_get_model_info_custom_llm_with_same_name_vllm(monkeypatch):
    """
    Registers a zero-cost `openai/command-r-plus` entry, then checks that looking up
    `command-r-plus` with `custom_llm_provider="openai"` reports that zero input cost.
    """
    zero_cost_entry = {
        "input_cost_per_token": 0.0,
        "output_cost_per_token": 0.0,
    }
    litellm.register_model({"openai/command-r-plus": zero_cost_entry})

    # vllm is openai-compatible, so the lookup goes through the "openai" provider
    model_info = litellm.get_model_info(
        "command-r-plus", custom_llm_provider="openai"
    )
    print("model_info", model_info)
    assert model_info["input_cost_per_token"] == 0.0
|
|
|
|
|
|
def test_get_model_info_shows_correct_supports_vision():
    """
    `gemini/gemini-1.5-flash` must report `supports_vision` as exactly `True`.
    """
    gemini_flash_info = litellm.get_model_info("gemini/gemini-1.5-flash")
    print("info", gemini_flash_info)
    assert gemini_flash_info["supports_vision"] is True
|
|
|
|
|
|
def test_get_model_info_shows_assistant_prefill():
    """
    `deepseek/deepseek-chat` must report `supports_assistant_prefill` as `True`.

    The cost map is reloaded after setting LITELLM_LOCAL_MODEL_COST_MAP so the
    lookup runs against a freshly loaded map.
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    deepseek_info = litellm.get_model_info("deepseek/deepseek-chat")
    print("info", deepseek_info)
    assert deepseek_info.get("supports_assistant_prefill") is True
|
|
|
|
|
|
def test_get_model_info_shows_supports_prompt_caching():
    """
    `deepseek/deepseek-chat` must report `supports_prompt_caching` as `True`.

    The cost map is reloaded after setting LITELLM_LOCAL_MODEL_COST_MAP so the
    lookup runs against a freshly loaded map.
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    deepseek_info = litellm.get_model_info("deepseek/deepseek-chat")
    print("info", deepseek_info)
    assert deepseek_info.get("supports_prompt_caching") is True
|
|
|
|
|
|
def test_get_model_info_finetuned_models():
    """
    A fine-tuned model id (`ft:gpt-3.5-turbo:<org>:<suffix>:<id>`) must resolve to an
    entry whose `input_cost_per_token` is 0.000003.
    """
    finetuned_info = litellm.get_model_info(
        "ft:gpt-3.5-turbo:my-org:custom_suffix:id"
    )
    print("info", finetuned_info)
    assert finetuned_info["input_cost_per_token"] == 0.000003
|
|
|
|
|
|
def test_get_model_info_gemini_pro():
    """
    Looking up `gemini-1.5-pro-002` must return an entry whose `key` is that same name.
    """
    requested_model = "gemini-1.5-pro-002"
    gemini_pro_info = litellm.get_model_info(requested_model)
    print("info", gemini_pro_info)
    assert gemini_pro_info["key"] == "gemini-1.5-pro-002"
|
|
|
|
|
|
def test_get_model_info_ollama_chat():
    """
    With `litellm.module_level_client.post` mocked to return a payload whose
    `template` is "tools", both `OllamaConfig().get_model_info` and the top-level
    `get_model_info("ollama/mistral")` must report function-calling support, and
    the last mocked POST must have been sent with `json["name"] == "mistral"`.
    """
    from litellm.llms.ollama.completion.transformation import OllamaConfig

    # Canned payload handed back by the mocked response's `.json()`
    fake_response = MagicMock(
        json=lambda: {
            "model_info": {"llama.context_length": 32768},
            "template": "tools",
        }
    )

    with patch.object(
        litellm.module_level_client, "post", return_value=fake_response
    ) as mock_client:
        config_info = OllamaConfig().get_model_info("mistral")
        assert config_info["supports_function_calling"] is True

        top_level_info = get_model_info("ollama/mistral")
        print("info", top_level_info)
        assert top_level_info["supports_function_calling"] is True

        mock_client.assert_called()

        last_call_kwargs = mock_client.call_args.kwargs
        print(last_call_kwargs)

        assert last_call_kwargs["json"]["name"] == "mistral"
|
|
|
|
|
|
def test_get_model_info_gemini():
    """
    Every `gemini/` entry in the cost map must define both 'tpm' and 'rpm'.

    Entries whose name contains "gemma" or "learnlm" are excluded from the check.
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    excluded_families = ("gemma", "learnlm")
    for model, info in litellm.model_cost.items():
        if not model.startswith("gemini/"):
            continue
        if any(family in model for family in excluded_families):
            continue
        assert info.get("tpm") is not None, f"{model} does not have tpm"
        assert info.get("rpm") is not None, f"{model} does not have rpm"
|
|
|
|
|
|
def test_get_model_info_bedrock_region():
    """
    With the `us.`-prefixed entry removed from the cost map, looking up
    `us.anthropic.claude-3-5-sonnet-20241022-v2:0` for provider `bedrock` must
    resolve to the un-prefixed `anthropic.claude-3-5-sonnet-20241022-v2:0` key.
    """
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    regional_model = "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
    # Drop the regional entry (if present) before the lookup
    litellm.model_cost.pop(regional_model, None)

    info = litellm.get_model_info(
        model=regional_model, custom_llm_provider="bedrock"
    )
    print("info", info)
    assert info["key"] == "anthropic.claude-3-5-sonnet-20241022-v2:0"
    assert info["litellm_provider"] == "bedrock"
|
|
|
|
|
|
@pytest.mark.parametrize(
    "model",
    [
        "ft:gpt-3.5-turbo:my-org:custom_suffix:id",
        "ft:gpt-4-0613:my-org:custom_suffix:id",
        "ft:davinci-002:my-org:custom_suffix:id",
        "ft:babbage-002:my-org:custom_suffix:id",
        "gpt-35-turbo",
        "ada",
    ],
)
def test_get_model_info_completion_cost_unit_tests(model):
    """
    Smoke test: `litellm.get_model_info` must not raise for each listed model name.

    There is no assertion on the returned value; the info is only printed.
    Each model is listed once (a duplicated `ft:gpt-4-0613` case was removed,
    since it only re-ran the identical lookup).
    """
    info = litellm.get_model_info(model)
    print("info", info)
|
|
|
|
|
|
def test_get_model_info_ft_model_with_provider_prefix():
    """
    A fine-tuned model id carrying an `openai/` prefix, looked up with
    `custom_llm_provider="openai"`, must resolve to the `ft:gpt-3.5-turbo` key.
    """
    info = litellm.get_model_info(
        model="openai/ft:gpt-3.5-turbo:my-org:custom_suffix:id",
        custom_llm_provider="openai",
    )
    print("info", info)
    assert info["key"] == "ft:gpt-3.5-turbo"
|
|
|
|
|
|
def test_get_whitelisted_models():
    """
    Snapshot of all bedrock models as of 12/24/2024.

    Enforce any new bedrock chat model to be added as `bedrock_converse` unless explicitly whitelisted.

    Create whitelist to prevent naming regressions for older litellm versions.

    Collects every entry of `litellm.model_cost` whose provider is "bedrock" and
    whose mode is "chat", then (re)writes them, one per line, to
    `whitelisted_bedrock_models.txt` in the current working directory.
    """
    bedrock_chat_models = [
        name
        for name, entry in litellm.model_cost.items()
        if entry["litellm_provider"] == "bedrock" and entry["mode"] == "chat"
    ]

    # Write to a local file
    with open("whitelisted_bedrock_models.txt", "w") as snapshot:
        snapshot.writelines(f"{name}\n" for name in bedrock_chat_models)

    print("whitelisted_models written to whitelisted_bedrock_models.txt")
|
|
|
|
|
|
def _enforce_bedrock_converse_models(
|
|
model_cost: List[Dict[str, Any]], whitelist_models: List[str]
|
|
):
|
|
"""
|
|
Assert all new bedrock chat models are added as `bedrock_converse` unless explicitly whitelisted.
|
|
"""
|
|
# Check for unwhitelisted models
|
|
for model, info in litellm.model_cost.items():
|
|
if (
|
|
info["litellm_provider"] == "bedrock"
|
|
and info["mode"] == "chat"
|
|
and model not in whitelist_models
|
|
):
|
|
raise AssertionError(
|
|
f"New bedrock chat model detected: {model}. Please set `litellm_provider='bedrock_converse'` for this model."
|
|
)
|
|
|
|
|
|
def test_model_info_bedrock_converse(monkeypatch):
    """
    Assert all new bedrock chat models are added as `bedrock_converse` unless explicitly whitelisted.

    This ensures they are automatically routed to the converse endpoint.

    The whitelist is read from `whitelisted_bedrock_models.txt`; the test is
    skipped when that file does not exist.
    """
    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
    litellm.model_cost = litellm.get_model_cost_map(url="")

    try:
        # Load whitelist models from file, one model name per line
        with open("whitelisted_bedrock_models.txt", "r") as snapshot:
            whitelist_models = [entry.strip() for entry in snapshot]
    except FileNotFoundError:
        pytest.skip("whitelisted_bedrock_models.txt not found")

    _enforce_bedrock_converse_models(
        model_cost=litellm.model_cost, whitelist_models=whitelist_models
    )
|
|
|
|
|
|
@pytest.mark.flaky(retries=6, delay=2)
def test_model_info_bedrock_converse_enforcement(monkeypatch):
    """
    Test the enforcement of the whitelist by adding a fake model and ensuring the test fails.

    The fake entry is installed with `monkeypatch.setitem`, so it is removed from
    `litellm.model_cost` again when the test finishes instead of leaking into
    later tests. The test is skipped when `whitelisted_bedrock_models.txt` does
    not exist.
    """
    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
    litellm.model_cost = litellm.get_model_cost_map(url="")

    # Add a fake unwhitelisted model (undone automatically at teardown)
    monkeypatch.setitem(
        litellm.model_cost,
        "fake.bedrock-chat-model",
        {
            "litellm_provider": "bedrock",
            "mode": "chat",
        },
    )

    # Only the file read can legitimately raise FileNotFoundError, so only it is guarded.
    try:
        # Load whitelist models from file
        with open("whitelisted_bedrock_models.txt", "r") as file:
            whitelist_models = [line.strip() for line in file]
    except FileNotFoundError:
        pytest.skip("whitelisted_bedrock_models.txt not found")

    # Check for unwhitelisted models
    with pytest.raises(AssertionError):
        _enforce_bedrock_converse_models(
            model_cost=litellm.model_cost, whitelist_models=whitelist_models
        )
|
|
|
|
|
|
def test_get_model_info_custom_provider():
    """
    Registers a custom LLM handler, checks that a completion routed through it
    returns the mocked "Hi!" reply, then registers model info for the custom
    model and looks it up with `get_model_info` (the lookup must not raise).
    """
    # Custom provider example copied from https://docs.litellm.ai/docs/providers/custom_llm_server:
    import litellm
    from litellm import CustomLLM, completion, get_llm_provider

    class MyCustomLLM(CustomLLM):
        def completion(self, *args, **kwargs) -> litellm.ModelResponse:
            mocked_reply = litellm.completion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": "Hello world"}],
                mock_response="Hi!",
            )
            return mocked_reply  # type: ignore

    handler = MyCustomLLM()

    # 👈 KEY STEP - REGISTER HANDLER
    litellm.custom_provider_map = [
        {"provider": "my-custom-llm", "custom_handler": handler}
    ]

    custom_model = "my-custom-llm/my-fake-model"
    resp = completion(
        model=custom_model,
        messages=[{"role": "user", "content": "Hello world!"}],
    )
    assert resp.choices[0].message.content == "Hi!"

    # Register model info
    litellm.register_model({"my-custom-llm/my-fake-model": {"max_tokens": 2048}})

    # Get registered model info
    from litellm import get_model_info

    # 💥 "Exception: This model isn't mapped yet." in v1.56.10
    get_model_info(model=custom_model)
|
|
|
|
|
|
def test_get_model_info_custom_model_router():
    """
    After a Router is built with a deployment carrying an explicit `model_info.id`,
    `get_model_info(<that id>)` must return a non-None result.
    """
    from litellm import Router
    from litellm import get_model_info

    litellm._turn_on_debug()

    deployment_id = "c20d603e-1166-4e0f-aa65-ed9c476ad4ca"
    deployment = {
        "model_name": "ma-summary",
        "litellm_params": {
            "api_base": "http://ma-mix-llm-serving.cicero.svc.cluster.local/v1",
            "input_cost_per_token": 1,
            "output_cost_per_token": 1,
            "model": "openai/meta-llama/Meta-Llama-3-8B-Instruct",
        },
        "model_info": {
            "id": deployment_id,
        },
    }

    # The router object itself is not used afterwards; it is constructed before the lookup below.
    router = Router(model_list=[deployment])

    info = get_model_info(deployment_id)
    print("info", info)
    assert info is not None
|
|
|
|
|
|
def test_get_model_info_bedrock_models():
    """
    Check for drift in base model info for bedrock models and regional model info for bedrock models.

    For every "bedrock" entry in the cost map, each `supports_*` flag of its base
    model entry must be present on the entry itself with the same value.
    Entries whose (normalized) name contains "invoke/" are not compared.
    """
    from litellm.llms.bedrock.common_utils import BedrockModelInfo

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    commitment_terms = (
        "1-month-commitment",
        "3-month-commitment",
        "6-month-commitment",
    )

    for model_key, model_entry in litellm.model_cost.items():
        if model_entry["litellm_provider"] != "bedrock":
            continue

        # Normalize the key: drop "*/" and any "<n>-month-commitment/" segment
        normalized = model_key.replace("*/", "")
        if any(term in normalized for term in commitment_terms):
            for term in commitment_terms:
                normalized = normalized.replace(f"{term}/", "")

        base_model = BedrockModelInfo.get_base_model(normalized)
        base_entry = litellm.model_cost[base_model]

        # The base model is still resolved above for "invoke/" keys; only the
        # flag comparison is skipped for them.
        if "invoke/" in normalized:
            continue

        for capability, expected in base_entry.items():
            if not capability.startswith("supports_"):
                continue
            assert (
                capability in model_entry
            ), f"{capability} is not in model cost map for {normalized}"
            assert (
                model_entry[capability] == expected
            ), f"{capability} is not equal to {expected} for model {normalized}"
|
|
|
|
|
|
def test_get_model_info_huggingface_models(monkeypatch):
    """
    With a Router configured for a huggingface deployment, `get_model_info` for
    that model must return a non-None result, and that result must be accepted
    as keyword arguments by `ModelGroupInfo`.
    """
    from litellm import Router
    from litellm.types.router import ModelGroupInfo

    monkeypatch.setenv("HUGGINGFACE_API_KEY", "hf_abc123")

    hf_deployment = {
        "model_name": "meta-llama/Meta-Llama-3-8B-Instruct",
        "litellm_params": {
            "model": "huggingface/meta-llama/Meta-Llama-3-8B-Instruct",
            "api_base": "https://router.huggingface.co/hf-inference/models/meta-llama/Meta-Llama-3-8B-Instruct",
            # Read back the key that was just set through monkeypatch
            "api_key": os.environ["HUGGINGFACE_API_KEY"],
        },
    }
    router = Router(model_list=[hf_deployment])

    info = litellm.get_model_info("huggingface/meta-llama/Meta-Llama-3-8B-Instruct")
    print("info", info)
    assert info is not None

    # Constructing the group info fails if `info` carries fields it does not accept
    ModelGroupInfo(
        model_group="meta-llama/Meta-Llama-3-8B-Instruct",
        providers=["huggingface"],
        **info,
    )
|
|
|
|
|
|
@pytest.mark.parametrize(
    "model, provider",
    [
        ("bedrock/us-east-2/us.anthropic.claude-3-haiku-20240307-v1:0", None),
        ("bedrock/us-east-2/us.anthropic.claude-3-haiku-20240307-v1:0", "bedrock"),
    ],
)
def test_get_model_info_cost_calculator_bedrock_region_cris_stripped(model, provider):
    """
    ensure cross region inferencing model is used correctly
    Relevant Issue: https://github.com/BerriAI/litellm/issues/8115

    With or without an explicit provider, the `bedrock/us-east-2/` prefix must be
    stripped so the lookup resolves to the `us.`-prefixed key under "bedrock".
    """
    resolved = get_model_info(model=model, custom_llm_provider=provider)
    print("info", resolved)

    expected_key = "us.anthropic.claude-3-haiku-20240307-v1:0"
    assert resolved["key"] == expected_key
    assert resolved["litellm_provider"] == "bedrock"
|
|
|
|
|
|
def test_aaamodel_prices_and_context_window_json_is_valid():
    """
    Validates the `model_prices_and_context_window.json` file.

    If this test fails after you update the json, you need to update the schema or correct the change you made.

    The schema allows only the properties listed below on each model entry
    (`additionalProperties` is False), grouped here by JSON type.
    """
    number_fields = [
        "cache_creation_input_audio_token_cost",
        "cache_creation_input_token_cost",
        "cache_read_input_token_cost",
        "cache_read_input_audio_token_cost",
        "input_cost_per_audio_per_second",
        "input_cost_per_audio_per_second_above_128k_tokens",
        "input_cost_per_audio_token",
        "input_cost_per_character",
        "input_cost_per_character_above_128k_tokens",
        "input_cost_per_image",
        "input_cost_per_image_above_128k_tokens",
        "input_cost_per_token_above_200k_tokens",
        "input_cost_per_pixel",
        "input_cost_per_query",
        "input_cost_per_request",
        "input_cost_per_second",
        "input_cost_per_token",
        "input_cost_per_token_above_128k_tokens",
        "input_cost_per_token_batch_requests",
        "input_cost_per_token_batches",
        "input_cost_per_token_cache_hit",
        "input_cost_per_video_per_second",
        "input_cost_per_video_per_second_above_8s_interval",
        "input_cost_per_video_per_second_above_15s_interval",
        "input_cost_per_video_per_second_above_128k_tokens",
        "input_dbu_cost_per_token",
        "max_audio_length_hours",
        "max_audio_per_prompt",
        "max_document_chunks_per_query",
        "max_images_per_prompt",
        "max_input_tokens",
        "max_output_tokens",
        "max_pdf_size_mb",
        "max_query_tokens",
        "max_tokens",
        "max_tokens_per_document_chunk",
        "max_video_length",
        "max_videos_per_prompt",
        "output_cost_per_audio_token",
        "output_cost_per_character",
        "output_cost_per_character_above_128k_tokens",
        "output_cost_per_image",
        "output_cost_per_pixel",
        "output_cost_per_second",
        "output_cost_per_token",
        "output_cost_per_token_above_128k_tokens",
        "output_cost_per_token_above_200k_tokens",
        "output_cost_per_token_batches",
        "output_cost_per_reasoning_token",
        "output_db_cost_per_token",
        "output_dbu_cost_per_token",
        "output_vector_size",
        "rpd",
        "rpm",
        "tool_use_system_prompt_tokens",
        "tpm",
    ]
    string_fields = [
        "deprecation_date",
        "litellm_provider",
        "source",
    ]
    boolean_fields = [
        "supports_assistant_prefill",
        "supports_audio_input",
        "supports_audio_output",
        "supports_embedding_image_input",
        "supports_function_calling",
        "supports_image_input",
        "supports_parallel_function_calling",
        "supports_pdf_input",
        "supports_prompt_caching",
        "supports_response_schema",
        "supports_system_messages",
        "supports_tool_choice",
        "supports_video_input",
        "supports_vision",
        "supports_web_search",
        "supports_reasoning",
        "supports_native_streaming",
    ]

    def enum_string_array(allowed_values):
        # Schema fragment: an array whose items are strings from a fixed set
        return {
            "type": "array",
            "items": {
                "type": "string",
                "enum": allowed_values,
            },
        }

    # Scalar properties, generated from the grouped name lists above
    entry_properties = {}
    for field_names, json_type in (
        (number_fields, "number"),
        (string_fields, "string"),
        (boolean_fields, "boolean"),
    ):
        for field_name in field_names:
            entry_properties[field_name] = {"type": json_type}

    # Structured properties
    entry_properties["metadata"] = {"type": "object"}
    entry_properties["mode"] = {
        "type": "string",
        "enum": [
            "audio_speech",
            "audio_transcription",
            "chat",
            "completion",
            "embedding",
            "image_generation",
            "moderation",
            "rerank",
            "responses",
        ],
    }
    entry_properties["supported_endpoints"] = enum_string_array(
        [
            "/v1/responses",
            "/v1/embeddings",
            "/v1/chat/completions",
            "/v1/completions",
            "/v1/images/generations",
            "/v1/images/variations",
            "/v1/images/edits",
            "/v1/batch",
            "/v1/audio/transcriptions",
            "/v1/audio/speech",
        ]
    )
    entry_properties["search_context_cost_per_query"] = {
        "type": "object",
        "properties": {
            "search_context_size_low": {"type": "number"},
            "search_context_size_medium": {"type": "number"},
            "search_context_size_high": {"type": "number"},
        },
        "additionalProperties": False,
    }
    entry_properties["supported_modalities"] = enum_string_array(
        ["text", "audio", "image", "video"]
    )
    entry_properties["supported_output_modalities"] = enum_string_array(
        ["text", "image", "audio"]
    )

    INTENDED_SCHEMA = {
        "type": "object",
        "additionalProperties": {
            "type": "object",
            "properties": entry_properties,
            "additionalProperties": False,
        },
    }

    # Path is relative to the current working directory
    prod_json = "./model_prices_and_context_window.json"
    with open(prod_json, "r") as model_prices_file:
        actual_json = json.load(model_prices_file)
    assert isinstance(actual_json, dict)

    # remove the sample, whose schema is inconsistent with the real data
    actual_json.pop("sample_spec", None)

    validate(actual_json, INTENDED_SCHEMA)
|