Litellm dev 01 11 2025 p3 (#7702)

* fix(__init__.py): exclude pricing-only model cost entries from the real model name lists

prevents bad health checks on wildcard routes

* fix(get_llm_provider.py): handle calling bedrock_converse models
Krish Dholakia 2025-01-11 20:06:54 -08:00 committed by GitHub
parent 9ebb8a8795
commit 267be77720
11 changed files with 141 additions and 66 deletions

View file

@@ -18,6 +18,7 @@ from litellm._logging import (
     _turn_on_json,
     log_level,
 )
+import re
 from litellm.constants import (
     DEFAULT_BATCH_SIZE,
     DEFAULT_FLUSH_INTERVAL_SECONDS,
@@ -484,9 +485,44 @@ galadriel_models: List = []
 sambanova_models: List = []


+def is_bedrock_pricing_only_model(key: str) -> bool:
+    """
+    Excludes keys with the pattern 'bedrock/<region>/<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
+
+    Args:
+        key (str): A key to filter.
+
+    Returns:
+        bool: True if the key matches the Bedrock pattern, False otherwise.
+    """
+    # Regex to match 'bedrock/<region>/<model>'
+    bedrock_pattern = re.compile(r"^bedrock/[a-zA-Z0-9_-]+/.+$")
+
+    if "month-commitment" in key:
+        return True
+
+    is_match = bedrock_pattern.match(key)
+    return is_match is not None
+
+
+def is_openai_finetune_model(key: str) -> bool:
+    """
+    Excludes model cost keys with the pattern 'ft:<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
+
+    Args:
+        key (str): A key to filter.
+
+    Returns:
+        bool: True if the key matches the OpenAI finetune pattern, False otherwise.
+    """
+    return key.startswith("ft:") and not key.count(":") > 1
+
+
 def add_known_models():
     for key, value in model_cost.items():
-        if value.get("litellm_provider") == "openai":
+        if value.get("litellm_provider") == "openai" and not is_openai_finetune_model(
+            key
+        ):
             open_ai_chat_completion_models.append(key)
         elif value.get("litellm_provider") == "text-completion-openai":
             open_ai_text_completion_models.append(key)
@@ -542,7 +578,9 @@ def add_known_models():
             nlp_cloud_models.append(key)
         elif value.get("litellm_provider") == "aleph_alpha":
             aleph_alpha_models.append(key)
-        elif value.get("litellm_provider") == "bedrock":
+        elif value.get(
+            "litellm_provider"
+        ) == "bedrock" and not is_bedrock_pricing_only_model(key):
             bedrock_models.append(key)
         elif value.get("litellm_provider") == "bedrock_converse":
             bedrock_converse_models.append(key)
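
For reference, a minimal sketch of how the two new helpers classify model-cost keys. It assumes the helpers are importable from the top-level litellm package (they are defined at module level in __init__.py above); apart from the region-prefixed Bedrock key taken from the new test in this commit, the example keys are illustrative.

import litellm

# Region-prefixed Bedrock keys exist only for pricing and are filtered out
assert litellm.is_bedrock_pricing_only_model("bedrock/us-west-1/meta.llama3-70b-instruct-v1:0") is True
# A plain Bedrock model id does not match the 'bedrock/<region>/<model>' pattern
assert litellm.is_bedrock_pricing_only_model("anthropic.claude-3-5-sonnet-20241022-v2:0") is False

# Fine-tune pricing keys ('ft:<model>') are excluded from the OpenAI chat model list
assert litellm.is_openai_finetune_model("ft:gpt-3.5-turbo") is True
assert litellm.is_openai_finetune_model("gpt-4o") is False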

View file

@@ -306,7 +306,9 @@ def get_llm_provider(  # noqa: PLR0915
            custom_llm_provider = "petals"
        ## bedrock
        elif (
-            model in litellm.bedrock_models or model in litellm.bedrock_embedding_models
+            model in litellm.bedrock_models
+            or model in litellm.bedrock_embedding_models
+            or model in litellm.bedrock_converse_models
        ):
            custom_llm_provider = "bedrock"
        elif model in litellm.watsonx_models:
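
As a quick illustration (this mirrors the new test_nova_bedrock_converse test at the bottom of this commit), a Converse-only model id now resolves to the bedrock provider:

import litellm

model, custom_llm_provider, _, _ = litellm.get_llm_provider(model="amazon.nova-micro-v1:0")
assert custom_llm_provider == "bedrock"
assert model == "amazon.nova-micro-v1:0"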

View file

@@ -30,16 +30,23 @@ def _ensure_extra_body_is_safe(extra_body: Optional[Dict]) -> Optional[Dict]:
     return extra_body


-def pick_cheapest_chat_model_from_llm_provider(custom_llm_provider: str):
+def pick_cheapest_chat_models_from_llm_provider(custom_llm_provider: str, n=1):
     """
-    Pick the cheapest chat model from the LLM provider.
+    Pick the n cheapest chat models from the LLM provider.
+
+    Args:
+        custom_llm_provider (str): The name of the LLM provider.
+        n (int): The number of cheapest models to return.
+
+    Returns:
+        list[str]: A list of the n cheapest chat models.
     """
     if custom_llm_provider not in litellm.models_by_provider:
-        raise ValueError(f"Unknown LLM provider: {custom_llm_provider}")
+        return []
     known_models = litellm.models_by_provider.get(custom_llm_provider, [])
-    min_cost = float("inf")
-    cheapest_model = None
+    model_costs = []
     for model in known_models:
         try:
             model_info = litellm.get_model_info(
@@ -52,7 +59,10 @@ def pick_cheapest_chat_model_from_llm_provider(custom_llm_provider: str):
         _cost = model_info.get("input_cost_per_token", 0) + model_info.get(
             "output_cost_per_token", 0
         )
-        if _cost < min_cost:
-            min_cost = _cost
-            cheapest_model = model
-    return cheapest_model
+        model_costs.append((model, _cost))
+
+    # Sort by cost (ascending)
+    model_costs.sort(key=lambda x: x[1])
+
+    # Return the top n cheapest models
+    return [model for model, _ in model_costs[:n]]
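
A short usage sketch of the renamed helper, imported from litellm.litellm_core_utils.llm_request_utils as above; the behavior for known and unknown providers mirrors the new test at the end of this commit, and the actual model names returned depend on the current pricing map.

from litellm.litellm_core_utils.llm_request_utils import (
    pick_cheapest_chat_models_from_llm_provider,
)

cheapest = pick_cheapest_chat_models_from_llm_provider("openai", n=3)
print(cheapest)  # the three cheapest OpenAI chat models by input+output cost per token

# Unknown providers now return an empty list instead of raising ValueError
assert pick_cheapest_chat_models_from_llm_provider("unknown", n=1) == []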

View file

@@ -57,6 +57,9 @@ from litellm.litellm_core_utils.health_check_utils import (
     _filter_model_params,
 )
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.llm_request_utils import (
+    pick_cheapest_chat_models_from_llm_provider,
+)
 from litellm.litellm_core_utils.mock_functions import (
     mock_embedding,
     mock_image_generation,
@@ -5080,25 +5083,26 @@ def speech(
 async def ahealth_check_wildcard_models(
     model: str, custom_llm_provider: str, model_params: dict
 ) -> dict:
-    from litellm.litellm_core_utils.llm_request_utils import (
-        pick_cheapest_chat_model_from_llm_provider,
-    )
-
     # this is a wildcard model, we need to pick a random model from the provider
-    cheapest_model = pick_cheapest_chat_model_from_llm_provider(
-        custom_llm_provider=custom_llm_provider
+    cheapest_models = pick_cheapest_chat_models_from_llm_provider(
+        custom_llm_provider=custom_llm_provider, n=3
     )
-    fallback_models: Optional[List] = None
-    if custom_llm_provider in litellm.models_by_provider:
-        models = litellm.models_by_provider[custom_llm_provider]
-        random.shuffle(models)  # Shuffle the models list in place
-        fallback_models = models[:2]  # Pick the first 2 models from the shuffled list
-    model_params["model"] = cheapest_model
+    if len(cheapest_models) == 0:
+        raise Exception(
+            f"Unable to health check wildcard model for provider {custom_llm_provider}. Add a model on your config.yaml or contribute here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+        )
+    if len(cheapest_models) > 1:
+        fallback_models = cheapest_models[
+            1:
+        ]  # Pick the last 2 models from the shuffled list
+    else:
+        fallback_models = None
+    model_params["model"] = cheapest_models[0]
     model_params["fallbacks"] = fallback_models
     model_params["max_tokens"] = 1
     await acompletion(**model_params)
-    response: dict = {}  # args like remaining ratelimit etc.
-    return response
+    return {}


 async def ahealth_check(
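
A rough sketch of how the rewritten wildcard health check might be driven directly, assuming the coroutine lives in litellm.main (the hunk above sits alongside speech() and ahealth_check()); the provider, wildcard name, and message payload are illustrative, and a real call needs valid provider credentials.

import asyncio
from litellm.main import ahealth_check_wildcard_models

async def check_bedrock_wildcard() -> dict:
    # Picks the cheapest known bedrock chat model, keeps the next-cheapest ones as fallbacks,
    # and sends a 1-token completion to verify the wildcard route is healthy.
    return await ahealth_check_wildcard_models(
        model="bedrock/*",
        custom_llm_provider="bedrock",
        model_params={"messages": [{"role": "user", "content": "ping"}]},
    )

asyncio.run(check_bedrock_wildcard())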

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -1,44 +1,43 @@
 model_list:
-  - model_name: azure-embedding-model
-    litellm_params:
-      model: azure/azure-embedding-model
-      api_key: os.environ/AZURE_API_KEY
-      api_base: os.environ/AZURE_API_BASE
-  - model_name: openai-text-completion
-    litellm_params:
-      model: openai/gpt-3.5-turbo
-      api_key: os.environ/OPENAI_API_KEY
-  - model_name: chatbot_actions
-    litellm_params:
-      model: langfuse/gpt-3.5-turbo
-      api_key: os.environ/OPENAI_API_KEY
-      tpm: 1000000
-      prompt_id: "jokes"
-  - model_name: openai-deepseek
-    litellm_params:
-      model: deepseek/deepseek-chat
-      api_key: os.environ/OPENAI_API_KEY
-    model_info:
-      access_groups: ["restricted-models"]
-      custom_tokenizer:
-        identifier: deepseek-ai/DeepSeek-V3-Base
-        revision: main
-        auth_token: os.environ/HUGGINGFACE_API_KEY
-  - model_name: watsonx/ibm/granite-13b-chat-v2 # tried to keep original name for backwards compatibility but I've also tried watsonx_text
-    litellm_params:
-      model: watsonx_text/ibm/granite-13b-chat-v2
-    model_info:
-      input_cost_per_token: 0.0000006
-      output_cost_per_token: 0.0000006
+  # At least one model must exist for the proxy to start.
+  - model_name: gpt-4o
+    litellm_params:
+      model: gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+      # timeout: 0.1 # timeout in (seconds)
+      # stream_timeout: 0.01 # timeout for stream requests (seconds)
+  - model_name: anthropic.claude-3-5-sonnet-20241022-v2:0
+    litellm_params:
+      model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0
+  - model_name: nova-lite
+    litellm_params:
+      model: bedrock/us.amazon.nova-lite-v1:0
+  - model_name: llama3-2-11b-instruct-v1:0
+    litellm_params:
+      model: bedrock/us.meta.llama3-2-11b-instruct-v1:0
+  - model_name: gpt-4o-bad
+    litellm_params:
+      model: gpt-4o
+      api_key: bad
+  - model_name: "bedrock/*"
+    litellm_params:
+      model: "bedrock/*"
+  - model_name: "openai/*"
+    litellm_params:
+      model: "openai/*"
+      api_key: os.environ/OPENAI_API_KEY
+
+general_settings:
+  store_model_in_db: true
+  disable_prisma_schema_update: true
+  # master_key: os.environ/LITELLM_MASTER_KEY

 litellm_settings:
-  success_callback: ["s3"]
-  enable_preview_features: true
-  s3_callback_params:
-    s3_bucket_name: my-new-test-bucket-litellm # AWS Bucket Name for S3
-    s3_region_name: us-west-2 # AWS Region Name for S3
-    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # us os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
-    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
-    s3_use_team_prefix: true
+  fallbacks: [{"gpt-4o-bad": ["gpt-4o"]}] #, {"gpt-4o": ["nova-lite"]}]
+  request_timeout: 600 # raise Timeout error if call takes longer than 600 seconds. Default value is 6000seconds if not set
+  # set_verbose: false # Switch off Debug Logging, ensure your logs do not have any debugging on
+  # json_logs: true # Get debug logs in json format
+  ssl_verify: true
+  callbacks: ["prometheus"]
+  service_callback: ["prometheus_system"]
+  turn_off_message_logging: true # turn off messages in otel
+  #callbacks: ["langfuse"]
+  redact_user_api_key_info: true
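
One way to exercise this config once the proxy is started with it, assuming the default proxy address http://0.0.0.0:4000 and no master key (the master_key line is commented out above); the client snippet below is illustrative and uses a placeholder API key.

import openai

client = openai.OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-anything")

# The deliberately broken deployment; litellm_settings.fallbacks reroutes it to gpt-4o
resp = client.chat.completions.create(
    model="gpt-4o-bad",
    messages=[{"role": "user", "content": "hello"}],
)

# A request that lands on the bedrock wildcard route targeted by the health-check change
resp = client.chat.completions.create(
    model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0",
    messages=[{"role": "user", "content": "hello"}],
)
print(resp.choices[0].message.content)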

View file

@@ -2704,7 +2704,6 @@ def test_select_model_name_for_cost_calc():
     assert return_model == "azure_ai/mistral-large"

-
 def test_moderations():
     from litellm import moderation
@@ -2722,6 +2721,7 @@ def test_moderations():
     cost = completion_cost(response, model="omni-moderation-latest")
     assert cost == 0

+
 def test_cost_calculator_azure_embedding():
     from litellm.cost_calculator import response_cost_calculator
     from litellm.types.utils import EmbeddingResponse, Usage
@@ -2747,3 +2747,10 @@ def test_cost_calculator_azure_embedding():
     except Exception as e:
         traceback.print_exc()
         pytest.fail(f"Error: {e}")
+
+
+def test_add_known_models():
+    litellm.add_known_models()
+    assert (
+        "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0" not in litellm.bedrock_models
+    )

View file

@@ -200,3 +200,11 @@ def test_azure_global_standard_get_llm_provider():
         api_key="fake-api-key",
     )
     assert custom_llm_provider == "azure_ai"
+
+
+def test_nova_bedrock_converse():
+    model, custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider(
+        model="amazon.nova-micro-v1:0",
+    )
+    assert custom_llm_provider == "bedrock"
+    assert model == "amazon.nova-micro-v1:0"

View file

@@ -1457,3 +1457,13 @@ def test_supports_vision_gemini():
     from litellm.utils import supports_vision

     assert supports_vision("gemini-1.5-pro") is True
+
+
+def test_pick_cheapest_chat_model_from_llm_provider():
+    from litellm.litellm_core_utils.llm_request_utils import (
+        pick_cheapest_chat_models_from_llm_provider,
+    )
+
+    assert len(pick_cheapest_chat_models_from_llm_provider("openai", n=3)) == 3
+    assert len(pick_cheapest_chat_models_from_llm_provider("unknown", n=1)) == 0