Litellm dev 01 11 2025 p3 (#7702)

* fix(__init__.py): fix init to exclude pricing-only model cost values from real model names

prevents bad health checks on wildcard routes

* fix(get_llm_provider.py): fix to handle calling bedrock_converse models
Krish Dholakia 2025-01-11 20:06:54 -08:00 committed by GitHub
parent 9ebb8a8795
commit 267be77720
11 changed files with 141 additions and 66 deletions
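
As an illustrative sketch of the net effect (not part of the diff, and mirroring the test added in this commit), the region-scoped Bedrock pricing entries no longer leak into the real model lists once this change is installed:

    import litellm

    # Pricing-only keys such as "bedrock/<region>/<model>" are excluded from
    # litellm.bedrock_models, so wildcard health checks cannot pick them as
    # callable model names.
    litellm.add_known_models()
    assert (
        "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0"
        not in litellm.bedrock_models
    )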


@@ -18,6 +18,7 @@ from litellm._logging import (
     _turn_on_json,
     log_level,
 )
+import re
 from litellm.constants import (
     DEFAULT_BATCH_SIZE,
     DEFAULT_FLUSH_INTERVAL_SECONDS,
@@ -484,9 +485,44 @@ galadriel_models: List = []
 sambanova_models: List = []


+def is_bedrock_pricing_only_model(key: str) -> bool:
+    """
+    Excludes keys with the pattern 'bedrock/<region>/<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
+
+    Args:
+        key (str): A key to filter.
+
+    Returns:
+        bool: True if the key matches the Bedrock pattern, False otherwise.
+    """
+    # Regex to match 'bedrock/<region>/<model>'
+    bedrock_pattern = re.compile(r"^bedrock/[a-zA-Z0-9_-]+/.+$")
+
+    if "month-commitment" in key:
+        return True
+
+    is_match = bedrock_pattern.match(key)
+    return is_match is not None
+
+
+def is_openai_finetune_model(key: str) -> bool:
+    """
+    Excludes model cost keys with the pattern 'ft:<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
+
+    Args:
+        key (str): A key to filter.
+
+    Returns:
+        bool: True if the key matches the OpenAI finetune pattern, False otherwise.
+    """
+    return key.startswith("ft:") and not key.count(":") > 1
+
+
 def add_known_models():
     for key, value in model_cost.items():
-        if value.get("litellm_provider") == "openai":
+        if value.get("litellm_provider") == "openai" and not is_openai_finetune_model(
+            key
+        ):
             open_ai_chat_completion_models.append(key)
         elif value.get("litellm_provider") == "text-completion-openai":
             open_ai_text_completion_models.append(key)
@@ -542,7 +578,9 @@ def add_known_models():
             nlp_cloud_models.append(key)
         elif value.get("litellm_provider") == "aleph_alpha":
             aleph_alpha_models.append(key)
-        elif value.get("litellm_provider") == "bedrock":
+        elif value.get(
+            "litellm_provider"
+        ) == "bedrock" and not is_bedrock_pricing_only_model(key):
             bedrock_models.append(key)
         elif value.get("litellm_provider") == "bedrock_converse":
             bedrock_converse_models.append(key)
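
A minimal sketch of how the two new helpers classify keys, assuming they are imported from a litellm build that includes this change; the example keys are illustrative:

    from litellm import is_bedrock_pricing_only_model, is_openai_finetune_model

    # Region-scoped and commitment-term Bedrock keys exist only for pricing lookups.
    assert is_bedrock_pricing_only_model("bedrock/us-west-1/meta.llama3-70b-instruct-v1:0")
    assert is_bedrock_pricing_only_model("anthropic.claude-v2:1-month-commitment")
    # A plain Bedrock model ID is kept as a real, callable model name.
    assert not is_bedrock_pricing_only_model("anthropic.claude-3-5-sonnet-20241022-v2:0")

    # Base fine-tune pricing keys ("ft:<model>") are excluded, while fully qualified
    # fine-tuned model IDs with extra ":" segments are not.
    assert is_openai_finetune_model("ft:gpt-3.5-turbo")
    assert not is_openai_finetune_model("ft:gpt-3.5-turbo:my-org:custom-suffix:id")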


@@ -306,7 +306,9 @@ def get_llm_provider(  # noqa: PLR0915
             custom_llm_provider = "petals"
         ## bedrock
         elif (
-            model in litellm.bedrock_models or model in litellm.bedrock_embedding_models
+            model in litellm.bedrock_models
+            or model in litellm.bedrock_embedding_models
+            or model in litellm.bedrock_converse_models
         ):
             custom_llm_provider = "bedrock"
         elif model in litellm.watsonx_models:
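
The new test further down exercises this path; as a short usage sketch, a model registered under bedrock_converse now resolves to the "bedrock" provider:

    import litellm

    model, custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider(
        model="amazon.nova-micro-v1:0",
    )
    assert custom_llm_provider == "bedrock"
    assert model == "amazon.nova-micro-v1:0"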


@@ -30,16 +30,23 @@ def _ensure_extra_body_is_safe(extra_body: Optional[Dict]) -> Optional[Dict]:
     return extra_body


-def pick_cheapest_chat_model_from_llm_provider(custom_llm_provider: str):
+def pick_cheapest_chat_models_from_llm_provider(custom_llm_provider: str, n=1):
     """
-    Pick the cheapest chat model from the LLM provider.
+    Pick the n cheapest chat models from the LLM provider.
+
+    Args:
+        custom_llm_provider (str): The name of the LLM provider.
+        n (int): The number of cheapest models to return.
+
+    Returns:
+        list[str]: A list of the n cheapest chat models.
     """
     if custom_llm_provider not in litellm.models_by_provider:
-        raise ValueError(f"Unknown LLM provider: {custom_llm_provider}")
+        return []

     known_models = litellm.models_by_provider.get(custom_llm_provider, [])
-    min_cost = float("inf")
-    cheapest_model = None
+    model_costs = []
+
     for model in known_models:
         try:
             model_info = litellm.get_model_info(
@@ -52,7 +59,10 @@ def pick_cheapest_chat_model_from_llm_provider(custom_llm_provider: str):
         _cost = model_info.get("input_cost_per_token", 0) + model_info.get(
             "output_cost_per_token", 0
         )
-        if _cost < min_cost:
-            min_cost = _cost
-            cheapest_model = model
-    return cheapest_model
+        model_costs.append((model, _cost))
+
+    # Sort by cost (ascending)
+    model_costs.sort(key=lambda x: x[1])
+
+    # Return the top n cheapest models
+    return [model for model, _ in model_costs[:n]]
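
A small usage sketch of the reworked helper, following the tests added at the end of this commit; it assumes the bundled model cost map is loaded:

    from litellm.litellm_core_utils.llm_request_utils import (
        pick_cheapest_chat_models_from_llm_provider,
    )

    # Three cheapest OpenAI chat models, ranked by input + output cost per token.
    cheapest = pick_cheapest_chat_models_from_llm_provider("openai", n=3)
    assert len(cheapest) == 3

    # Unknown providers now return an empty list instead of raising ValueError.
    assert pick_cheapest_chat_models_from_llm_provider("unknown", n=1) == []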


@@ -57,6 +57,9 @@ from litellm.litellm_core_utils.health_check_utils import (
     _filter_model_params,
 )
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.llm_request_utils import (
+    pick_cheapest_chat_models_from_llm_provider,
+)
 from litellm.litellm_core_utils.mock_functions import (
     mock_embedding,
     mock_image_generation,
@@ -5080,25 +5083,26 @@ def speech(
 async def ahealth_check_wildcard_models(
     model: str, custom_llm_provider: str, model_params: dict
 ) -> dict:
-    from litellm.litellm_core_utils.llm_request_utils import (
-        pick_cheapest_chat_model_from_llm_provider,
-    )
-
     # this is a wildcard model, we need to pick a random model from the provider
-    cheapest_model = pick_cheapest_chat_model_from_llm_provider(
-        custom_llm_provider=custom_llm_provider
+    cheapest_models = pick_cheapest_chat_models_from_llm_provider(
+        custom_llm_provider=custom_llm_provider, n=3
     )
-    fallback_models: Optional[List] = None
-    if custom_llm_provider in litellm.models_by_provider:
-        models = litellm.models_by_provider[custom_llm_provider]
-        random.shuffle(models)  # Shuffle the models list in place
-        fallback_models = models[:2]  # Pick the first 2 models from the shuffled list
-    model_params["model"] = cheapest_model
+    if len(cheapest_models) == 0:
+        raise Exception(
+            f"Unable to health check wildcard model for provider {custom_llm_provider}. Add a model on your config.yaml or contribute here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+        )
+
+    if len(cheapest_models) > 1:
+        fallback_models = cheapest_models[
+            1:
+        ]  # Pick the last 2 models from the shuffled list
+    else:
+        fallback_models = None
+    model_params["model"] = cheapest_models[0]
     model_params["fallbacks"] = fallback_models
     model_params["max_tokens"] = 1
     await acompletion(**model_params)
-    response: dict = {}  # args like remaining ratelimit etc.
-    return response
+    return {}


 async def ahealth_check(
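
A hedged sketch of calling the reworked wildcard health check directly; the route name and model_params below are assumptions for illustration, and running it would issue a real provider request:

    import asyncio
    from litellm.main import ahealth_check_wildcard_models

    async def check_bedrock_wildcard() -> dict:
        # Picks the cheapest known bedrock model, attaches the next-cheapest
        # models as fallbacks, and sends a 1-token completion as the health probe.
        return await ahealth_check_wildcard_models(
            model="bedrock/*",  # assumed wildcard route name
            custom_llm_provider="bedrock",
            model_params={"messages": [{"role": "user", "content": "ping"}]},
        )

    # asyncio.run(check_bedrock_wildcard())  # requires provider credentials to be configured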

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1,44 +1,43 @@
model_list:
  - model_name: azure-embedding-model
  # At least one model must exist for the proxy to start.
  - model_name: gpt-4o
    litellm_params:
      model: azure/azure-embedding-model
      api_key: os.environ/AZURE_API_KEY
      api_base: os.environ/AZURE_API_BASE
  - model_name: openai-text-completion
    litellm_params:
      model: openai/gpt-3.5-turbo
      model: gpt-4o
      api_key: os.environ/OPENAI_API_KEY
  - model_name: chatbot_actions
    # timeout: 0.1 # timeout in (seconds)
    # stream_timeout: 0.01 # timeout for stream requests (seconds)
  - model_name: anthropic.claude-3-5-sonnet-20241022-v2:0
    litellm_params:
      model: langfuse/gpt-3.5-turbo
      model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0
  - model_name: nova-lite
    litellm_params:
      model: bedrock/us.amazon.nova-lite-v1:0
  - model_name: llama3-2-11b-instruct-v1:0
    litellm_params:
      model: bedrock/us.meta.llama3-2-11b-instruct-v1:0
  - model_name: gpt-4o-bad
    litellm_params:
      model: gpt-4o
      api_key: bad
  - model_name: "bedrock/*"
    litellm_params:
      model: "bedrock/*"
  - model_name: "openai/*"
    litellm_params:
      model: "openai/*"
      api_key: os.environ/OPENAI_API_KEY
      tpm: 1000000
      prompt_id: "jokes"
  - model_name: openai-deepseek
    litellm_params:
      model: deepseek/deepseek-chat
      api_key: os.environ/OPENAI_API_KEY
    model_info:
      access_groups: ["restricted-models"]
      custom_tokenizer:
        identifier: deepseek-ai/DeepSeek-V3-Base
        revision: main
        auth_token: os.environ/HUGGINGFACE_API_KEY
  - model_name: watsonx/ibm/granite-13b-chat-v2 # tried to keep original name for backwards compatibility but I've also tried watsonx_text
    litellm_params:
      model: watsonx_text/ibm/granite-13b-chat-v2
    model_info:
      input_cost_per_token: 0.0000006
      output_cost_per_token: 0.0000006
general_settings:
  store_model_in_db: true
  disable_prisma_schema_update: true
  # master_key: os.environ/LITELLM_MASTER_KEY
litellm_settings:
  success_callback: ["s3"]
  enable_preview_features: true
  s3_callback_params:
    s3_bucket_name: my-new-test-bucket-litellm # AWS Bucket Name for S3
    s3_region_name: us-west-2 # AWS Region Name for S3
    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # use os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
    s3_use_team_prefix: true
  fallbacks: [{"gpt-4o-bad": ["gpt-4o"]}] #, {"gpt-4o": ["nova-lite"]}]
  request_timeout: 600 # raise Timeout error if call takes longer than 600 seconds. Default value is 6000 seconds if not set
  # set_verbose: false # Switch off Debug Logging, ensure your logs do not have any debugging on
  # json_logs: true # Get debug logs in json format
  ssl_verify: true
  callbacks: ["prometheus"]
  service_callback: ["prometheus_system"]
  turn_off_message_logging: true # turn off messages in otel
  #callbacks: ["langfuse"]
  redact_user_api_key_info: true
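
Once the proxy is running with a config like the one above, the routes (including the wildcard entries) can be health-checked over HTTP; a minimal sketch, assuming the proxy listens on the default http://0.0.0.0:4000 and uses sk-1234 as the master key:

    import requests

    resp = requests.get(
        "http://0.0.0.0:4000/health",
        headers={"Authorization": "Bearer sk-1234"},  # assumed master key
    )
    # Reports healthy and unhealthy endpoints per configured model.
    print(resp.json())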


@@ -2704,7 +2704,6 @@ def test_select_model_name_for_cost_calc():
     assert return_model == "azure_ai/mistral-large"


 def test_moderations():
     from litellm import moderation
@@ -2722,6 +2721,7 @@ def test_moderations():
     cost = completion_cost(response, model="omni-moderation-latest")
     assert cost == 0


 def test_cost_calculator_azure_embedding():
     from litellm.cost_calculator import response_cost_calculator
     from litellm.types.utils import EmbeddingResponse, Usage
@@ -2747,3 +2747,10 @@ def test_cost_calculator_azure_embedding():
     except Exception as e:
         traceback.print_exc()
         pytest.fail(f"Error: {e}")
+
+
+def test_add_known_models():
+    litellm.add_known_models()
+    assert (
+        "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0" not in litellm.bedrock_models
+    )


@@ -200,3 +200,11 @@ def test_azure_global_standard_get_llm_provider():
         api_key="fake-api-key",
     )
     assert custom_llm_provider == "azure_ai"
+
+
+def test_nova_bedrock_converse():
+    model, custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider(
+        model="amazon.nova-micro-v1:0",
+    )
+    assert custom_llm_provider == "bedrock"
+    assert model == "amazon.nova-micro-v1:0"


@@ -1457,3 +1457,13 @@ def test_supports_vision_gemini():
     from litellm.utils import supports_vision

     assert supports_vision("gemini-1.5-pro") is True
+
+
+def test_pick_cheapest_chat_model_from_llm_provider():
+    from litellm.litellm_core_utils.llm_request_utils import (
+        pick_cheapest_chat_models_from_llm_provider,
+    )
+
+    assert len(pick_cheapest_chat_models_from_llm_provider("openai", n=3)) == 3
+    assert len(pick_cheapest_chat_models_from_llm_provider("unknown", n=1)) == 0