mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00

Litellm dev 01 11 2025 p3 (#7702)

* fix(__init__.py): fix init to exclude pricing-only model cost values from real model names

  prevents bad health checks on wildcard routes

* fix(get_llm_provider.py): fix to handle calling bedrock_converse models

parent 9ebb8a8795
commit 267be77720

11 changed files with 141 additions and 66 deletions
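
The gist of the __init__.py change: model_prices_and_context_window.json contains keys such as bedrock/us-west-1/meta.llama3-70b-instruct-v1:0 and ft:gpt-3.5-turbo that exist only for cost lookups, not as callable model names. Registering them in lists like bedrock_models meant a wildcard-route health check could pick a model that cannot actually be called. A standalone sketch of the filtering rule (illustration only, not the litellm source; the real helpers are in the diff below):

import re

# Rough equivalent of the pricing-only filters added in this commit.
BEDROCK_REGION_PATTERN = re.compile(r"^bedrock/[a-zA-Z0-9_-]+/.+$")


def is_pricing_only_key(key: str) -> bool:
    """Return True for cost-table keys that are not real, callable model names."""
    if "month-commitment" in key:  # commitment-based pricing rows
        return True
    if BEDROCK_REGION_PATTERN.match(key):  # 'bedrock/<region>/<model>' pricing rows
        return True
    if key.startswith("ft:") and not key.count(":") > 1:  # OpenAI fine-tune pricing rows
        return True
    return False


assert is_pricing_only_key("bedrock/us-west-1/meta.llama3-70b-instruct-v1:0")
assert is_pricing_only_key("ft:gpt-3.5-turbo")
assert not is_pricing_only_key("anthropic.claude-3-5-sonnet-20241022-v2:0")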

@@ -18,6 +18,7 @@ from litellm._logging import (
     _turn_on_json,
     log_level,
 )
+import re
 from litellm.constants import (
     DEFAULT_BATCH_SIZE,
     DEFAULT_FLUSH_INTERVAL_SECONDS,

@@ -484,9 +485,44 @@ galadriel_models: List = []
 sambanova_models: List = []


+def is_bedrock_pricing_only_model(key: str) -> bool:
+    """
+    Excludes keys with the pattern 'bedrock/<region>/<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
+
+    Args:
+        key (str): A key to filter.
+
+    Returns:
+        bool: True if the key matches the Bedrock pattern, False otherwise.
+    """
+    # Regex to match 'bedrock/<region>/<model>'
+    bedrock_pattern = re.compile(r"^bedrock/[a-zA-Z0-9_-]+/.+$")
+
+    if "month-commitment" in key:
+        return True
+
+    is_match = bedrock_pattern.match(key)
+    return is_match is not None
+
+
+def is_openai_finetune_model(key: str) -> bool:
+    """
+    Excludes model cost keys with the pattern 'ft:<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
+
+    Args:
+        key (str): A key to filter.
+
+    Returns:
+        bool: True if the key matches the OpenAI finetune pattern, False otherwise.
+    """
+    return key.startswith("ft:") and not key.count(":") > 1
+
+
 def add_known_models():
     for key, value in model_cost.items():
-        if value.get("litellm_provider") == "openai":
+        if value.get("litellm_provider") == "openai" and not is_openai_finetune_model(
+            key
+        ):
             open_ai_chat_completion_models.append(key)
         elif value.get("litellm_provider") == "text-completion-openai":
             open_ai_text_completion_models.append(key)

@@ -542,7 +578,9 @@ def add_known_models():
             nlp_cloud_models.append(key)
         elif value.get("litellm_provider") == "aleph_alpha":
             aleph_alpha_models.append(key)
-        elif value.get("litellm_provider") == "bedrock":
+        elif value.get(
+            "litellm_provider"
+        ) == "bedrock" and not is_bedrock_pricing_only_model(key):
             bedrock_models.append(key)
         elif value.get("litellm_provider") == "bedrock_converse":
             bedrock_converse_models.append(key)

@@ -306,7 +306,9 @@ def get_llm_provider(  # noqa: PLR0915
             custom_llm_provider = "petals"
         ## bedrock
         elif (
-            model in litellm.bedrock_models or model in litellm.bedrock_embedding_models
+            model in litellm.bedrock_models
+            or model in litellm.bedrock_embedding_models
+            or model in litellm.bedrock_converse_models
         ):
             custom_llm_provider = "bedrock"
         elif model in litellm.watsonx_models:
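
In practice this means bedrock_converse-only models resolve to the bedrock provider just like classic bedrock ones (the new test_nova_bedrock_converse test further down checks exactly this). A quick illustration, assuming a litellm build that includes this commit:

import litellm

# amazon.nova-micro-v1:0 is registered under litellm.bedrock_converse_models,
# so provider resolution now maps it to "bedrock" instead of failing.
model, provider, dynamic_api_key, api_base = litellm.get_llm_provider(
    model="amazon.nova-micro-v1:0",
)
assert provider == "bedrock"
assert model == "amazon.nova-micro-v1:0"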

@@ -30,16 +30,23 @@ def _ensure_extra_body_is_safe(extra_body: Optional[Dict]) -> Optional[Dict]:
     return extra_body


-def pick_cheapest_chat_model_from_llm_provider(custom_llm_provider: str):
+def pick_cheapest_chat_models_from_llm_provider(custom_llm_provider: str, n=1):
     """
-    Pick the cheapest chat model from the LLM provider.
+    Pick the n cheapest chat models from the LLM provider.
+
+    Args:
+        custom_llm_provider (str): The name of the LLM provider.
+        n (int): The number of cheapest models to return.
+
+    Returns:
+        list[str]: A list of the n cheapest chat models.
     """
     if custom_llm_provider not in litellm.models_by_provider:
-        raise ValueError(f"Unknown LLM provider: {custom_llm_provider}")
+        return []

     known_models = litellm.models_by_provider.get(custom_llm_provider, [])
-    min_cost = float("inf")
-    cheapest_model = None
+    model_costs = []

     for model in known_models:
         try:
             model_info = litellm.get_model_info(

@@ -52,7 +59,10 @@ def pick_cheapest_chat_model_from_llm_provider(custom_llm_provider: str):
             _cost = model_info.get("input_cost_per_token", 0) + model_info.get(
                 "output_cost_per_token", 0
             )
-            if _cost < min_cost:
-                min_cost = _cost
-                cheapest_model = model
-    return cheapest_model
+            model_costs.append((model, _cost))
+
+    # Sort by cost (ascending)
+    model_costs.sort(key=lambda x: x[1])
+
+    # Return the top n cheapest models
+    return [model for model, _ in model_costs[:n]]
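
Usage of the renamed helper, as exercised by the new unit test at the bottom of this diff (assumes a build containing this change):

from litellm.litellm_core_utils.llm_request_utils import (
    pick_cheapest_chat_models_from_llm_provider,
)

# Returns up to n model names, ordered by (input + output) cost per token.
cheapest = pick_cheapest_chat_models_from_llm_provider(custom_llm_provider="openai", n=3)
assert len(cheapest) == 3

# Unknown providers no longer raise ValueError; they return an empty list.
assert pick_cheapest_chat_models_from_llm_provider(custom_llm_provider="unknown", n=1) == []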

@@ -57,6 +57,9 @@ from litellm.litellm_core_utils.health_check_utils import (
     _filter_model_params,
 )
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.llm_request_utils import (
+    pick_cheapest_chat_models_from_llm_provider,
+)
 from litellm.litellm_core_utils.mock_functions import (
     mock_embedding,
     mock_image_generation,

@@ -5080,25 +5083,26 @@ def speech(
 async def ahealth_check_wildcard_models(
     model: str, custom_llm_provider: str, model_params: dict
 ) -> dict:
-    from litellm.litellm_core_utils.llm_request_utils import (
-        pick_cheapest_chat_model_from_llm_provider,
-    )
-
     # this is a wildcard model, we need to pick a random model from the provider
-    cheapest_model = pick_cheapest_chat_model_from_llm_provider(
-        custom_llm_provider=custom_llm_provider
+    cheapest_models = pick_cheapest_chat_models_from_llm_provider(
+        custom_llm_provider=custom_llm_provider, n=3
     )
-    fallback_models: Optional[List] = None
-    if custom_llm_provider in litellm.models_by_provider:
-        models = litellm.models_by_provider[custom_llm_provider]
-        random.shuffle(models)  # Shuffle the models list in place
-        fallback_models = models[:2]  # Pick the first 2 models from the shuffled list
-    model_params["model"] = cheapest_model
+    if len(cheapest_models) == 0:
+        raise Exception(
+            f"Unable to health check wildcard model for provider {custom_llm_provider}. Add a model on your config.yaml or contribute here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+        )
+    if len(cheapest_models) > 1:
+        fallback_models = cheapest_models[
+            1:
+        ]  # Pick the last 2 models from the shuffled list
+    else:
+        fallback_models = None
+    model_params["model"] = cheapest_models[0]
     model_params["fallbacks"] = fallback_models
     model_params["max_tokens"] = 1
     await acompletion(**model_params)
-    response: dict = {}  # args like remaining ratelimit etc.
-    return response
+    return {}


 async def ahealth_check(
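
The wildcard health check now derives its primary model and fallbacks from the cheapest known models for the provider instead of shuffling the full provider list. A minimal standalone sketch of that selection step (illustration only, not the litellm source):

from typing import List, Optional, Tuple


def split_primary_and_fallbacks(cheapest_models: List[str]) -> Tuple[str, Optional[List[str]]]:
    # Mirrors the new selection in ahealth_check_wildcard_models: the cheapest
    # model is health-checked, any remaining ones become fallbacks.
    if len(cheapest_models) == 0:
        raise Exception("no known models for this provider")
    fallbacks = cheapest_models[1:] if len(cheapest_models) > 1 else None
    return cheapest_models[0], fallbacks


primary, fallbacks = split_primary_and_fallbacks(["gpt-4o-mini", "gpt-4o", "gpt-4"])
assert primary == "gpt-4o-mini"
assert fallbacks == ["gpt-4o", "gpt-4"]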
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long

@@ -1,44 +1,43 @@
model_list:
  - model_name: azure-embedding-model
  # At least one model must exist for the proxy to start.
  - model_name: gpt-4o
    litellm_params:
      model: azure/azure-embedding-model
      api_key: os.environ/AZURE_API_KEY
      api_base: os.environ/AZURE_API_BASE
  - model_name: openai-text-completion
    litellm_params:
      model: openai/gpt-3.5-turbo
      model: gpt-4o
      api_key: os.environ/OPENAI_API_KEY
  - model_name: chatbot_actions
    # timeout: 0.1 # timeout in (seconds)
    # stream_timeout: 0.01 # timeout for stream requests (seconds)
  - model_name: anthropic.claude-3-5-sonnet-20241022-v2:0
    litellm_params:
      model: langfuse/gpt-3.5-turbo
      model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0
  - model_name: nova-lite
    litellm_params:
      model: bedrock/us.amazon.nova-lite-v1:0
  - model_name: llama3-2-11b-instruct-v1:0
    litellm_params:
      model: bedrock/us.meta.llama3-2-11b-instruct-v1:0
  - model_name: gpt-4o-bad
    litellm_params:
      model: gpt-4o
      api_key: bad
  - model_name: "bedrock/*"
    litellm_params:
      model: "bedrock/*"
  - model_name: "openai/*"
    litellm_params:
      model: "openai/*"
      api_key: os.environ/OPENAI_API_KEY
      tpm: 1000000
      prompt_id: "jokes"
  - model_name: openai-deepseek
    litellm_params:
      model: deepseek/deepseek-chat
      api_key: os.environ/OPENAI_API_KEY
    model_info:
      access_groups: ["restricted-models"]
      custom_tokenizer:
        identifier: deepseek-ai/DeepSeek-V3-Base
        revision: main
        auth_token: os.environ/HUGGINGFACE_API_KEY
  - model_name: watsonx/ibm/granite-13b-chat-v2 # tried to keep original name for backwards compatibility but I've also tried watsonx_text
    litellm_params:
      model: watsonx_text/ibm/granite-13b-chat-v2
    model_info:
      input_cost_per_token: 0.0000006
      output_cost_per_token: 0.0000006


general_settings:
  store_model_in_db: true
  disable_prisma_schema_update: true
  # master_key: os.environ/LITELLM_MASTER_KEY

litellm_settings:
  success_callback: ["s3"]
  enable_preview_features: true
  s3_callback_params:
    s3_bucket_name: my-new-test-bucket-litellm # AWS Bucket Name for S3
    s3_region_name: us-west-2 # AWS Region Name for S3
    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # use os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
    s3_use_team_prefix: true

  fallbacks: [{"gpt-4o-bad": ["gpt-4o"]}] #, {"gpt-4o": ["nova-lite"]}]
  request_timeout: 600 # raise Timeout error if call takes longer than 600 seconds. Default value is 6000 seconds if not set
  # set_verbose: false # Switch off Debug Logging, ensure your logs do not have any debugging on
  # json_logs: true # Get debug logs in json format
  ssl_verify: true
  callbacks: ["prometheus"]
  service_callback: ["prometheus_system"]
  turn_off_message_logging: true # turn off messages in otel
  #callbacks: ["langfuse"]
  redact_user_api_key_info: true

@@ -2704,7 +2704,6 @@ def test_select_model_name_for_cost_calc():
     assert return_model == "azure_ai/mistral-large"


-
 def test_moderations():
     from litellm import moderation


@@ -2722,6 +2721,7 @@ def test_moderations():
     cost = completion_cost(response, model="omni-moderation-latest")
     assert cost == 0

+
 def test_cost_calculator_azure_embedding():
     from litellm.cost_calculator import response_cost_calculator
     from litellm.types.utils import EmbeddingResponse, Usage

@@ -2747,3 +2747,10 @@ def test_cost_calculator_azure_embedding():
     except Exception as e:
         traceback.print_exc()
         pytest.fail(f"Error: {e}")
+
+
+def test_add_known_models():
+    litellm.add_known_models()
+    assert (
+        "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0" not in litellm.bedrock_models
+    )

@@ -200,3 +200,11 @@ def test_azure_global_standard_get_llm_provider():
         api_key="fake-api-key",
     )
     assert custom_llm_provider == "azure_ai"
+
+
+def test_nova_bedrock_converse():
+    model, custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider(
+        model="amazon.nova-micro-v1:0",
+    )
+    assert custom_llm_provider == "bedrock"
+    assert model == "amazon.nova-micro-v1:0"

@@ -1457,3 +1457,13 @@ def test_supports_vision_gemini():
     from litellm.utils import supports_vision

     assert supports_vision("gemini-1.5-pro") is True
+
+
+def test_pick_cheapest_chat_model_from_llm_provider():
+    from litellm.litellm_core_utils.llm_request_utils import (
+        pick_cheapest_chat_models_from_llm_provider,
+    )
+
+    assert len(pick_cheapest_chat_models_from_llm_provider("openai", n=3)) == 3
+
+    assert len(pick_cheapest_chat_models_from_llm_provider("unknown", n=1)) == 0