Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)
Litellm dev 01 11 2025 p3 (#7702)
* fix(__init__.py): exclude pricing-only model cost entries from the real model name lists; prevents bad health checks on wildcard routes
* fix(get_llm_provider.py): handle calling bedrock_converse models
Commit 267be77720 (parent 9ebb8a8795)
11 changed files with 141 additions and 66 deletions
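At a glance, the distinction this commit enforces, shown as a rough Python sketch (the keys below are illustrative examples, with the Bedrock one taken from the new test added at the end of this diff):

    # Pricing-only entries: region-scoped Bedrock keys and bare OpenAI fine-tune
    # templates exist in model_prices_and_context_window.json for cost lookups only.
    pricing_only = ["bedrock/us-west-1/meta.llama3-70b-instruct-v1:0", "ft:gpt-3.5-turbo"]

    # Real, callable model names stay in litellm.bedrock_models and friends.
    callable_models = ["meta.llama3-70b-instruct-v1:0", "gpt-4o"]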
@@ -18,6 +18,7 @@ from litellm._logging import (
     _turn_on_json,
     log_level,
 )
+import re
 from litellm.constants import (
     DEFAULT_BATCH_SIZE,
     DEFAULT_FLUSH_INTERVAL_SECONDS,

@@ -484,9 +485,44 @@ galadriel_models: List = []
 sambanova_models: List = []


+def is_bedrock_pricing_only_model(key: str) -> bool:
+    """
+    Excludes keys with the pattern 'bedrock/<region>/<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
+
+    Args:
+        key (str): A key to filter.
+
+    Returns:
+        bool: True if the key matches the Bedrock pattern, False otherwise.
+    """
+    # Regex to match 'bedrock/<region>/<model>'
+    bedrock_pattern = re.compile(r"^bedrock/[a-zA-Z0-9_-]+/.+$")
+
+    if "month-commitment" in key:
+        return True
+
+    is_match = bedrock_pattern.match(key)
+    return is_match is not None
+
+
+def is_openai_finetune_model(key: str) -> bool:
+    """
+    Excludes model cost keys with the pattern 'ft:<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
+
+    Args:
+        key (str): A key to filter.
+
+    Returns:
+        bool: True if the key matches the OpenAI finetune pattern, False otherwise.
+    """
+    return key.startswith("ft:") and not key.count(":") > 1
+
+
 def add_known_models():
     for key, value in model_cost.items():
-        if value.get("litellm_provider") == "openai":
+        if value.get("litellm_provider") == "openai" and not is_openai_finetune_model(
+            key
+        ):
             open_ai_chat_completion_models.append(key)
         elif value.get("litellm_provider") == "text-completion-openai":
             open_ai_text_completion_models.append(key)

@@ -542,7 +578,9 @@ def add_known_models():
             nlp_cloud_models.append(key)
         elif value.get("litellm_provider") == "aleph_alpha":
             aleph_alpha_models.append(key)
-        elif value.get("litellm_provider") == "bedrock":
+        elif value.get(
+            "litellm_provider"
+        ) == "bedrock" and not is_bedrock_pricing_only_model(key):
             bedrock_models.append(key)
         elif value.get("litellm_provider") == "bedrock_converse":
             bedrock_converse_models.append(key)
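A minimal usage sketch of the two new helpers, assuming they remain module-level in litellm/__init__.py (the example keys are illustrative):

    import litellm

    # Region-scoped Bedrock keys exist for pricing lookups only.
    assert litellm.is_bedrock_pricing_only_model("bedrock/us-west-1/meta.llama3-70b-instruct-v1:0")
    # A plain Bedrock model id has no '<region>/' segment, so it is kept as a real model.
    assert not litellm.is_bedrock_pricing_only_model("meta.llama3-70b-instruct-v1:0")

    # 'ft:<base-model>' is a pricing template for OpenAI fine-tunes ...
    assert litellm.is_openai_finetune_model("ft:gpt-3.5-turbo")
    # ... while a fully qualified fine-tuned model id (more than one ':') is a real model.
    assert not litellm.is_openai_finetune_model("ft:gpt-3.5-turbo:my-org::abc123")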
@@ -306,7 +306,9 @@ def get_llm_provider( # noqa: PLR0915
             custom_llm_provider = "petals"
         ## bedrock
         elif (
-            model in litellm.bedrock_models or model in litellm.bedrock_embedding_models
+            model in litellm.bedrock_models
+            or model in litellm.bedrock_embedding_models
+            or model in litellm.bedrock_converse_models
         ):
             custom_llm_provider = "bedrock"
         elif model in litellm.watsonx_models:
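This mirrors the new test_nova_bedrock_converse test further down: a model id that only appears in litellm.bedrock_converse_models is now matched by the bedrock branch. A quick sketch:

    import litellm

    # amazon.nova-micro-v1:0 lives in bedrock_converse_models, so the provider
    # lookup now resolves it instead of falling through.
    model, custom_llm_provider, _, _ = litellm.get_llm_provider(model="amazon.nova-micro-v1:0")
    assert custom_llm_provider == "bedrock"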
@@ -30,16 +30,23 @@ def _ensure_extra_body_is_safe(extra_body: Optional[Dict]) -> Optional[Dict]:
     return extra_body


-def pick_cheapest_chat_model_from_llm_provider(custom_llm_provider: str):
+def pick_cheapest_chat_models_from_llm_provider(custom_llm_provider: str, n=1):
     """
-    Pick the cheapest chat model from the LLM provider.
+    Pick the n cheapest chat models from the LLM provider.
+
+    Args:
+        custom_llm_provider (str): The name of the LLM provider.
+        n (int): The number of cheapest models to return.
+
+    Returns:
+        list[str]: A list of the n cheapest chat models.
     """
     if custom_llm_provider not in litellm.models_by_provider:
-        raise ValueError(f"Unknown LLM provider: {custom_llm_provider}")
+        return []
+
     known_models = litellm.models_by_provider.get(custom_llm_provider, [])
-    min_cost = float("inf")
-    cheapest_model = None
+    model_costs = []
+
     for model in known_models:
         try:
             model_info = litellm.get_model_info(

@@ -52,7 +59,10 @@ def pick_cheapest_chat_model_from_llm_provider(custom_llm_provider: str):
         _cost = model_info.get("input_cost_per_token", 0) + model_info.get(
             "output_cost_per_token", 0
         )
-        if _cost < min_cost:
-            min_cost = _cost
-            cheapest_model = model
-    return cheapest_model
+        model_costs.append((model, _cost))
+
+    # Sort by cost (ascending)
+    model_costs.sort(key=lambda x: x[1])
+
+    # Return the top n cheapest models
+    return [model for model, _ in model_costs[:n]]
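A usage sketch of the renamed helper (the actual model names returned depend on the current model cost map):

    from litellm.litellm_core_utils.llm_request_utils import (
        pick_cheapest_chat_models_from_llm_provider,
    )

    # Three cheapest OpenAI chat models, ranked by input + output cost per token.
    cheapest = pick_cheapest_chat_models_from_llm_provider("openai", n=3)
    print(cheapest)

    # Unknown providers now return an empty list instead of raising ValueError.
    assert pick_cheapest_chat_models_from_llm_provider("unknown", n=1) == []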
@@ -57,6 +57,9 @@ from litellm.litellm_core_utils.health_check_utils import (
     _filter_model_params,
 )
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.llm_request_utils import (
+    pick_cheapest_chat_models_from_llm_provider,
+)
 from litellm.litellm_core_utils.mock_functions import (
     mock_embedding,
     mock_image_generation,

@@ -5080,25 +5083,26 @@ def speech(
 async def ahealth_check_wildcard_models(
     model: str, custom_llm_provider: str, model_params: dict
 ) -> dict:
-    from litellm.litellm_core_utils.llm_request_utils import (
-        pick_cheapest_chat_model_from_llm_provider,
-    )
-
     # this is a wildcard model, we need to pick a random model from the provider
-    cheapest_model = pick_cheapest_chat_model_from_llm_provider(
-        custom_llm_provider=custom_llm_provider
+    cheapest_models = pick_cheapest_chat_models_from_llm_provider(
+        custom_llm_provider=custom_llm_provider, n=3
     )
-    fallback_models: Optional[List] = None
-    if custom_llm_provider in litellm.models_by_provider:
-        models = litellm.models_by_provider[custom_llm_provider]
-        random.shuffle(models)  # Shuffle the models list in place
-        fallback_models = models[:2]  # Pick the first 2 models from the shuffled list
-    model_params["model"] = cheapest_model
+    if len(cheapest_models) == 0:
+        raise Exception(
+            f"Unable to health check wildcard model for provider {custom_llm_provider}. Add a model on your config.yaml or contribute here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+        )
+    if len(cheapest_models) > 1:
+        fallback_models = cheapest_models[
+            1:
+        ]  # Pick the last 2 models from the shuffled list
+    else:
+        fallback_models = None
+    model_params["model"] = cheapest_models[0]
     model_params["fallbacks"] = fallback_models
     model_params["max_tokens"] = 1
     await acompletion(**model_params)
-    response: dict = {}  # args like remaining ratelimit etc.
-    return response
+    return {}


 async def ahealth_check(
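The selection logic in ahealth_check_wildcard_models now boils down to the following shape (a sketch of the behaviour, not the verbatim source):

    cheapest_models = pick_cheapest_chat_models_from_llm_provider(
        custom_llm_provider="openai", n=3
    )
    # Health check hits the single cheapest model; up to two more become fallbacks.
    primary = cheapest_models[0]
    fallback_models = cheapest_models[1:] if len(cheapest_models) > 1 else None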
File diff suppressed for 3 files because one or more lines are too long
@@ -1,44 +1,43 @@
 model_list:
-  - model_name: azure-embedding-model
+  # At least one model must exist for the proxy to start.
+  - model_name: gpt-4o
     litellm_params:
-      model: azure/azure-embedding-model
-      api_key: os.environ/AZURE_API_KEY
-      api_base: os.environ/AZURE_API_BASE
-  - model_name: openai-text-completion
-    litellm_params:
-      model: openai/gpt-3.5-turbo
+      model: gpt-4o
       api_key: os.environ/OPENAI_API_KEY
-  - model_name: chatbot_actions
+      # timeout: 0.1 # timeout in (seconds)
+      # stream_timeout: 0.01 # timeout for stream requests (seconds)
+  - model_name: anthropic.claude-3-5-sonnet-20241022-v2:0
     litellm_params:
-      model: langfuse/gpt-3.5-turbo
+      model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0
+  - model_name: nova-lite
+    litellm_params:
+      model: bedrock/us.amazon.nova-lite-v1:0
+  - model_name: llama3-2-11b-instruct-v1:0
+    litellm_params:
+      model: bedrock/us.meta.llama3-2-11b-instruct-v1:0
+  - model_name: gpt-4o-bad
+    litellm_params:
+      model: gpt-4o
+      api_key: bad
+  - model_name: "bedrock/*"
+    litellm_params:
+      model: "bedrock/*"
+  - model_name: "openai/*"
+    litellm_params:
+      model: "openai/*"
       api_key: os.environ/OPENAI_API_KEY
-      tpm: 1000000
-      prompt_id: "jokes"
-  - model_name: openai-deepseek
-    litellm_params:
-      model: deepseek/deepseek-chat
-      api_key: os.environ/OPENAI_API_KEY
-    model_info:
-      access_groups: ["restricted-models"]
-      custom_tokenizer:
-        identifier: deepseek-ai/DeepSeek-V3-Base
-        revision: main
-        auth_token: os.environ/HUGGINGFACE_API_KEY
-  - model_name: watsonx/ibm/granite-13b-chat-v2 # tried to keep original name for backwards compatibility but I've also tried watsonx_text
-    litellm_params:
-      model: watsonx_text/ibm/granite-13b-chat-v2
-    model_info:
-      input_cost_per_token: 0.0000006
-      output_cost_per_token: 0.0000006
+
+general_settings:
+  store_model_in_db: true
+  disable_prisma_schema_update: true
+  # master_key: os.environ/LITELLM_MASTER_KEY

 litellm_settings:
-  success_callback: ["s3"]
-  enable_preview_features: true
-  s3_callback_params:
-    s3_bucket_name: my-new-test-bucket-litellm # AWS Bucket Name for S3
-    s3_region_name: us-west-2 # AWS Region Name for S3
-    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # us os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
-    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
-    s3_use_team_prefix: true
+  fallbacks: [{"gpt-4o-bad": ["gpt-4o"]}] #, {"gpt-4o": ["nova-lite"]}]
+  request_timeout: 600 # raise Timeout error if call takes longer than 600 seconds. Default value is 6000seconds if not set
+  # set_verbose: false # Switch off Debug Logging, ensure your logs do not have any debugging on
+  # json_logs: true # Get debug logs in json format
+  ssl_verify: true
+  callbacks: ["prometheus"]
+  service_callback: ["prometheus_system"]
+  turn_off_message_logging: true # turn off messages in otel
+  #callbacks: ["langfuse"]
+  redact_user_api_key_info: true
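One way to exercise this config end to end, assuming the proxy runs locally on port 4000 and the commented-out master key is exported as LITELLM_MASTER_KEY (a hedged sketch, not part of this commit):

    import os
    import requests

    # /health runs the health check for every deployment in model_list, including
    # the wildcard "bedrock/*" and "openai/*" routes this commit targets.
    resp = requests.get(
        "http://0.0.0.0:4000/health",
        headers={"Authorization": f"Bearer {os.environ['LITELLM_MASTER_KEY']}"},
    )
    print(resp.json())  # expected to include healthy_endpoints / unhealthy_endpoints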
@@ -2704,7 +2704,6 @@ def test_select_model_name_for_cost_calc():
     assert return_model == "azure_ai/mistral-large"


-
 def test_moderations():
     from litellm import moderation


@@ -2722,6 +2721,7 @@ def test_moderations():
     cost = completion_cost(response, model="omni-moderation-latest")
     assert cost == 0

+
 def test_cost_calculator_azure_embedding():
     from litellm.cost_calculator import response_cost_calculator
     from litellm.types.utils import EmbeddingResponse, Usage

@@ -2747,3 +2747,10 @@ def test_cost_calculator_azure_embedding():
     except Exception as e:
         traceback.print_exc()
         pytest.fail(f"Error: {e}")
+
+
+def test_add_known_models():
+    litellm.add_known_models()
+    assert (
+        "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0" not in litellm.bedrock_models
+    )
@@ -200,3 +200,11 @@ def test_azure_global_standard_get_llm_provider():
         api_key="fake-api-key",
     )
     assert custom_llm_provider == "azure_ai"
+
+
+def test_nova_bedrock_converse():
+    model, custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider(
+        model="amazon.nova-micro-v1:0",
+    )
+    assert custom_llm_provider == "bedrock"
+    assert model == "amazon.nova-micro-v1:0"
@@ -1457,3 +1457,13 @@ def test_supports_vision_gemini():
     from litellm.utils import supports_vision

     assert supports_vision("gemini-1.5-pro") is True
+
+
+def test_pick_cheapest_chat_model_from_llm_provider():
+    from litellm.litellm_core_utils.llm_request_utils import (
+        pick_cheapest_chat_models_from_llm_provider,
+    )
+
+    assert len(pick_cheapest_chat_models_from_llm_provider("openai", n=3)) == 3
+
+    assert len(pick_cheapest_chat_models_from_llm_provider("unknown", n=1)) == 0