Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)
Litellm dev 01 11 2025 p3 (#7702)
* fix(__init__.py): exclude pricing-only model cost entries from the real model name lists; prevents bad health checks on wildcard routes
* fix(get_llm_provider.py): handle calling bedrock_converse models
Commit 267be77720 (parent 9ebb8a8795)
11 changed files with 141 additions and 66 deletions
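At a glance, the distinction this commit enforces, shown as a rough Python sketch (the keys below are illustrative examples, with the Bedrock one taken from the new test added at the end of this diff):

    # Pricing-only entries: region-scoped Bedrock keys and bare OpenAI fine-tune
    # templates exist in model_prices_and_context_window.json for cost lookups only.
    pricing_only = ["bedrock/us-west-1/meta.llama3-70b-instruct-v1:0", "ft:gpt-3.5-turbo"]

    # Real, callable model names stay in litellm.bedrock_models and friends.
    callable_models = ["meta.llama3-70b-instruct-v1:0", "gpt-4o"]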
@@ -18,6 +18,7 @@ from litellm._logging import (
     _turn_on_json,
     log_level,
 )
+import re
 from litellm.constants import (
     DEFAULT_BATCH_SIZE,
     DEFAULT_FLUSH_INTERVAL_SECONDS,

@@ -484,9 +485,44 @@ galadriel_models: List = []
 sambanova_models: List = []


+def is_bedrock_pricing_only_model(key: str) -> bool:
+    """
+    Excludes keys with the pattern 'bedrock/<region>/<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
+
+    Args:
+        key (str): A key to filter.
+
+    Returns:
+        bool: True if the key matches the Bedrock pattern, False otherwise.
+    """
+    # Regex to match 'bedrock/<region>/<model>'
+    bedrock_pattern = re.compile(r"^bedrock/[a-zA-Z0-9_-]+/.+$")
+
+    if "month-commitment" in key:
+        return True
+
+    is_match = bedrock_pattern.match(key)
+    return is_match is not None
+
+
+def is_openai_finetune_model(key: str) -> bool:
+    """
+    Excludes model cost keys with the pattern 'ft:<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
+
+    Args:
+        key (str): A key to filter.
+
+    Returns:
+        bool: True if the key matches the OpenAI finetune pattern, False otherwise.
+    """
+    return key.startswith("ft:") and not key.count(":") > 1
+
+
 def add_known_models():
     for key, value in model_cost.items():
-        if value.get("litellm_provider") == "openai":
+        if value.get("litellm_provider") == "openai" and not is_openai_finetune_model(
+            key
+        ):
             open_ai_chat_completion_models.append(key)
         elif value.get("litellm_provider") == "text-completion-openai":
             open_ai_text_completion_models.append(key)

@@ -542,7 +578,9 @@ def add_known_models():
             nlp_cloud_models.append(key)
         elif value.get("litellm_provider") == "aleph_alpha":
             aleph_alpha_models.append(key)
-        elif value.get("litellm_provider") == "bedrock":
+        elif value.get(
+            "litellm_provider"
+        ) == "bedrock" and not is_bedrock_pricing_only_model(key):
             bedrock_models.append(key)
         elif value.get("litellm_provider") == "bedrock_converse":
             bedrock_converse_models.append(key)
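A minimal usage sketch of the two new helpers, assuming they remain module-level in litellm/__init__.py (the example keys are illustrative):

    import litellm

    # Region-scoped Bedrock keys exist for pricing lookups only.
    assert litellm.is_bedrock_pricing_only_model("bedrock/us-west-1/meta.llama3-70b-instruct-v1:0")
    # A plain Bedrock model id has no '<region>/' segment, so it is kept as a real model.
    assert not litellm.is_bedrock_pricing_only_model("meta.llama3-70b-instruct-v1:0")

    # 'ft:<base-model>' is a pricing template for OpenAI fine-tunes ...
    assert litellm.is_openai_finetune_model("ft:gpt-3.5-turbo")
    # ... while a fully qualified fine-tuned model id (more than one ':') is a real model.
    assert not litellm.is_openai_finetune_model("ft:gpt-3.5-turbo:my-org::abc123")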
@@ -306,7 +306,9 @@ def get_llm_provider( # noqa: PLR0915
             custom_llm_provider = "petals"
         ## bedrock
         elif (
-            model in litellm.bedrock_models or model in litellm.bedrock_embedding_models
+            model in litellm.bedrock_models
+            or model in litellm.bedrock_embedding_models
+            or model in litellm.bedrock_converse_models
         ):
             custom_llm_provider = "bedrock"
         elif model in litellm.watsonx_models:
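This mirrors the new test_nova_bedrock_converse test further down: a model id that only appears in litellm.bedrock_converse_models is now matched by the bedrock branch. A quick sketch:

    import litellm

    # amazon.nova-micro-v1:0 lives in bedrock_converse_models, so the provider
    # lookup now resolves it instead of falling through.
    model, custom_llm_provider, _, _ = litellm.get_llm_provider(model="amazon.nova-micro-v1:0")
    assert custom_llm_provider == "bedrock"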
@@ -30,16 +30,23 @@ def _ensure_extra_body_is_safe(extra_body: Optional[Dict]) -> Optional[Dict]:
     return extra_body


-def pick_cheapest_chat_model_from_llm_provider(custom_llm_provider: str):
+def pick_cheapest_chat_models_from_llm_provider(custom_llm_provider: str, n=1):
     """
-    Pick the cheapest chat model from the LLM provider.
+    Pick the n cheapest chat models from the LLM provider.
+
+    Args:
+        custom_llm_provider (str): The name of the LLM provider.
+        n (int): The number of cheapest models to return.
+
+    Returns:
+        list[str]: A list of the n cheapest chat models.
     """
     if custom_llm_provider not in litellm.models_by_provider:
-        raise ValueError(f"Unknown LLM provider: {custom_llm_provider}")
+        return []
+
     known_models = litellm.models_by_provider.get(custom_llm_provider, [])
-    min_cost = float("inf")
-    cheapest_model = None
+    model_costs = []
+
     for model in known_models:
         try:
             model_info = litellm.get_model_info(

@@ -52,7 +59,10 @@ def pick_cheapest_chat_model_from_llm_provider(custom_llm_provider: str):
         _cost = model_info.get("input_cost_per_token", 0) + model_info.get(
             "output_cost_per_token", 0
         )
-        if _cost < min_cost:
-            min_cost = _cost
-            cheapest_model = model
-    return cheapest_model
+        model_costs.append((model, _cost))
+
+    # Sort by cost (ascending)
+    model_costs.sort(key=lambda x: x[1])
+
+    # Return the top n cheapest models
+    return [model for model, _ in model_costs[:n]]
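A usage sketch of the renamed helper (the actual model names returned depend on the current model cost map):

    from litellm.litellm_core_utils.llm_request_utils import (
        pick_cheapest_chat_models_from_llm_provider,
    )

    # Three cheapest OpenAI chat models, ranked by input + output cost per token.
    cheapest = pick_cheapest_chat_models_from_llm_provider("openai", n=3)
    print(cheapest)

    # Unknown providers now return an empty list instead of raising ValueError.
    assert pick_cheapest_chat_models_from_llm_provider("unknown", n=1) == []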
@@ -57,6 +57,9 @@ from litellm.litellm_core_utils.health_check_utils import (
     _filter_model_params,
 )
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.llm_request_utils import (
+    pick_cheapest_chat_models_from_llm_provider,
+)
 from litellm.litellm_core_utils.mock_functions import (
     mock_embedding,
     mock_image_generation,

@@ -5080,25 +5083,26 @@ def speech(
 async def ahealth_check_wildcard_models(
     model: str, custom_llm_provider: str, model_params: dict
 ) -> dict:
-    from litellm.litellm_core_utils.llm_request_utils import (
-        pick_cheapest_chat_model_from_llm_provider,
-    )
-
     # this is a wildcard model, we need to pick a random model from the provider
-    cheapest_model = pick_cheapest_chat_model_from_llm_provider(
-        custom_llm_provider=custom_llm_provider
+    cheapest_models = pick_cheapest_chat_models_from_llm_provider(
+        custom_llm_provider=custom_llm_provider, n=3
     )
-    fallback_models: Optional[List] = None
-    if custom_llm_provider in litellm.models_by_provider:
-        models = litellm.models_by_provider[custom_llm_provider]
-        random.shuffle(models)  # Shuffle the models list in place
-        fallback_models = models[:2]  # Pick the first 2 models from the shuffled list
-    model_params["model"] = cheapest_model
+    if len(cheapest_models) == 0:
+        raise Exception(
+            f"Unable to health check wildcard model for provider {custom_llm_provider}. Add a model on your config.yaml or contribute here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+        )
+    if len(cheapest_models) > 1:
+        fallback_models = cheapest_models[
+            1:
+        ]  # Pick the last 2 models from the shuffled list
+    else:
+        fallback_models = None
+    model_params["model"] = cheapest_models[0]
     model_params["fallbacks"] = fallback_models
     model_params["max_tokens"] = 1
     await acompletion(**model_params)
-    response: dict = {}  # args like remaining ratelimit etc.
-    return response
+    return {}


 async def ahealth_check(
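The selection logic in ahealth_check_wildcard_models now boils down to the following shape (a sketch of the behaviour, not the verbatim source):

    cheapest_models = pick_cheapest_chat_models_from_llm_provider(
        custom_llm_provider="openai", n=3
    )
    # Health check hits the single cheapest model; up to two more become fallbacks.
    primary = cheapest_models[0]
    fallback_models = cheapest_models[1:] if len(cheapest_models) > 1 else None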
File diff suppressed for 3 files because one or more lines are too long
@@ -1,44 +1,43 @@
 model_list:
-  - model_name: azure-embedding-model
+  # At least one model must exist for the proxy to start.
+  - model_name: gpt-4o
     litellm_params:
-      model: azure/azure-embedding-model
-      api_key: os.environ/AZURE_API_KEY
-      api_base: os.environ/AZURE_API_BASE
-  - model_name: openai-text-completion
-    litellm_params:
-      model: openai/gpt-3.5-turbo
+      model: gpt-4o
       api_key: os.environ/OPENAI_API_KEY
-  - model_name: chatbot_actions
+      # timeout: 0.1 # timeout in (seconds)
+      # stream_timeout: 0.01 # timeout for stream requests (seconds)
+  - model_name: anthropic.claude-3-5-sonnet-20241022-v2:0
     litellm_params:
-      model: langfuse/gpt-3.5-turbo
+      model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0
+  - model_name: nova-lite
+    litellm_params:
+      model: bedrock/us.amazon.nova-lite-v1:0
+  - model_name: llama3-2-11b-instruct-v1:0
+    litellm_params:
+      model: bedrock/us.meta.llama3-2-11b-instruct-v1:0
+  - model_name: gpt-4o-bad
+    litellm_params:
+      model: gpt-4o
+      api_key: bad
+  - model_name: "bedrock/*"
+    litellm_params:
+      model: "bedrock/*"
+  - model_name: "openai/*"
+    litellm_params:
+      model: "openai/*"
       api_key: os.environ/OPENAI_API_KEY
-      tpm: 1000000
-      prompt_id: "jokes"
-  - model_name: openai-deepseek
-    litellm_params:
-      model: deepseek/deepseek-chat
-      api_key: os.environ/OPENAI_API_KEY
-    model_info:
-      access_groups: ["restricted-models"]
-      custom_tokenizer:
-        identifier: deepseek-ai/DeepSeek-V3-Base
-        revision: main
-        auth_token: os.environ/HUGGINGFACE_API_KEY
-  - model_name: watsonx/ibm/granite-13b-chat-v2 # tried to keep original name for backwards compatibility but I've also tried watsonx_text
-    litellm_params:
-      model: watsonx_text/ibm/granite-13b-chat-v2
-    model_info:
-      input_cost_per_token: 0.0000006
-      output_cost_per_token: 0.0000006
+
+general_settings:
+  store_model_in_db: true
+  disable_prisma_schema_update: true
+  # master_key: os.environ/LITELLM_MASTER_KEY

 litellm_settings:
-  success_callback: ["s3"]
-  enable_preview_features: true
-  s3_callback_params:
-    s3_bucket_name: my-new-test-bucket-litellm # AWS Bucket Name for S3
-    s3_region_name: us-west-2 # AWS Region Name for S3
-    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # us os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
-    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
-    s3_use_team_prefix: true
+  fallbacks: [{"gpt-4o-bad": ["gpt-4o"]}] #, {"gpt-4o": ["nova-lite"]}]
+  request_timeout: 600 # raise Timeout error if call takes longer than 600 seconds. Default value is 6000seconds if not set
+  # set_verbose: false # Switch off Debug Logging, ensure your logs do not have any debugging on
+  # json_logs: true # Get debug logs in json format
+  ssl_verify: true
+  callbacks: ["prometheus"]
+  service_callback: ["prometheus_system"]
+  turn_off_message_logging: true # turn off messages in otel
+  #callbacks: ["langfuse"]
+  redact_user_api_key_info: true
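One way to exercise this config end to end, assuming the proxy runs locally on port 4000 and the commented-out master key is exported as LITELLM_MASTER_KEY (a hedged sketch, not part of this commit):

    import os
    import requests

    # /health runs the health check for every deployment in model_list, including
    # the wildcard "bedrock/*" and "openai/*" routes this commit targets.
    resp = requests.get(
        "http://0.0.0.0:4000/health",
        headers={"Authorization": f"Bearer {os.environ['LITELLM_MASTER_KEY']}"},
    )
    print(resp.json())  # expected to include healthy_endpoints / unhealthy_endpoints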
@@ -2704,7 +2704,6 @@ def test_select_model_name_for_cost_calc():
     assert return_model == "azure_ai/mistral-large"


-
 def test_moderations():
     from litellm import moderation


@@ -2722,6 +2721,7 @@ def test_moderations():
     cost = completion_cost(response, model="omni-moderation-latest")
     assert cost == 0

+
 def test_cost_calculator_azure_embedding():
     from litellm.cost_calculator import response_cost_calculator
     from litellm.types.utils import EmbeddingResponse, Usage

@@ -2747,3 +2747,10 @@ def test_cost_calculator_azure_embedding():
     except Exception as e:
         traceback.print_exc()
         pytest.fail(f"Error: {e}")
+
+
+def test_add_known_models():
+    litellm.add_known_models()
+    assert (
+        "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0" not in litellm.bedrock_models
+    )
@@ -200,3 +200,11 @@ def test_azure_global_standard_get_llm_provider():
         api_key="fake-api-key",
     )
     assert custom_llm_provider == "azure_ai"
+
+
+def test_nova_bedrock_converse():
+    model, custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider(
+        model="amazon.nova-micro-v1:0",
+    )
+    assert custom_llm_provider == "bedrock"
+    assert model == "amazon.nova-micro-v1:0"
@@ -1457,3 +1457,13 @@ def test_supports_vision_gemini():
     from litellm.utils import supports_vision

     assert supports_vision("gemini-1.5-pro") is True
+
+
+def test_pick_cheapest_chat_model_from_llm_provider():
+    from litellm.litellm_core_utils.llm_request_utils import (
+        pick_cheapest_chat_models_from_llm_provider,
+    )
+
+    assert len(pick_cheapest_chat_models_from_llm_provider("openai", n=3)) == 3
+
+    assert len(pick_cheapest_chat_models_from_llm_provider("unknown", n=1)) == 0