mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00

Litellm dev 01 11 2025 p3 (#7702)

* fix(__init__.py): fix init to exclude pricing-only model cost values from real model names

  prevents bad health checks on wildcard routes

* fix(get_llm_provider.py): fix to handle calling bedrock_converse models

parent 9ebb8a8795
commit 267be77720

11 changed files with 141 additions and 66 deletions
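
The gist of the __init__.py change: model_prices_and_context_window.json contains keys such as bedrock/us-west-1/meta.llama3-70b-instruct-v1:0 and ft:gpt-3.5-turbo that exist only for cost lookups, not as callable model names. Registering them in lists like bedrock_models meant a wildcard-route health check could pick a model that cannot actually be called. A standalone sketch of the filtering rule (illustration only, not the litellm source; the real helpers are in the diff below):

import re

# Rough equivalent of the pricing-only filters added in this commit.
BEDROCK_REGION_PATTERN = re.compile(r"^bedrock/[a-zA-Z0-9_-]+/.+$")


def is_pricing_only_key(key: str) -> bool:
    """Return True for cost-table keys that are not real, callable model names."""
    if "month-commitment" in key:  # commitment-based pricing rows
        return True
    if BEDROCK_REGION_PATTERN.match(key):  # 'bedrock/<region>/<model>' pricing rows
        return True
    if key.startswith("ft:") and not key.count(":") > 1:  # OpenAI fine-tune pricing rows
        return True
    return False


assert is_pricing_only_key("bedrock/us-west-1/meta.llama3-70b-instruct-v1:0")
assert is_pricing_only_key("ft:gpt-3.5-turbo")
assert not is_pricing_only_key("anthropic.claude-3-5-sonnet-20241022-v2:0")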

@@ -18,6 +18,7 @@ from litellm._logging import (
     _turn_on_json,
     log_level,
 )
+import re
 from litellm.constants import (
     DEFAULT_BATCH_SIZE,
     DEFAULT_FLUSH_INTERVAL_SECONDS,

@@ -484,9 +485,44 @@ galadriel_models: List = []
 sambanova_models: List = []


+def is_bedrock_pricing_only_model(key: str) -> bool:
+    """
+    Excludes keys with the pattern 'bedrock/<region>/<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
+
+    Args:
+        key (str): A key to filter.
+
+    Returns:
+        bool: True if the key matches the Bedrock pattern, False otherwise.
+    """
+    # Regex to match 'bedrock/<region>/<model>'
+    bedrock_pattern = re.compile(r"^bedrock/[a-zA-Z0-9_-]+/.+$")
+
+    if "month-commitment" in key:
+        return True
+
+    is_match = bedrock_pattern.match(key)
+    return is_match is not None
+
+
+def is_openai_finetune_model(key: str) -> bool:
+    """
+    Excludes model cost keys with the pattern 'ft:<model>'. These are in the model_prices_and_context_window.json file for pricing purposes only.
+
+    Args:
+        key (str): A key to filter.
+
+    Returns:
+        bool: True if the key matches the OpenAI finetune pattern, False otherwise.
+    """
+    return key.startswith("ft:") and not key.count(":") > 1
+
+
 def add_known_models():
     for key, value in model_cost.items():
-        if value.get("litellm_provider") == "openai":
+        if value.get("litellm_provider") == "openai" and not is_openai_finetune_model(
+            key
+        ):
             open_ai_chat_completion_models.append(key)
         elif value.get("litellm_provider") == "text-completion-openai":
             open_ai_text_completion_models.append(key)

@@ -542,7 +578,9 @@ def add_known_models():
             nlp_cloud_models.append(key)
         elif value.get("litellm_provider") == "aleph_alpha":
             aleph_alpha_models.append(key)
-        elif value.get("litellm_provider") == "bedrock":
+        elif value.get(
+            "litellm_provider"
+        ) == "bedrock" and not is_bedrock_pricing_only_model(key):
             bedrock_models.append(key)
         elif value.get("litellm_provider") == "bedrock_converse":
             bedrock_converse_models.append(key)

@@ -306,7 +306,9 @@ def get_llm_provider(  # noqa: PLR0915
             custom_llm_provider = "petals"
         ## bedrock
         elif (
-            model in litellm.bedrock_models or model in litellm.bedrock_embedding_models
+            model in litellm.bedrock_models
+            or model in litellm.bedrock_embedding_models
+            or model in litellm.bedrock_converse_models
         ):
             custom_llm_provider = "bedrock"
         elif model in litellm.watsonx_models:
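
In practice this means bedrock_converse-only models resolve to the bedrock provider just like classic bedrock ones (the new test_nova_bedrock_converse test further down checks exactly this). A quick illustration, assuming a litellm build that includes this commit:

import litellm

# amazon.nova-micro-v1:0 is registered under litellm.bedrock_converse_models,
# so provider resolution now maps it to "bedrock" instead of failing.
model, provider, dynamic_api_key, api_base = litellm.get_llm_provider(
    model="amazon.nova-micro-v1:0",
)
assert provider == "bedrock"
assert model == "amazon.nova-micro-v1:0"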

@@ -30,16 +30,23 @@ def _ensure_extra_body_is_safe(extra_body: Optional[Dict]) -> Optional[Dict]:
     return extra_body


-def pick_cheapest_chat_model_from_llm_provider(custom_llm_provider: str):
+def pick_cheapest_chat_models_from_llm_provider(custom_llm_provider: str, n=1):
     """
-    Pick the cheapest chat model from the LLM provider.
+    Pick the n cheapest chat models from the LLM provider.
+
+    Args:
+        custom_llm_provider (str): The name of the LLM provider.
+        n (int): The number of cheapest models to return.
+
+    Returns:
+        list[str]: A list of the n cheapest chat models.
     """
     if custom_llm_provider not in litellm.models_by_provider:
-        raise ValueError(f"Unknown LLM provider: {custom_llm_provider}")
+        return []

     known_models = litellm.models_by_provider.get(custom_llm_provider, [])
-    min_cost = float("inf")
-    cheapest_model = None
+    model_costs = []

     for model in known_models:
         try:
             model_info = litellm.get_model_info(

@@ -52,7 +59,10 @@ def pick_cheapest_chat_model_from_llm_provider(custom_llm_provider: str):
             _cost = model_info.get("input_cost_per_token", 0) + model_info.get(
                 "output_cost_per_token", 0
             )
-            if _cost < min_cost:
-                min_cost = _cost
-                cheapest_model = model
-    return cheapest_model
+            model_costs.append((model, _cost))
+
+    # Sort by cost (ascending)
+    model_costs.sort(key=lambda x: x[1])
+
+    # Return the top n cheapest models
+    return [model for model, _ in model_costs[:n]]
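
Usage of the renamed helper, as exercised by the new unit test at the bottom of this diff (assumes a build containing this change):

from litellm.litellm_core_utils.llm_request_utils import (
    pick_cheapest_chat_models_from_llm_provider,
)

# Returns up to n model names, ordered by (input + output) cost per token.
cheapest = pick_cheapest_chat_models_from_llm_provider(custom_llm_provider="openai", n=3)
assert len(cheapest) == 3

# Unknown providers no longer raise ValueError; they return an empty list.
assert pick_cheapest_chat_models_from_llm_provider(custom_llm_provider="unknown", n=1) == []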

@@ -57,6 +57,9 @@ from litellm.litellm_core_utils.health_check_utils import (
     _filter_model_params,
 )
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.llm_request_utils import (
+    pick_cheapest_chat_models_from_llm_provider,
+)
 from litellm.litellm_core_utils.mock_functions import (
     mock_embedding,
     mock_image_generation,

@@ -5080,25 +5083,26 @@ def speech(
 async def ahealth_check_wildcard_models(
     model: str, custom_llm_provider: str, model_params: dict
 ) -> dict:
-    from litellm.litellm_core_utils.llm_request_utils import (
-        pick_cheapest_chat_model_from_llm_provider,
-    )
-
     # this is a wildcard model, we need to pick a random model from the provider
-    cheapest_model = pick_cheapest_chat_model_from_llm_provider(
-        custom_llm_provider=custom_llm_provider
+    cheapest_models = pick_cheapest_chat_models_from_llm_provider(
+        custom_llm_provider=custom_llm_provider, n=3
     )
-    fallback_models: Optional[List] = None
-    if custom_llm_provider in litellm.models_by_provider:
-        models = litellm.models_by_provider[custom_llm_provider]
-        random.shuffle(models)  # Shuffle the models list in place
-        fallback_models = models[:2]  # Pick the first 2 models from the shuffled list
-    model_params["model"] = cheapest_model
+    if len(cheapest_models) == 0:
+        raise Exception(
+            f"Unable to health check wildcard model for provider {custom_llm_provider}. Add a model on your config.yaml or contribute here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+        )
+    if len(cheapest_models) > 1:
+        fallback_models = cheapest_models[
+            1:
+        ]  # Pick the last 2 models from the shuffled list
+    else:
+        fallback_models = None
+    model_params["model"] = cheapest_models[0]
     model_params["fallbacks"] = fallback_models
     model_params["max_tokens"] = 1
     await acompletion(**model_params)
-    response: dict = {}  # args like remaining ratelimit etc.
-    return response
+    return {}


 async def ahealth_check(
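
The wildcard health check now derives its primary model and fallbacks from the cheapest known models for the provider instead of shuffling the full provider list. A minimal standalone sketch of that selection step (illustration only, not the litellm source):

from typing import List, Optional, Tuple


def split_primary_and_fallbacks(cheapest_models: List[str]) -> Tuple[str, Optional[List[str]]]:
    # Mirrors the new selection in ahealth_check_wildcard_models: the cheapest
    # model is health-checked, any remaining ones become fallbacks.
    if len(cheapest_models) == 0:
        raise Exception("no known models for this provider")
    fallbacks = cheapest_models[1:] if len(cheapest_models) > 1 else None
    return cheapest_models[0], fallbacks


primary, fallbacks = split_primary_and_fallbacks(["gpt-4o-mini", "gpt-4o", "gpt-4"])
assert primary == "gpt-4o-mini"
assert fallbacks == ["gpt-4o", "gpt-4"]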
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long

@@ -1,44 +1,43 @@
model_list:
  - model_name: azure-embedding-model
  # At least one model must exist for the proxy to start.
  - model_name: gpt-4o
    litellm_params:
      model: azure/azure-embedding-model
      api_key: os.environ/AZURE_API_KEY
      api_base: os.environ/AZURE_API_BASE
  - model_name: openai-text-completion
    litellm_params:
      model: openai/gpt-3.5-turbo
      model: gpt-4o
      api_key: os.environ/OPENAI_API_KEY
  - model_name: chatbot_actions
    # timeout: 0.1 # timeout in (seconds)
    # stream_timeout: 0.01 # timeout for stream requests (seconds)
  - model_name: anthropic.claude-3-5-sonnet-20241022-v2:0
    litellm_params:
      model: langfuse/gpt-3.5-turbo
      model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0
  - model_name: nova-lite
    litellm_params:
      model: bedrock/us.amazon.nova-lite-v1:0
  - model_name: llama3-2-11b-instruct-v1:0
    litellm_params:
      model: bedrock/us.meta.llama3-2-11b-instruct-v1:0
  - model_name: gpt-4o-bad
    litellm_params:
      model: gpt-4o
      api_key: bad
  - model_name: "bedrock/*"
    litellm_params:
      model: "bedrock/*"
  - model_name: "openai/*"
    litellm_params:
      model: "openai/*"
      api_key: os.environ/OPENAI_API_KEY
      tpm: 1000000
      prompt_id: "jokes"
  - model_name: openai-deepseek
    litellm_params:
      model: deepseek/deepseek-chat
      api_key: os.environ/OPENAI_API_KEY
    model_info:
      access_groups: ["restricted-models"]
      custom_tokenizer:
        identifier: deepseek-ai/DeepSeek-V3-Base
        revision: main
        auth_token: os.environ/HUGGINGFACE_API_KEY
  - model_name: watsonx/ibm/granite-13b-chat-v2 # tried to keep original name for backwards compatibility but I've also tried watsonx_text
    litellm_params:
      model: watsonx_text/ibm/granite-13b-chat-v2
    model_info:
      input_cost_per_token: 0.0000006
      output_cost_per_token: 0.0000006


general_settings:
  store_model_in_db: true
  disable_prisma_schema_update: true
  # master_key: os.environ/LITELLM_MASTER_KEY

litellm_settings:
  success_callback: ["s3"]
  enable_preview_features: true
  s3_callback_params:
    s3_bucket_name: my-new-test-bucket-litellm # AWS Bucket Name for S3
    s3_region_name: us-west-2 # AWS Region Name for S3
    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # use os.environ/<variable name> to pass environment variables. This is AWS Access Key ID for S3
    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
    s3_use_team_prefix: true

  fallbacks: [{"gpt-4o-bad": ["gpt-4o"]}] #, {"gpt-4o": ["nova-lite"]}]
  request_timeout: 600 # raise Timeout error if call takes longer than 600 seconds. Default value is 6000 seconds if not set
  # set_verbose: false # Switch off Debug Logging, ensure your logs do not have any debugging on
  # json_logs: true # Get debug logs in json format
  ssl_verify: true
  callbacks: ["prometheus"]
  service_callback: ["prometheus_system"]
  turn_off_message_logging: true # turn off messages in otel
  #callbacks: ["langfuse"]
  redact_user_api_key_info: true

@@ -2704,7 +2704,6 @@ def test_select_model_name_for_cost_calc():
     assert return_model == "azure_ai/mistral-large"


-
 def test_moderations():
     from litellm import moderation


@@ -2722,6 +2721,7 @@ def test_moderations():
     cost = completion_cost(response, model="omni-moderation-latest")
     assert cost == 0

+
 def test_cost_calculator_azure_embedding():
     from litellm.cost_calculator import response_cost_calculator
     from litellm.types.utils import EmbeddingResponse, Usage

@@ -2747,3 +2747,10 @@ def test_cost_calculator_azure_embedding():
     except Exception as e:
         traceback.print_exc()
         pytest.fail(f"Error: {e}")
+
+
+def test_add_known_models():
+    litellm.add_known_models()
+    assert (
+        "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0" not in litellm.bedrock_models
+    )

@@ -200,3 +200,11 @@ def test_azure_global_standard_get_llm_provider():
         api_key="fake-api-key",
     )
     assert custom_llm_provider == "azure_ai"
+
+
+def test_nova_bedrock_converse():
+    model, custom_llm_provider, dynamic_api_key, api_base = litellm.get_llm_provider(
+        model="amazon.nova-micro-v1:0",
+    )
+    assert custom_llm_provider == "bedrock"
+    assert model == "amazon.nova-micro-v1:0"

@@ -1457,3 +1457,13 @@ def test_supports_vision_gemini():
     from litellm.utils import supports_vision

     assert supports_vision("gemini-1.5-pro") is True
+
+
+def test_pick_cheapest_chat_model_from_llm_provider():
+    from litellm.litellm_core_utils.llm_request_utils import (
+        pick_cheapest_chat_models_from_llm_provider,
+    )
+
+    assert len(pick_cheapest_chat_models_from_llm_provider("openai", n=3)) == 3
+
+    assert len(pick_cheapest_chat_models_from_llm_provider("unknown", n=1)) == 0