mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
QA: ensure all bedrock regional models have same supported_ as base + Anthropic nested pydantic object support (#7844)
All checks were successful
Read Version from pyproject.toml / read-version (push) Successful in 13s
* build: ensure all regional bedrock models have same supported values as base bedrock model (prevents drift)
* test(base_llm_unit_tests.py): add testing for nested pydantic objects
* fix(test_utils.py): add test_get_potential_model_names
* fix(anthropic/chat/transformation.py): support nested pydantic objects. Fixes https://github.com/BerriAI/litellm/issues/7755
This commit is contained in:
parent 62188a01b0
commit 71c41f8f33
12 changed files with 259 additions and 62 deletions
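
For context, a minimal usage sketch of what the Anthropic change enables (not part of the diff): a nested pydantic model passed as response_format, mirroring the new unit test further down. The model name and API key handling here are illustrative assumptions.

# Hedged sketch: nested pydantic response_format, assuming ANTHROPIC_API_KEY is set.
from pydantic import BaseModel

import litellm


class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]


class EventsList(BaseModel):
    events: list[CalendarEvent]  # nested pydantic object, see issue #7755


response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",  # illustrative model name
    messages=[{"role": "user", "content": "List 5 important events in the XIX century"}],
    response_format=EventsList,
)
print(response.choices[0].message.content)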
@@ -8,6 +8,7 @@ import litellm
 from litellm.constants import RESPONSE_FORMAT_TOOL_NAME
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.prompt_templates.factory import anthropic_messages_pt
+from litellm.llms.base_llm.base_utils import type_to_response_format_param
 from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
 from litellm.types.llms.anthropic import (
     AllAnthropicToolsValues,
@@ -94,6 +95,13 @@ class AnthropicConfig(BaseConfig):
         "user",
     ]
 
+    def get_json_schema_from_pydantic_object(
+        self, response_format: Union[Any, Dict, None]
+    ) -> Optional[dict]:
+        return type_to_response_format_param(
+            response_format, ref_template="/$defs/{model}"
+        )  # Relevant issue: https://github.com/BerriAI/litellm/issues/7755
+
     def get_cache_control_headers(self) -> dict:
         return {
             "anthropic-version": "2023-06-01",
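
A rough sketch of what this new hook returns for a nested model (not part of the diff). The schema body comes from pydantic's model_json_schema; the import path is inferred from the commit message and repo layout.

# Hedged example: AnthropicConfig converts a nested pydantic model into a json_schema
# response_format param whose $refs follow the "/$defs/{model}" template.
from pydantic import BaseModel

from litellm.llms.anthropic.chat.transformation import AnthropicConfig


class Step(BaseModel):
    explanation: str
    output: str


class Reasoning(BaseModel):
    steps: list[Step]
    final_answer: str


param = AnthropicConfig().get_json_schema_from_pydantic_object(response_format=Reasoning)
# Expected shape (abridged):
# {"type": "json_schema",
#  "json_schema": {"name": "Reasoning", "strict": True,
#                  "schema": {"$defs": {"Step": {...}},
#                             "properties": {"steps": {"items": {"$ref": "/$defs/Step"}}, ...}}}}
print(param)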
@@ -1,5 +1,8 @@
 from abc import ABC, abstractmethod
-from typing import List, Optional
+from typing import List, Optional, Type, Union
 
+from openai.lib import _parsing, _pydantic
+from pydantic import BaseModel
+
 from litellm.types.utils import ModelInfoBase
 
@@ -26,3 +29,39 @@ class BaseLLMModelInfo(ABC):
     @abstractmethod
     def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
         pass
+
+
+def type_to_response_format_param(
+    response_format: Optional[Union[Type[BaseModel], dict]],
+    ref_template: Optional[str] = None,
+) -> Optional[dict]:
+    """
+    Re-implementation of openai's 'type_to_response_format_param' function
+
+    Used for converting pydantic object to api schema.
+    """
+    if response_format is None:
+        return None
+
+    if isinstance(response_format, dict):
+        return response_format
+
+    # type checkers don't narrow the negation of a `TypeGuard` as it isn't
+    # a safe default behaviour but we know that at this point the `response_format`
+    # can only be a `type`
+    if not _parsing._completions.is_basemodel_type(response_format):
+        raise TypeError(f"Unsupported response_format type - {response_format}")
+
+    if ref_template is not None:
+        schema = response_format.model_json_schema(ref_template=ref_template)
+    else:
+        schema = _pydantic.to_strict_json_schema(response_format)
+
+    return {
+        "type": "json_schema",
+        "json_schema": {
+            "schema": schema,
+            "name": response_format.__name__,
+            "strict": True,
+        },
+    }
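
A minimal sketch of the helper's behaviour (not part of the diff), assuming the import path used elsewhere in this commit.

# Hedged sketch of type_to_response_format_param's branches.
from pydantic import BaseModel

from litellm.llms.base_llm.base_utils import type_to_response_format_param


class Answer(BaseModel):
    text: str


# dict response_format values pass through unchanged
assert type_to_response_format_param({"type": "json_object"}) == {"type": "json_object"}

# pydantic models become an OpenAI-style json_schema param (strict schema by default)
param = type_to_response_format_param(Answer)
assert param is not None and param["json_schema"]["name"] == "Answer"

# providers that need a different $ref layout (e.g. Anthropic) supply ref_template
param = type_to_response_format_param(Answer, ref_template="/$defs/{model}")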
@@ -4,13 +4,25 @@ Common base config for all LLM providers
 
 import types
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, AsyncIterator, Iterator, List, Optional, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    AsyncIterator,
+    Iterator,
+    List,
+    Optional,
+    Type,
+    Union,
+)
 
 import httpx
+from pydantic import BaseModel
 
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import ModelResponse
 
+from ..base_utils import type_to_response_format_param
+
 if TYPE_CHECKING:
     from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
 
@@ -71,6 +83,11 @@ class BaseConfig(ABC):
             and v is not None
         }
 
+    def get_json_schema_from_pydantic_object(
+        self, response_format: Optional[Union[Type[BaseModel], dict]]
+    ) -> Optional[dict]:
+        return type_to_response_format_param(response_format=response_format)
+
     def should_fake_stream(
         self,
         model: Optional[str],
@@ -31,7 +31,14 @@ from litellm.types.llms.openai import (
 from litellm.types.utils import ModelResponse, Usage
 from litellm.utils import CustomStreamWrapper, add_dummy_tool, has_tool_call_blocks
 
-from ..common_utils import BedrockError, get_bedrock_tool_name
+from ..common_utils import (
+    AmazonBedrockGlobalConfig,
+    BedrockError,
+    get_bedrock_tool_name,
+)
+
+global_config = AmazonBedrockGlobalConfig()
+all_global_regions = global_config.get_all_regions()
 
 
 class AmazonConverseConfig:
@@ -573,13 +580,24 @@ class AmazonConverseConfig:
         Handle model names like - "us.meta.llama3-2-11b-instruct-v1:0" -> "meta.llama3-2-11b-instruct-v1"
         AND "meta.llama3-2-11b-instruct-v1:0" -> "meta.llama3-2-11b-instruct-v1"
         """
+
         if model.startswith("bedrock/"):
-            model = model.split("/")[1]
+            model = model.split("/", 1)[1]
 
         if model.startswith("converse/"):
-            model = model.split("/")[1]
+            model = model.split("/", 1)[1]
 
         potential_region = model.split(".", 1)[0]
+
+        alt_potential_region = model.split("/", 1)[
+            0
+        ]  # in model cost map we store regional information like `/us-west-2/bedrock-model`
+
         if potential_region in self._supported_cross_region_inference_region():
             return model.split(".", 1)[1]
+        elif (
+            alt_potential_region in all_global_regions and len(model.split("/", 1)) > 1
+        ):
+            return model.split("/", 1)[1]
+
         return model
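
A hedged sketch of how the updated _get_base_model collapses regional aliases (not part of the diff); AmazonConverseConfig is imported from the top-level package, as in the new test further down.

from litellm import AmazonConverseConfig

config = AmazonConverseConfig()

# cross-region inference prefixes ("us.", "eu.", ...) are stripped as before
print(config._get_base_model("us.anthropic.claude-3-haiku-20240307-v1:0"))
# -> "anthropic.claude-3-haiku-20240307-v1:0"

# cost-map style regional prefixes ("us-west-2/...") now also resolve to the base model
print(config._get_base_model("bedrock/us-west-2/anthropic.claude-3-haiku-20240307-v1:0"))
# -> "anthropic.claude-3-haiku-20240307-v1:0"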
@@ -42,16 +42,35 @@ class AmazonBedrockGlobalConfig:
             optional_params[mapped_params[param]] = value
         return optional_params
 
+    def get_all_regions(self) -> List[str]:
+        return (
+            self.get_us_regions()
+            + self.get_eu_regions()
+            + self.get_ap_regions()
+            + self.get_ca_regions()
+            + self.get_sa_regions()
+        )
+
+    def get_ap_regions(self) -> List[str]:
+        return ["ap-northeast-1", "ap-northeast-2", "ap-northeast-3", "ap-south-1"]
+
+    def get_sa_regions(self) -> List[str]:
+        return ["sa-east-1"]
+
     def get_eu_regions(self) -> List[str]:
         """
         Source: https://www.aws-services.info/bedrock.html
         """
         return [
             "eu-west-1",
+            "eu-west-2",
             "eu-west-3",
             "eu-central-1",
         ]
 
+    def get_ca_regions(self) -> List[str]:
+        return ["ca-central-1"]
+
     def get_us_regions(self) -> List[str]:
         """
         Source: https://www.aws-services.info/bedrock.html
@@ -59,6 +78,7 @@ class AmazonBedrockGlobalConfig:
         return [
             "us-east-2",
             "us-east-1",
+            "us-west-1",
             "us-west-2",
             "us-gov-west-1",
         ]
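
A short sketch of the new region helpers (not part of the diff); the import path is inferred from the relative import ("from ..common_utils import ...") shown above.

from litellm.llms.bedrock.common_utils import AmazonBedrockGlobalConfig

regions = AmazonBedrockGlobalConfig().get_all_regions()
# union of the US, EU, AP, CA and SA region lists
assert "us-west-1" in regions and "eu-west-2" in regions and "sa-east-1" in regions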
@@ -5364,7 +5364,8 @@
     "input_cost_per_token": 0.000008,
     "output_cost_per_token": 0.000024,
     "litellm_provider": "bedrock",
-    "mode": "chat"
+    "mode": "chat",
+    "supports_function_calling": true
   },
   "bedrock/us-west-2/mistral.mistral-large-2402-v1:0": {
     "max_tokens": 8191,
@@ -5456,7 +5457,8 @@
     "supports_function_calling": true,
     "supports_vision": true,
     "supports_assistant_prefill": true,
-    "supports_prompt_caching": true
+    "supports_prompt_caching": true,
+    "supports_response_schema": true
   },
   "anthropic.claude-3-haiku-20240307-v1:0": {
     "max_tokens": 4096,
@@ -5524,7 +5526,9 @@
     "mode": "chat",
     "supports_function_calling": true,
     "supports_vision": true,
-    "supports_assistant_prefill": true
+    "supports_assistant_prefill": true,
+    "supports_prompt_caching": true,
+    "supports_response_schema": true
   },
   "us.anthropic.claude-3-haiku-20240307-v1:0": {
     "max_tokens": 4096,
@@ -5546,7 +5550,8 @@
     "litellm_provider": "bedrock",
     "mode": "chat",
     "supports_assistant_prefill": true,
-    "supports_function_calling": true
+    "supports_function_calling": true,
+    "supports_prompt_caching": true
   },
   "us.anthropic.claude-3-opus-20240229-v1:0": {
     "max_tokens": 4096,
@@ -5591,7 +5596,9 @@
     "mode": "chat",
     "supports_function_calling": true,
     "supports_vision": true,
-    "supports_assistant_prefill": true
+    "supports_assistant_prefill": true,
+    "supports_prompt_caching": true,
+    "supports_response_schema": true
   },
   "eu.anthropic.claude-3-haiku-20240307-v1:0": {
     "max_tokens": 4096,
@@ -5612,7 +5619,10 @@
     "output_cost_per_token": 0.000005,
     "litellm_provider": "bedrock",
     "mode": "chat",
-    "supports_function_calling": true
+    "supports_function_calling": true,
+    "supports_assistant_prefill": true,
+    "supports_prompt_caching": true,
+    "supports_response_schema": true
   },
   "eu.anthropic.claude-3-opus-20240229-v1:0": {
     "max_tokens": 4096,
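
A hedged spot-check in the spirit of the drift test added at the bottom of this diff (not part of the diff itself): a regional bedrock entry should carry every supports_* flag of its base model.

import os

import litellm
from litellm import AmazonConverseConfig

os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

regional = "eu.anthropic.claude-3-haiku-20240307-v1:0"
base = AmazonConverseConfig()._get_base_model(regional)

for key, value in litellm.model_cost[base].items():
    if key.startswith("supports_"):
        assert litellm.model_cost[regional].get(key) == value, f"drift on {key}"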
@@ -1,5 +1,5 @@
 model_list:
-  - model_name: embedding-small
+  - model_name: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0
     litellm_params:
-      model: openai/text-embedding-3-small
+      model: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0
 
@@ -178,7 +178,10 @@ from openai import OpenAIError as OriginalError
 from litellm.llms.base_llm.audio_transcription.transformation import (
     BaseAudioTranscriptionConfig,
 )
-from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
+from litellm.llms.base_llm.base_utils import (
+    BaseLLMModelInfo,
+    type_to_response_format_param,
+)
 from litellm.llms.base_llm.chat.transformation import BaseConfig
 from litellm.llms.base_llm.completion.transformation import BaseTextCompletionConfig
 from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
@@ -1474,7 +1477,7 @@ def create_pretrained_tokenizer(
 
     try:
         tokenizer = Tokenizer.from_pretrained(
-            identifier, revision=revision, auth_token=auth_token
+            identifier, revision=revision, auth_token=auth_token  # type: ignore
         )
     except Exception as e:
         verbose_logger.error(
@@ -2773,11 +2776,26 @@ def get_optional_params( # noqa: PLR0915
             message=f"Function calling is not supported by {custom_llm_provider}.",
         )
 
-    if "response_format" in non_default_params:
-        non_default_params["response_format"] = type_to_response_format_param(
-            response_format=non_default_params["response_format"]
+    provider_config: Optional[BaseConfig] = None
+    if custom_llm_provider is not None and custom_llm_provider in [
+        provider.value for provider in LlmProviders
+    ]:
+        provider_config = ProviderConfigManager.get_provider_chat_config(
+            model=model, provider=LlmProviders(custom_llm_provider)
         )
 
+    if "response_format" in non_default_params:
+        if provider_config is not None:
+            non_default_params["response_format"] = (
+                provider_config.get_json_schema_from_pydantic_object(
+                    response_format=non_default_params["response_format"]
+                )
+            )
+        else:
+            non_default_params["response_format"] = type_to_response_format_param(
+                response_format=non_default_params["response_format"]
+            )
+
     if "tools" in non_default_params and isinstance(
         non_default_params, list
     ): # fixes https://github.com/BerriAI/litellm/issues/4933
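
A hedged sketch of the new dispatch path (not part of the diff): the provider's chat config now converts a pydantic response_format, so Anthropic gets its nested-$defs schema while other providers fall back to the shared helper. ProviderConfigManager and LlmProviders are used as they appear in this hunk; the import locations and model name are assumptions.

from pydantic import BaseModel

from litellm import LlmProviders
from litellm.utils import ProviderConfigManager


class Output(BaseModel):
    answer: str


provider_config = ProviderConfigManager.get_provider_chat_config(
    model="claude-3-5-sonnet-20240620", provider=LlmProviders.ANTHROPIC
)
print(provider_config.get_json_schema_from_pydantic_object(response_format=Output))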
@@ -2835,13 +2853,6 @@ def get_optional_params( # noqa: PLR0915
             message=f"{custom_llm_provider} does not support parameters: {unsupported_params}, for model={model}. To drop these, set `litellm.drop_params=True` or for proxy:\n\n`litellm_settings:\n drop_params: true`\n",
         )
 
-    provider_config: Optional[BaseConfig] = None
-    if custom_llm_provider is not None and custom_llm_provider in [
-        provider.value for provider in LlmProviders
-    ]:
-        provider_config = ProviderConfigManager.get_provider_chat_config(
-            model=model, provider=LlmProviders(custom_llm_provider)
-        )
     supported_params = get_supported_openai_params(
         model=model, custom_llm_provider=custom_llm_provider
     )
@@ -4964,36 +4975,6 @@ def _should_retry(status_code: int):
     return False
 
 
-def type_to_response_format_param(
-    response_format: Optional[Union[Type[BaseModel], dict]],
-) -> Optional[dict]:
-    """
-    Re-implementation of openai's 'type_to_response_format_param' function
-
-    Used for converting pydantic object to api schema.
-    """
-    if response_format is None:
-        return None
-
-    if isinstance(response_format, dict):
-        return response_format
-
-    # type checkers don't narrow the negation of a `TypeGuard` as it isn't
-    # a safe default behaviour but we know that at this point the `response_format`
-    # can only be a `type`
-    if not _parsing._completions.is_basemodel_type(response_format):
-        raise TypeError(f"Unsupported response_format type - {response_format}")
-
-    return {
-        "type": "json_schema",
-        "json_schema": {
-            "schema": _pydantic.to_strict_json_schema(response_format),
-            "name": response_format.__name__,
-            "strict": True,
-        },
-    }
-
-
 def _get_retry_after_from_exception_header(
     response_headers: Optional[httpx.Headers] = None,
 ):
@@ -5364,7 +5364,8 @@
     "input_cost_per_token": 0.000008,
     "output_cost_per_token": 0.000024,
     "litellm_provider": "bedrock",
-    "mode": "chat"
+    "mode": "chat",
+    "supports_function_calling": true
   },
   "bedrock/us-west-2/mistral.mistral-large-2402-v1:0": {
     "max_tokens": 8191,
@@ -5456,7 +5457,8 @@
     "supports_function_calling": true,
     "supports_vision": true,
     "supports_assistant_prefill": true,
-    "supports_prompt_caching": true
+    "supports_prompt_caching": true,
+    "supports_response_schema": true
   },
   "anthropic.claude-3-haiku-20240307-v1:0": {
     "max_tokens": 4096,
@@ -5524,7 +5526,9 @@
     "mode": "chat",
     "supports_function_calling": true,
     "supports_vision": true,
-    "supports_assistant_prefill": true
+    "supports_assistant_prefill": true,
+    "supports_prompt_caching": true,
+    "supports_response_schema": true
   },
   "us.anthropic.claude-3-haiku-20240307-v1:0": {
     "max_tokens": 4096,
@@ -5546,7 +5550,8 @@
     "litellm_provider": "bedrock",
     "mode": "chat",
     "supports_assistant_prefill": true,
-    "supports_function_calling": true
+    "supports_function_calling": true,
+    "supports_prompt_caching": true
   },
   "us.anthropic.claude-3-opus-20240229-v1:0": {
     "max_tokens": 4096,
@@ -5591,7 +5596,9 @@
     "mode": "chat",
     "supports_function_calling": true,
     "supports_vision": true,
-    "supports_assistant_prefill": true
+    "supports_assistant_prefill": true,
+    "supports_prompt_caching": true,
+    "supports_response_schema": true
   },
   "eu.anthropic.claude-3-haiku-20240307-v1:0": {
     "max_tokens": 4096,
@@ -5612,7 +5619,10 @@
     "output_cost_per_token": 0.000005,
     "litellm_provider": "bedrock",
     "mode": "chat",
-    "supports_function_calling": true
+    "supports_function_calling": true,
+    "supports_assistant_prefill": true,
+    "supports_prompt_caching": true,
+    "supports_response_schema": true
   },
   "eu.anthropic.claude-3-opus-20240229-v1:0": {
     "max_tokens": 4096,
@@ -259,6 +259,59 @@ class BaseLLMChatTest(ABC):
         except litellm.InternalServerError:
             pytest.skip("Model is overloaded")
 
+    @pytest.mark.flaky(retries=6, delay=1)
+    def test_json_response_pydantic_obj_nested_obj(self):
+        litellm.set_verbose = True
+        from pydantic import BaseModel
+        from litellm.utils import supports_response_schema
+
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    @pytest.mark.flaky(retries=6, delay=1)
+    def test_json_response_nested_pydantic_obj(self):
+        from pydantic import BaseModel
+        from litellm.utils import supports_response_schema
+
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
+
+        class CalendarEvent(BaseModel):
+            name: str
+            date: str
+            participants: list[str]
+
+        class EventsList(BaseModel):
+            events: list[CalendarEvent]
+
+        messages = [
+            {"role": "user", "content": "List 5 important events in the XIX century"}
+        ]
+
+        base_completion_call_args = self.get_base_completion_call_args()
+        if not supports_response_schema(base_completion_call_args["model"], None):
+            pytest.skip(
+                f"Model={base_completion_call_args['model']} does not support response schema"
+            )
+
+        try:
+            res = self.completion_function(
+                **base_completion_call_args,
+                messages=messages,
+                response_format=EventsList,
+                timeout=60,
+            )
+            assert res is not None
+
+            print(res.choices[0].message)
+
+            assert res.choices[0].message.content is not None
+            assert res.choices[0].message.tool_calls is None
+        except litellm.Timeout:
+            pytest.skip("Model took too long to respond")
+        except litellm.InternalServerError:
+            pytest.skip("Model is overloaded")
+
     @pytest.mark.flaky(retries=6, delay=1)
     def test_json_response_format_stream(self):
         """
@@ -307,3 +307,35 @@ def test_get_model_info_custom_model_router():
     info = get_model_info("openai/meta-llama/Meta-Llama-3-8B-Instruct")
     print("info", info)
     assert info is not None
+
+
+def test_get_model_info_bedrock_models():
+    """
+    Check for drift in base model info for bedrock models and regional model info for bedrock models.
+    """
+    from litellm import AmazonConverseConfig
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    for k, v in litellm.model_cost.items():
+        if v["litellm_provider"] == "bedrock":
+            k = k.replace("*/", "")
+            potential_commitments = [
+                "1-month-commitment",
+                "3-month-commitment",
+                "6-month-commitment",
+            ]
+            if any(commitment in k for commitment in potential_commitments):
+                for commitment in potential_commitments:
+                    k = k.replace(f"{commitment}/", "")
+            base_model = AmazonConverseConfig()._get_base_model(k)
+            base_model_info = litellm.model_cost[base_model]
+            for base_model_key, base_model_value in base_model_info.items():
+                if base_model_key.startswith("supports_"):
+                    assert (
+                        base_model_key in v
+                    ), f"{base_model_key} is not in model cost map for {k}"
+                    assert (
+                        v[base_model_key] == base_model_value
+                    ), f"{base_model_key} is not equal to {base_model_value} for model {k}"
@@ -1471,3 +1471,12 @@ def test_pick_cheapest_chat_model_from_llm_provider():
     assert len(pick_cheapest_chat_models_from_llm_provider("openai", n=3)) == 3
 
     assert len(pick_cheapest_chat_models_from_llm_provider("unknown", n=1)) == 0
+
+
+def test_get_potential_model_names():
+    from litellm.utils import _get_potential_model_names
+
+    assert _get_potential_model_names(
+        model="bedrock/ap-northeast-1/anthropic.claude-instant-v1",
+        custom_llm_provider="bedrock",
+    )