From 6eb2346fd64a819e8f1b7445e4b94e0e093201c6 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Fri, 17 Jan 2025 19:49:12 -0800 Subject: [PATCH] QA: ensure all bedrock regional models have same `supported_` as base + Anthropic nested pydantic object support (#7844) * build: ensure all regional bedrock models have same supported values as base bedrock model prevents drift * test(base_llm_unit_tests.py): add testing for nested pydantic objects * fix(test_utils.py): add test_get_potential_model_names * fix(anthropic/chat/transformation.py): support nested pydantic objects Fixes https://github.com/BerriAI/litellm/issues/7755 --- litellm/llms/anthropic/chat/transformation.py | 8 +++ litellm/llms/base_llm/base_utils.py | 41 +++++++++++- litellm/llms/base_llm/chat/transformation.py | 19 +++++- .../bedrock/chat/converse_transformation.py | 24 ++++++- litellm/llms/bedrock/common_utils.py | 20 ++++++ ...odel_prices_and_context_window_backup.json | 22 +++++-- litellm/proxy/_new_secret_config.yaml | 6 +- litellm/utils.py | 65 +++++++------------ model_prices_and_context_window.json | 22 +++++-- tests/llm_translation/base_llm_unit_tests.py | 53 +++++++++++++++ tests/local_testing/test_get_model_info.py | 32 +++++++++ tests/local_testing/test_utils.py | 9 +++ 12 files changed, 259 insertions(+), 62 deletions(-) diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py index 1075807391..29e4e0fa4e 100644 --- a/litellm/llms/anthropic/chat/transformation.py +++ b/litellm/llms/anthropic/chat/transformation.py @@ -8,6 +8,7 @@ import litellm from litellm.constants import RESPONSE_FORMAT_TOOL_NAME from litellm.litellm_core_utils.core_helpers import map_finish_reason from litellm.litellm_core_utils.prompt_templates.factory import anthropic_messages_pt +from litellm.llms.base_llm.base_utils import type_to_response_format_param from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException from litellm.types.llms.anthropic import ( AllAnthropicToolsValues, @@ -94,6 +95,13 @@ class AnthropicConfig(BaseConfig): "user", ] + def get_json_schema_from_pydantic_object( + self, response_format: Union[Any, Dict, None] + ) -> Optional[dict]: + return type_to_response_format_param( + response_format, ref_template="/$defs/{model}" + ) # Relevant issue: https://github.com/BerriAI/litellm/issues/7755 + def get_cache_control_headers(self) -> dict: return { "anthropic-version": "2023-06-01", diff --git a/litellm/llms/base_llm/base_utils.py b/litellm/llms/base_llm/base_utils.py index da3d96ba3d..88b3115351 100644 --- a/litellm/llms/base_llm/base_utils.py +++ b/litellm/llms/base_llm/base_utils.py @@ -1,5 +1,8 @@ from abc import ABC, abstractmethod -from typing import List, Optional +from typing import List, Optional, Type, Union + +from openai.lib import _parsing, _pydantic +from pydantic import BaseModel from litellm.types.utils import ModelInfoBase @@ -26,3 +29,39 @@ class BaseLLMModelInfo(ABC): @abstractmethod def get_api_base(api_base: Optional[str] = None) -> Optional[str]: pass + + +def type_to_response_format_param( + response_format: Optional[Union[Type[BaseModel], dict]], + ref_template: Optional[str] = None, +) -> Optional[dict]: + """ + Re-implementation of openai's 'type_to_response_format_param' function + + Used for converting pydantic object to api schema. 
+ """ + if response_format is None: + return None + + if isinstance(response_format, dict): + return response_format + + # type checkers don't narrow the negation of a `TypeGuard` as it isn't + # a safe default behaviour but we know that at this point the `response_format` + # can only be a `type` + if not _parsing._completions.is_basemodel_type(response_format): + raise TypeError(f"Unsupported response_format type - {response_format}") + + if ref_template is not None: + schema = response_format.model_json_schema(ref_template=ref_template) + else: + schema = _pydantic.to_strict_json_schema(response_format) + + return { + "type": "json_schema", + "json_schema": { + "schema": schema, + "name": response_format.__name__, + "strict": True, + }, + } diff --git a/litellm/llms/base_llm/chat/transformation.py b/litellm/llms/base_llm/chat/transformation.py index 2d96451239..85ca3fe8b9 100644 --- a/litellm/llms/base_llm/chat/transformation.py +++ b/litellm/llms/base_llm/chat/transformation.py @@ -4,13 +4,25 @@ Common base config for all LLM providers import types from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, AsyncIterator, Iterator, List, Optional, Union +from typing import ( + TYPE_CHECKING, + Any, + AsyncIterator, + Iterator, + List, + Optional, + Type, + Union, +) import httpx +from pydantic import BaseModel from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import ModelResponse +from ..base_utils import type_to_response_format_param + if TYPE_CHECKING: from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj @@ -71,6 +83,11 @@ class BaseConfig(ABC): and v is not None } + def get_json_schema_from_pydantic_object( + self, response_format: Optional[Union[Type[BaseModel], dict]] + ) -> Optional[dict]: + return type_to_response_format_param(response_format=response_format) + def should_fake_stream( self, model: Optional[str], diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index e50159a8fc..b4f1ea3d3c 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -31,7 +31,14 @@ from litellm.types.llms.openai import ( from litellm.types.utils import ModelResponse, Usage from litellm.utils import CustomStreamWrapper, add_dummy_tool, has_tool_call_blocks -from ..common_utils import BedrockError, get_bedrock_tool_name +from ..common_utils import ( + AmazonBedrockGlobalConfig, + BedrockError, + get_bedrock_tool_name, +) + +global_config = AmazonBedrockGlobalConfig() +all_global_regions = global_config.get_all_regions() class AmazonConverseConfig: @@ -573,13 +580,24 @@ class AmazonConverseConfig: Handle model names like - "us.meta.llama3-2-11b-instruct-v1:0" -> "meta.llama3-2-11b-instruct-v1" AND "meta.llama3-2-11b-instruct-v1:0" -> "meta.llama3-2-11b-instruct-v1" """ + if model.startswith("bedrock/"): - model = model.split("/")[1] + model = model.split("/", 1)[1] if model.startswith("converse/"): - model = model.split("/")[1] + model = model.split("/", 1)[1] potential_region = model.split(".", 1)[0] + + alt_potential_region = model.split("/", 1)[ + 0 + ] # in model cost map we store regional information like `/us-west-2/bedrock-model` + if potential_region in self._supported_cross_region_inference_region(): return model.split(".", 1)[1] + elif ( + alt_potential_region in all_global_regions and len(model.split("/", 1)) > 1 + ): + return model.split("/", 1)[1] + return model diff --git 
a/litellm/llms/bedrock/common_utils.py b/litellm/llms/bedrock/common_utils.py index 531b202f89..7b3040f91a 100644 --- a/litellm/llms/bedrock/common_utils.py +++ b/litellm/llms/bedrock/common_utils.py @@ -42,16 +42,35 @@ class AmazonBedrockGlobalConfig: optional_params[mapped_params[param]] = value return optional_params + def get_all_regions(self) -> List[str]: + return ( + self.get_us_regions() + + self.get_eu_regions() + + self.get_ap_regions() + + self.get_ca_regions() + + self.get_sa_regions() + ) + + def get_ap_regions(self) -> List[str]: + return ["ap-northeast-1", "ap-northeast-2", "ap-northeast-3", "ap-south-1"] + + def get_sa_regions(self) -> List[str]: + return ["sa-east-1"] + def get_eu_regions(self) -> List[str]: """ Source: https://www.aws-services.info/bedrock.html """ return [ "eu-west-1", + "eu-west-2", "eu-west-3", "eu-central-1", ] + def get_ca_regions(self) -> List[str]: + return ["ca-central-1"] + def get_us_regions(self) -> List[str]: """ Source: https://www.aws-services.info/bedrock.html @@ -59,6 +78,7 @@ class AmazonBedrockGlobalConfig: return [ "us-east-2", "us-east-1", + "us-west-1", "us-west-2", "us-gov-west-1", ] diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 7cecd91e20..ed7f036bf9 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -5364,7 +5364,8 @@ "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "bedrock/us-west-2/mistral.mistral-large-2402-v1:0": { "max_tokens": 8191, @@ -5456,7 +5457,8 @@ "supports_function_calling": true, "supports_vision": true, "supports_assistant_prefill": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_response_schema": true }, "anthropic.claude-3-haiku-20240307-v1:0": { "max_tokens": 4096, @@ -5524,7 +5526,9 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_assistant_prefill": true + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true }, "us.anthropic.claude-3-haiku-20240307-v1:0": { "max_tokens": 4096, @@ -5546,7 +5550,8 @@ "litellm_provider": "bedrock", "mode": "chat", "supports_assistant_prefill": true, - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "us.anthropic.claude-3-opus-20240229-v1:0": { "max_tokens": 4096, @@ -5591,7 +5596,9 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_assistant_prefill": true + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true }, "eu.anthropic.claude-3-haiku-20240307-v1:0": { "max_tokens": 4096, @@ -5612,7 +5619,10 @@ "output_cost_per_token": 0.000005, "litellm_provider": "bedrock", "mode": "chat", - "supports_function_calling": true + "supports_function_calling": true, + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true }, "eu.anthropic.claude-3-opus-20240229-v1:0": { "max_tokens": 4096, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 5ea6dd6a6c..a8699b5eb8 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,5 +1,5 @@ model_list: - - model_name: embedding-small + - model_name: 
bedrock/us.anthropic.claude-3-haiku-20240307-v1:0 litellm_params: - model: openai/text-embedding-3-small - \ No newline at end of file + model: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0 + diff --git a/litellm/utils.py b/litellm/utils.py index 3893f947ec..94f6c0f5c6 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -178,7 +178,10 @@ from openai import OpenAIError as OriginalError from litellm.llms.base_llm.audio_transcription.transformation import ( BaseAudioTranscriptionConfig, ) -from litellm.llms.base_llm.base_utils import BaseLLMModelInfo +from litellm.llms.base_llm.base_utils import ( + BaseLLMModelInfo, + type_to_response_format_param, +) from litellm.llms.base_llm.chat.transformation import BaseConfig from litellm.llms.base_llm.completion.transformation import BaseTextCompletionConfig from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig @@ -1474,7 +1477,7 @@ def create_pretrained_tokenizer( try: tokenizer = Tokenizer.from_pretrained( - identifier, revision=revision, auth_token=auth_token + identifier, revision=revision, auth_token=auth_token # type: ignore ) except Exception as e: verbose_logger.error( @@ -2773,11 +2776,26 @@ def get_optional_params( # noqa: PLR0915 message=f"Function calling is not supported by {custom_llm_provider}.", ) - if "response_format" in non_default_params: - non_default_params["response_format"] = type_to_response_format_param( - response_format=non_default_params["response_format"] + provider_config: Optional[BaseConfig] = None + if custom_llm_provider is not None and custom_llm_provider in [ + provider.value for provider in LlmProviders + ]: + provider_config = ProviderConfigManager.get_provider_chat_config( + model=model, provider=LlmProviders(custom_llm_provider) ) + if "response_format" in non_default_params: + if provider_config is not None: + non_default_params["response_format"] = ( + provider_config.get_json_schema_from_pydantic_object( + response_format=non_default_params["response_format"] + ) + ) + else: + non_default_params["response_format"] = type_to_response_format_param( + response_format=non_default_params["response_format"] + ) + if "tools" in non_default_params and isinstance( non_default_params, list ): # fixes https://github.com/BerriAI/litellm/issues/4933 @@ -2835,13 +2853,6 @@ def get_optional_params( # noqa: PLR0915 message=f"{custom_llm_provider} does not support parameters: {unsupported_params}, for model={model}. To drop these, set `litellm.drop_params=True` or for proxy:\n\n`litellm_settings:\n drop_params: true`\n", ) - provider_config: Optional[BaseConfig] = None - if custom_llm_provider is not None and custom_llm_provider in [ - provider.value for provider in LlmProviders - ]: - provider_config = ProviderConfigManager.get_provider_chat_config( - model=model, provider=LlmProviders(custom_llm_provider) - ) supported_params = get_supported_openai_params( model=model, custom_llm_provider=custom_llm_provider ) @@ -4964,36 +4975,6 @@ def _should_retry(status_code: int): return False -def type_to_response_format_param( - response_format: Optional[Union[Type[BaseModel], dict]], -) -> Optional[dict]: - """ - Re-implementation of openai's 'type_to_response_format_param' function - - Used for converting pydantic object to api schema. 
- """ - if response_format is None: - return None - - if isinstance(response_format, dict): - return response_format - - # type checkers don't narrow the negation of a `TypeGuard` as it isn't - # a safe default behaviour but we know that at this point the `response_format` - # can only be a `type` - if not _parsing._completions.is_basemodel_type(response_format): - raise TypeError(f"Unsupported response_format type - {response_format}") - - return { - "type": "json_schema", - "json_schema": { - "schema": _pydantic.to_strict_json_schema(response_format), - "name": response_format.__name__, - "strict": True, - }, - } - - def _get_retry_after_from_exception_header( response_headers: Optional[httpx.Headers] = None, ): diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 7cecd91e20..ed7f036bf9 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -5364,7 +5364,8 @@ "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "bedrock/us-west-2/mistral.mistral-large-2402-v1:0": { "max_tokens": 8191, @@ -5456,7 +5457,8 @@ "supports_function_calling": true, "supports_vision": true, "supports_assistant_prefill": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_response_schema": true }, "anthropic.claude-3-haiku-20240307-v1:0": { "max_tokens": 4096, @@ -5524,7 +5526,9 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_assistant_prefill": true + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true }, "us.anthropic.claude-3-haiku-20240307-v1:0": { "max_tokens": 4096, @@ -5546,7 +5550,8 @@ "litellm_provider": "bedrock", "mode": "chat", "supports_assistant_prefill": true, - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "us.anthropic.claude-3-opus-20240229-v1:0": { "max_tokens": 4096, @@ -5591,7 +5596,9 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_assistant_prefill": true + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true }, "eu.anthropic.claude-3-haiku-20240307-v1:0": { "max_tokens": 4096, @@ -5612,7 +5619,10 @@ "output_cost_per_token": 0.000005, "litellm_provider": "bedrock", "mode": "chat", - "supports_function_calling": true + "supports_function_calling": true, + "supports_assistant_prefill": true, + "supports_prompt_caching": true, + "supports_response_schema": true }, "eu.anthropic.claude-3-opus-20240229-v1:0": { "max_tokens": 4096, diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py index 590c2d10c0..58536d7831 100644 --- a/tests/llm_translation/base_llm_unit_tests.py +++ b/tests/llm_translation/base_llm_unit_tests.py @@ -259,6 +259,59 @@ class BaseLLMChatTest(ABC): except litellm.InternalServerError: pytest.skip("Model is overloaded") + @pytest.mark.flaky(retries=6, delay=1) + def test_json_response_pydantic_obj_nested_obj(self): + litellm.set_verbose = True + from pydantic import BaseModel + from litellm.utils import supports_response_schema + + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + @pytest.mark.flaky(retries=6, delay=1) + def test_json_response_nested_pydantic_obj(self): + from 
pydantic import BaseModel + from litellm.utils import supports_response_schema + + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + class CalendarEvent(BaseModel): + name: str + date: str + participants: list[str] + + class EventsList(BaseModel): + events: list[CalendarEvent] + + messages = [ + {"role": "user", "content": "List 5 important events in the XIX century"} + ] + + base_completion_call_args = self.get_base_completion_call_args() + if not supports_response_schema(base_completion_call_args["model"], None): + pytest.skip( + f"Model={base_completion_call_args['model']} does not support response schema" + ) + + try: + res = self.completion_function( + **base_completion_call_args, + messages=messages, + response_format=EventsList, + timeout=60, + ) + assert res is not None + + print(res.choices[0].message) + + assert res.choices[0].message.content is not None + assert res.choices[0].message.tool_calls is None + except litellm.Timeout: + pytest.skip("Model took too long to respond") + except litellm.InternalServerError: + pytest.skip("Model is overloaded") + @pytest.mark.flaky(retries=6, delay=1) def test_json_response_format_stream(self): """ diff --git a/tests/local_testing/test_get_model_info.py b/tests/local_testing/test_get_model_info.py index a5f468d96b..10fa4782f0 100644 --- a/tests/local_testing/test_get_model_info.py +++ b/tests/local_testing/test_get_model_info.py @@ -307,3 +307,35 @@ def test_get_model_info_custom_model_router(): info = get_model_info("openai/meta-llama/Meta-Llama-3-8B-Instruct") print("info", info) assert info is not None + + +def test_get_model_info_bedrock_models(): + """ + Check for drift in base model info for bedrock models and regional model info for bedrock models. + """ + from litellm import AmazonConverseConfig + + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + for k, v in litellm.model_cost.items(): + if v["litellm_provider"] == "bedrock": + k = k.replace("*/", "") + potential_commitments = [ + "1-month-commitment", + "3-month-commitment", + "6-month-commitment", + ] + if any(commitment in k for commitment in potential_commitments): + for commitment in potential_commitments: + k = k.replace(f"{commitment}/", "") + base_model = AmazonConverseConfig()._get_base_model(k) + base_model_info = litellm.model_cost[base_model] + for base_model_key, base_model_value in base_model_info.items(): + if base_model_key.startswith("supports_"): + assert ( + base_model_key in v + ), f"{base_model_key} is not in model cost map for {k}" + assert ( + v[base_model_key] == base_model_value + ), f"{base_model_key} is not equal to {base_model_value} for model {k}" diff --git a/tests/local_testing/test_utils.py b/tests/local_testing/test_utils.py index 5c7fc054d0..eb3c02b862 100644 --- a/tests/local_testing/test_utils.py +++ b/tests/local_testing/test_utils.py @@ -1471,3 +1471,12 @@ def test_pick_cheapest_chat_model_from_llm_provider(): assert len(pick_cheapest_chat_models_from_llm_provider("openai", n=3)) == 3 assert len(pick_cheapest_chat_models_from_llm_provider("unknown", n=1)) == 0 + + +def test_get_potential_model_names(): + from litellm.utils import _get_potential_model_names + + assert _get_potential_model_names( + model="bedrock/ap-northeast-1/anthropic.claude-instant-v1", + custom_llm_provider="bedrock", + )
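
Illustrative usage (not part of the diff): a minimal sketch of what the relocated type_to_response_format_param helper and the new Anthropic ref_template path do for a nested pydantic model, and how the nested object is then passed end to end. CalendarEvent/EventsList and the "/$defs/{model}" template come from the patch above; the model name in the completion() call is only an example and assumes a Claude model whose cost-map entry sets supports_response_schema.

    from pydantic import BaseModel

    import litellm
    from litellm.llms.base_llm.base_utils import type_to_response_format_param


    class CalendarEvent(BaseModel):
        name: str
        date: str
        participants: list[str]


    class EventsList(BaseModel):
        events: list[CalendarEvent]


    # Default conversion (used by most providers): openai's strict-schema helper.
    openai_style = type_to_response_format_param(EventsList)

    # Anthropic conversion (new in this patch): keep pydantic's $defs/$ref layout but
    # rewrite refs with ref_template="/$defs/{model}" so nested models stay resolvable.
    anthropic_style = type_to_response_format_param(EventsList, ref_template="/$defs/{model}")

    # Both calls return the same wrapper shape:
    # {"type": "json_schema", "json_schema": {"schema": {...}, "name": "EventsList", "strict": True}}

    # End to end, the nested object can now be passed straight to completion()
    # (previously this failed on Anthropic - https://github.com/BerriAI/litellm/issues/7755).
    resp = litellm.completion(
        model="anthropic/claude-3-5-sonnet-20240620",  # example model name
        messages=[{"role": "user", "content": "List 5 important events in the XIX century"}],
        response_format=EventsList,
    )
    print(resp.choices[0].message.content)  # JSON matching the EventsList schema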
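
Illustrative sketch (not part of the diff): the regional-name handling this patch adds to AmazonConverseConfig._get_base_model, which the new drift test in test_get_model_info.py relies on. Cross-region prefixes like "us." use the existing path; a leading "<aws-region>/" segment (how regional entries are keyed in the model cost map) is the new case, resolved against AmazonBedrockGlobalConfig.get_all_regions(). The expected outputs below only restate the logic added above.

    from litellm import AmazonConverseConfig

    cfg = AmazonConverseConfig()

    # Cross-region inference prefix ("us.", "eu.", ...) is stripped via the existing path.
    print(cfg._get_base_model("us.anthropic.claude-3-haiku-20240307-v1:0"))
    # -> "anthropic.claude-3-haiku-20240307-v1:0"

    # New in this patch: a leading AWS region segment is also stripped.
    print(cfg._get_base_model("bedrock/ap-northeast-1/anthropic.claude-instant-v1"))
    # -> "anthropic.claude-instant-v1"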
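
Finally, a hedged sketch of how the cost-map additions surface to callers: with this patch, regional Claude entries report the same supports_ flags as their base models, which is what the new test_get_model_info_bedrock_models test enforces. The call signature mirrors the one used in base_llm_unit_tests.py; the specific model IDs below are assumptions and the exact flag values depend on the shipped JSON.

    from litellm.utils import supports_response_schema

    # Expected to return True after this patch, matching the base anthropic.claude-3-5-sonnet entry.
    print(supports_response_schema("us.anthropic.claude-3-5-sonnet-20240620-v1:0", "bedrock"))
    print(supports_response_schema("eu.anthropic.claude-3-5-sonnet-20240620-v1:0", "bedrock"))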