Fix bedrock passing response_format: {"type": "text"} (#8900)

* fix(converse_transformation.py): ignore a `{"type": "text"}` value in response_format (a no-op for bedrock)

* fix(converse_transformation.py): handle adding response format value to tools

* fix(base_invoke_transformation.py): fix `get_bedrock_invoke_provider` to handle cross-region inference models

* test(test_bedrock_completion.py): add unit testing for bedrock invoke provider logic

* test: update test

* fix(exception_mapping_utils.py): add context window exceeded error handling for databricks provider route

* fix(fireworks_ai/): support passing tools + response_format together

* fix: cleanup

* fix(base_invoke_transformation.py): fix imports
Author: Krish Dholakia (2025-02-28 20:09:59 -08:00, committed by GitHub)
Parent: c8dc4f3eec
Commit: c84b489d58
8 changed files with 194 additions and 24 deletions

exception_mapping_utils.py:

```diff
@@ -278,6 +278,7 @@ def exception_type(  # type: ignore  # noqa: PLR0915
                 "This model's maximum context length is" in error_str
                 or "string too long. Expected a string with maximum length"
                 in error_str
+                or "model's maximum context limit" in error_str
             ):
                 exception_mapping_worked = True
                 raise ContextWindowExceededError(
@@ -692,6 +693,13 @@ def exception_type(  # type: ignore  # noqa: PLR0915
                         response=getattr(original_exception, "response", None),
                         litellm_debug_info=extra_information,
                     )
+                elif "model's maximum context limit" in error_str:
+                    exception_mapping_worked = True
+                    raise ContextWindowExceededError(
+                        message=f"{custom_llm_provider}Exception: Context Window Error - {error_str}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                    )
                 elif "token_quota_reached" in error_str:
                     exception_mapping_worked = True
                     raise RateLimitError(
```
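The observable effect, as a hedged sketch (the model name below is illustrative, not taken from this commit): a provider error message containing "model's maximum context limit", which the databricks route returns, now maps to litellm's typed `ContextWindowExceededError` instead of falling through to a generic exception.

```python
import litellm

try:
    litellm.completion(
        model="databricks/databricks-meta-llama-3-1-70b-instruct",  # illustrative model name
        messages=[{"role": "user", "content": "word " * 500_000}],  # deliberately oversized prompt
    )
except litellm.ContextWindowExceededError as e:
    # With this commit, the "model's maximum context limit" error string is
    # recognized and mapped to a typed exception callers can catch.
    print(f"context window exceeded: {e}")
```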

anthropic chat transformation (AnthropicConfig):

```diff
@@ -293,18 +293,6 @@ class AnthropicConfig(BaseConfig):
             new_stop = new_v
         return new_stop

-    def _add_tools_to_optional_params(
-        self, optional_params: dict, tools: List[AllAnthropicToolsValues]
-    ) -> dict:
-        if "tools" not in optional_params:
-            optional_params["tools"] = tools
-        else:
-            optional_params["tools"] = [
-                *optional_params["tools"],
-                *tools,
-            ]
-        return optional_params
-
     def map_openai_params(
         self,
         non_default_params: dict,
```

base LLM chat transformation (BaseConfig):

```diff
@@ -111,6 +111,19 @@ class BaseConfig(ABC):
         """
         return False

+    def _add_tools_to_optional_params(self, optional_params: dict, tools: List) -> dict:
+        """
+        Helper util to add tools to optional_params.
+        """
+        if "tools" not in optional_params:
+            optional_params["tools"] = tools
+        else:
+            optional_params["tools"] = [
+                *optional_params["tools"],
+                *tools,
+            ]
+        return optional_params
+
     def translate_developer_role_to_system_role(
         self,
         messages: List[AllMessageValues],
@@ -158,6 +171,7 @@
         optional_params: dict,
         value: dict,
         is_response_format_supported: bool,
+        enforce_tool_choice: bool = True,
     ) -> dict:
         """
         Follow similar approach to anthropic - translate to a single tool call.
@@ -195,9 +209,11 @@
             optional_params.setdefault("tools", [])
             optional_params["tools"].append(_tool)

-            optional_params["tool_choice"] = _tool_choice
+            if enforce_tool_choice:
+                optional_params["tool_choice"] = _tool_choice
             optional_params["json_mode"] = True
-        else:
+        elif is_response_format_supported:
             optional_params["response_format"] = value
         return optional_params
```
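The point of hoisting the helper into BaseConfig is its merge semantics: new tools are appended to any existing `optional_params["tools"]` instead of clobbering them. A minimal standalone replica of the logic above:

```python
from typing import List


def add_tools_to_optional_params(optional_params: dict, tools: List) -> dict:
    # Mirrors BaseConfig._add_tools_to_optional_params: append, don't overwrite.
    if "tools" not in optional_params:
        optional_params["tools"] = tools
    else:
        optional_params["tools"] = [*optional_params["tools"], *tools]
    return optional_params


params = {"tools": [{"name": "user_tool"}]}
params = add_tools_to_optional_params(params, [{"name": "json_tool_call"}])
assert [t["name"] for t in params["tools"]] == ["user_tool", "json_tool_call"]
```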

converse_transformation.py (AmazonConverseConfig):

```diff
@@ -227,6 +227,10 @@ class AmazonConverseConfig(BaseConfig):
                     json_schema = value["json_schema"]["schema"]
                     schema_name = value["json_schema"]["name"]
                     description = value["json_schema"].get("description")
+
+                if "type" in value and value["type"] == "text":
+                    continue
+
                 """
                 Follow similar approach to anthropic - translate to a single tool call.
@@ -240,7 +244,9 @@
                     schema_name=schema_name if schema_name != "" else "json_tool_call",
                     description=description,
                 )
-                optional_params["tools"] = [_tool]
+                optional_params = self._add_tools_to_optional_params(
+                    optional_params=optional_params, tools=[_tool]
+                )
                 if litellm.utils.supports_tool_choice(
                     model=model, custom_llm_provider=self.custom_llm_provider
                 ):
@@ -267,7 +273,9 @@
             if param == "top_p":
                 optional_params["topP"] = value
             if param == "tools":
-                optional_params["tools"] = value
+                optional_params = self._add_tools_to_optional_params(
+                    optional_params=optional_params, tools=value
+                )
             if param == "tool_choice":
                 _tool_choice_value = self.map_tool_choice_values(
                     model=model, tool_choice=value, drop_params=drop_params  # type: ignore
```
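A hedged sketch of the request shape this fixes, mirroring the new test added further down (the exact model id is illustrative): `response_format={"type": "text"}` is now treated as a no-op instead of being translated into a tool call, and because the converse config routes through `_add_tools_to_optional_params`, a json-schema response_format no longer overwrites user-supplied tools.

```python
import litellm

response = litellm.completion(
    model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
    messages=[{"role": "user", "content": "What's the weather like in Boston today?"}],
    response_format={"type": "text"},  # silently dropped; tools pass through intact
    tools=[
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {"location": {"type": "string"}},
                    "required": ["location"],
                },
            },
        }
    ],
    drop_params=True,
)
```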

base_invoke_transformation.py (AmazonInvokeConfig):

```diff
@@ -3,7 +3,7 @@ import json
 import time
 import urllib.parse
 from functools import partial
-from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union, cast, get_args

 import httpx
@@ -531,6 +531,60 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
         """
         return False

+    @staticmethod
+    def get_bedrock_invoke_provider(
+        model: str,
+    ) -> Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL]:
+        """
+        Helper function to get the bedrock provider from the model.
+
+        Handles 4 scenarios:
+        1. model=invoke/anthropic.claude-3-5-sonnet-20240620-v1:0 -> returns `anthropic`
+        2. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> returns `anthropic`
+        3. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> returns `llama`
+        4. model=us.amazon.nova-pro-v1:0 -> returns `nova`
+        """
+        if model.startswith("invoke/"):
+            model = model.replace("invoke/", "", 1)
+
+        _split_model = model.split(".")[0]
+        if _split_model in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL):
+            return cast(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL, _split_model)
+
+        # If not a known provider, check for pattern with two slashes
+        provider = AmazonInvokeConfig._get_provider_from_model_path(model)
+        if provider is not None:
+            return provider
+
+        # check if provider == "nova"
+        if "nova" in model:
+            return "nova"
+        for provider in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL):
+            if provider in model:
+                return provider
+        return None
+
+    @staticmethod
+    def _get_provider_from_model_path(
+        model_path: str,
+    ) -> Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL]:
+        """
+        Helper function to get the provider from a model path with format: provider/model-name
+
+        Args:
+            model_path (str): The model path (e.g., 'llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n' or 'anthropic/model-name')
+
+        Returns:
+            Optional[str]: The provider name, or None if no valid provider found
+        """
+        parts = model_path.split("/")
+        if len(parts) >= 1:
+            provider = parts[0]
+            if provider in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL):
+                return cast(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL, provider)
+        return None
+
     def get_bedrock_model_id(
         self,
         optional_params: dict,
```
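The resolution order of the new helper, shown with the docstring's own examples (accessed via `litellm.AmazonInvokeConfig`, as in the unit test at the bottom of this commit):

```python
import litellm

cfg = litellm.AmazonInvokeConfig()

# 1. "invoke/" prefix is stripped, then the dotted prefix matches a known provider
assert cfg.get_bedrock_invoke_provider("invoke/anthropic.claude-3-5-sonnet-20240620-v1:0") == "anthropic"
# 2. the dotted prefix matches directly
assert cfg.get_bedrock_invoke_provider("anthropic.claude-3-5-sonnet-20240620-v1:0") == "anthropic"
# 3. no dotted match, so the leading segment of provider/model-path is used
assert cfg.get_bedrock_invoke_provider("llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n") == "llama"
# 4. cross-region inference ids fall back to the "nova" substring check
assert cfg.get_bedrock_invoke_provider("us.amazon.nova-pro-v1:0") == "nova"
```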

fireworks_ai chat transformation (FireworksAIConfig):

```diff
@@ -90,6 +90,11 @@ class FireworksAIConfig(OpenAIGPTConfig):
     ) -> dict:
         supported_openai_params = self.get_supported_openai_params(model=model)

+        is_tools_set = any(
+            param == "tools" and value is not None
+            for param, value in non_default_params.items()
+        )
+
         for param, value in non_default_params.items():
             if param == "tool_choice":
                 if value == "required":
@@ -98,18 +103,30 @@
                 else:
                     # pass through the value of tool choice
                     optional_params["tool_choice"] = value
-            elif (
-                param == "response_format" and value.get("type", None) == "json_schema"
-            ):
-                optional_params["response_format"] = {
-                    "type": "json_object",
-                    "schema": value["json_schema"]["schema"],
-                }
+            elif param == "response_format":
+                if (
+                    is_tools_set
+                ):  # fireworks ai doesn't support tools and response_format together
+                    optional_params = self._add_response_format_to_tools(
+                        optional_params=optional_params,
+                        value=value,
+                        is_response_format_supported=False,
+                        enforce_tool_choice=False,  # tools and response_format are both set, don't enforce tool_choice
+                    )
+                elif "json_schema" in value:
+                    optional_params["response_format"] = {
+                        "type": "json_object",
+                        "schema": value["json_schema"]["schema"],
+                    }
+                else:
+                    optional_params["response_format"] = value
             elif param == "max_completion_tokens":
                 optional_params["max_tokens"] = value
             elif param in supported_openai_params:
                 if value is not None:
                     optional_params[param] = value

         return optional_params

     def _add_transform_inline_image_block(
```
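A hedged sketch of the call this enables (the model id is illustrative): when tools are also set, the response_format is folded into the tool list via `_add_response_format_to_tools`, without forcing `tool_choice`, since fireworks ai does not accept both fields at once.

```python
import litellm

weather_tool = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {"location": {"type": "string"}},
            "required": ["location"],
        },
    },
}

# tools + response_format together: the schema rides along as an extra tool
# rather than as a bare response_format field.
response = litellm.completion(
    model="fireworks_ai/accounts/fireworks/models/llama-v3p1-70b-instruct",  # illustrative
    messages=[{"role": "user", "content": "What's the weather like in Boston today?"}],
    tools=[weather_tool],
    response_format={
        "type": "json_schema",
        "json_schema": {"name": "weather_report", "schema": {"type": "object"}},
    },
)
```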

base_llm_unit_tests (BaseLLMChatTest):

```diff
@@ -254,6 +254,56 @@ class BaseLLMChatTest(ABC):
         # relevant issue: https://github.com/BerriAI/litellm/issues/6741
         assert response.choices[0].message.content is not None

+    @pytest.mark.parametrize(
+        "response_format",
+        [
+            {"type": "text"},
+        ],
+    )
+    @pytest.mark.flaky(retries=6, delay=1)
+    def test_response_format_type_text_with_tool_calls_no_tool_choice(
+        self, response_format
+    ):
+        base_completion_call_args = self.get_base_completion_call_args()
+        messages = [
+            {"role": "user", "content": "What's the weather like in Boston today?"},
+        ]
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "get_current_weather",
+                    "description": "Get the current weather in a given location",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "location": {
+                                "type": "string",
+                                "description": "The city and state, e.g. San Francisco, CA",
+                            },
+                            "unit": {
+                                "type": "string",
+                                "enum": ["celsius", "fahrenheit"],
+                            },
+                        },
+                        "required": ["location"],
+                    },
+                },
+            }
+        ]
+
+        try:
+            response = self.completion_function(
+                **base_completion_call_args,
+                messages=messages,
+                response_format=response_format,
+                tools=tools,
+                drop_params=True,
+            )
+        except litellm.ContextWindowExceededError:
+            pytest.skip("Model exceeded context window")
+
+        assert response is not None
+
     def test_response_format_type_text(self):
         """
         Test that the response format type text does not lead to tool calls
@@ -287,6 +337,7 @@
         print(f"translated_params={translated_params}")

     @pytest.mark.flaky(retries=6, delay=1)
     def test_json_response_pydantic_obj(self):
         litellm.set_verbose = True
```

test_bedrock_completion.py:

```diff
@@ -2717,6 +2717,33 @@ def test_bedrock_top_k_param(model, expected_params):
     assert data["additionalModelRequestFields"] == expected_params

+def test_bedrock_invoke_provider():
+    assert (
+        litellm.AmazonInvokeConfig().get_bedrock_invoke_provider(
+            "bedrock/invoke/us.anthropic.claude-3-5-sonnet-20240620-v1:0"
+        )
+        == "anthropic"
+    )
+    assert (
+        litellm.AmazonInvokeConfig().get_bedrock_invoke_provider(
+            "bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0"
+        )
+        == "anthropic"
+    )
+    assert (
+        litellm.AmazonInvokeConfig().get_bedrock_invoke_provider(
+            "bedrock/llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n"
+        )
+        == "llama"
+    )
+    assert (
+        litellm.AmazonInvokeConfig().get_bedrock_invoke_provider(
+            "us.amazon.nova-pro-v1:0"
+        )
+        == "nova"
+    )
+
 def test_bedrock_description_param():
     from litellm import completion
     from litellm.llms.custom_httpx.http_handler import HTTPHandler
@@ -2754,3 +2781,4 @@ def test_bedrock_description_param():
     assert (
         "Find the meaning inside a poem" in request_body_str
     )  # assert description is passed
```