(Feat) - Add /bedrock/meta.llama3-3-70b-instruct-v1:0 tool calling support + cost tracking + base llm unit test for tool calling (#8545)

* Add support for bedrock meta.llama3-3-70b-instruct-v1:0 tool calling (#8512)
* fix(converse_transformation.py): fixing bedrock meta.llama3-3-70b tool calling
* test(test_bedrock_completion.py): adding llama3.3 tool compatibility check
* add TestBedrockTestSuite
* add bedrock llama 3.3 to base llm class
* us.meta.llama3-3-70b-instruct-v1:0
* test_basic_tool_calling
* TestAzureOpenAIO1
* test_basic_tool_calling
* test_basic_tool_calling
---------
Co-authored-by: miraclebakelaser <65143272+miraclebakelaser@users.noreply.github.com>
2025-04-25 18:54:30 +00:00 · 2025-02-14 14:15:25 -08:00 · 2025-02-14 14:15:25 -08:00 · 125f6fff67
commit 125f6fff67
parent ce2c618aad
7 changed files with 154 additions and 2 deletions
--- a/litellm/llms/bedrock/chat/converse_transformation.py
+++ b/litellm/llms/bedrock/chat/converse_transformation.py
@ -105,6 +105,7 @@ class AmazonConverseConfig(BaseConfig):
            or base_model.startswith("cohere")
            or base_model.startswith("meta.llama3-1")
            or base_model.startswith("meta.llama3-2")
            or base_model.startswith("meta.llama3-3")
            or base_model.startswith("amazon.nova")
        ):
            supported_params.append("tools")
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -7095,7 +7095,9 @@
        "input_cost_per_token": 0.00000072,
        "output_cost_per_token": 0.00000072,
        "litellm_provider": "bedrock_converse",
-        "mode": "chat"
+        "mode": "chat",
        "supports_function_calling": true, 
        "supports_tool_choice": false
    },
    "meta.llama2-13b-chat-v1": {
        "max_tokens": 4096, 
@ -7435,6 +7437,17 @@
        "supports_function_calling": true, 
        "supports_tool_choice": false
    },
    "us.meta.llama3-3-70b-instruct-v1:0": {
        "max_tokens": 4096, 
        "max_input_tokens": 128000,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0.00000072,
        "output_cost_per_token": 0.00000072,
        "litellm_provider": "bedrock_converse",
        "mode": "chat",
        "supports_function_calling": true, 
        "supports_tool_choice": false
    },
    "512-x-512/50-steps/stability.stable-diffusion-xl-v0": {
        "max_tokens": 77, 
        "max_input_tokens": 77, 
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -7095,7 +7095,9 @@
        "input_cost_per_token": 0.00000072,
        "output_cost_per_token": 0.00000072,
        "litellm_provider": "bedrock_converse",
-        "mode": "chat"
+        "mode": "chat",
        "supports_function_calling": true, 
        "supports_tool_choice": false
    },
    "meta.llama2-13b-chat-v1": {
        "max_tokens": 4096, 
@ -7435,6 +7437,17 @@
        "supports_function_calling": true, 
        "supports_tool_choice": false
    },
    "us.meta.llama3-3-70b-instruct-v1:0": {
        "max_tokens": 4096, 
        "max_input_tokens": 128000,
        "max_output_tokens": 4096,
        "input_cost_per_token": 0.00000072,
        "output_cost_per_token": 0.00000072,
        "litellm_provider": "bedrock_converse",
        "mode": "chat",
        "supports_function_calling": true, 
        "supports_tool_choice": false
    },
    "512-x-512/50-steps/stability.stable-diffusion-xl-v0": {
        "max_tokens": 77, 
        "max_input_tokens": 77, 
--- a/tests/llm_translation/base_llm_unit_tests.py
+++ b/tests/llm_translation/base_llm_unit_tests.py
@ -634,6 +634,107 @@ class BaseLLMChatTest(ABC):
        return url
    def test_basic_tool_calling(self):
        """Exercise a two-turn tool-calling exchange against the model under test.

        Flow:
          1. Skip when ``supports_function_calling`` reports that the model
             returned by ``get_base_completion_call_args()`` cannot call tools.
          2. Send a weather question together with one ``get_current_weather``
             tool definition.
          3. If the first response carries no tool calls, stop there (the
             test passes without a second request).
          4. Otherwise append the assistant message and a canned tool result
             to the conversation and request a second completion, which must
             contain either content or further tool calls.

        ``litellm.RateLimitError`` is tolerated; any other exception fails the
        test via ``pytest.fail``.
        """
        try:
            from litellm import completion, ModelResponse
            from litellm.utils import supports_function_calling

            litellm.set_verbose = True
            litellm._turn_on_debug()

            # NOTE(review): presumably forces the locally bundled cost map so the
            # capability lookup below does not depend on a remote fetch - confirm.
            os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
            litellm.model_cost = litellm.get_model_cost_map(url="")

            model_name = self.get_base_completion_call_args()["model"]
            if not supports_function_calling(model_name, None):
                print("Model does not support function calling")
                pytest.skip("Model does not support function calling")

            # JSON-schema description of the single tool offered to the model.
            location_schema = {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            }
            unit_schema = {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
            }
            weather_tool = {
                "type": "function",
                "function": {
                    "name": "get_current_weather",
                    "description": "Get the current weather in a given location",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "location": location_schema,
                            "unit": unit_schema,
                        },
                        "required": ["location"],
                    },
                },
            }
            tool_specs = [weather_tool]

            # The same list object is reused (and extended) for the second turn.
            conversation = [
                {
                    "role": "user",
                    "content": "What's the weather like in Boston today in fahrenheit?",
                }
            ]

            # Provider-specific base args are merged last so they take precedence.
            first_call_kwargs = {
                "messages": conversation,
                "tools": tool_specs,
                **self.get_base_completion_call_args(),
            }
            response: ModelResponse = completion(**first_call_kwargs)  # type: ignore
            print(f"response: {response}")
            assert response is not None

            first_message = response.choices[0].message
            # No tool calls came back: do not make a subsequent LLM API call.
            if first_message.tool_calls is None:
                return

            first_tool_call = first_message.tool_calls[0]
            assert isinstance(first_tool_call.function.name, str)
            assert isinstance(first_tool_call.function.arguments, str)

            # Record the assistant's tool invocation in the conversation.
            conversation.append(first_message.model_dump())

            tool_result = (
                '{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
            )
            # Feed the tool output back using the OpenAI "tool" message format.
            tool_reply = {
                "tool_call_id": first_tool_call.id,
                "role": "tool",
                "name": first_tool_call.function.name,
                "content": tool_result,
            }
            conversation.append(tool_reply)

            # Second turn: the model should now be able to answer from the tool result.
            second_call_kwargs = {
                "messages": conversation,
                "tools": tool_specs,
                **self.get_base_completion_call_args(),
            }
            second_response: ModelResponse = completion(**second_call_kwargs)  # type: ignore
            print(f"second response: {second_response}")
            assert second_response is not None

            # Either content or tool calls must be present in the follow-up.
            second_message = second_response.choices[0].message
            assert not (
                second_message.content is None and second_message.tool_calls is None
            )
        except litellm.RateLimitError:
            # Rate limiting from the provider is not treated as a test failure.
            pass
        except Exception as err:
            pytest.fail(f"Error occurred: {err}")
    @pytest.mark.asyncio
    async def test_completion_cost(self):
        from litellm import completion_cost
--- a/tests/llm_translation/test_azure_o_series.py
+++ b/tests/llm_translation/test_azure_o_series.py
@ -39,6 +39,9 @@ class TestAzureOpenAIO1(BaseOSeriesModelsTest, BaseLLMChatTest):
        """Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
        pass
    def test_basic_tool_calling(self):
        pass
    def test_prompt_caching(self):
        """Temporary override. o1 prompt caching is not working."""
        pass
--- a/tests/llm_translation/test_bedrock_completion.py
+++ b/tests/llm_translation/test_bedrock_completion.py
@ -2092,6 +2092,7 @@ def test_bedrock_prompt_caching_message(messages, expected_cache_control):
        ("bedrock/mistral.mistral-7b-instruct-v0.1:0", True),
        ("bedrock/meta.llama3-1-8b-instruct:0", True),
        ("bedrock/meta.llama3-2-70b-instruct:0", True),
        ("bedrock/meta.llama3-3-70b-instruct-v1:0", True),
        ("bedrock/amazon.titan-embed-text-v1:0", False),
    ],
 )
--- a/tests/llm_translation/test_bedrock_llama.py
+++ b/tests/llm_translation/test_bedrock_llama.py
@ -0,0 +1,20 @@
 from base_llm_unit_tests import BaseLLMChatTest
 import pytest
 import sys
 import os
 sys.path.insert(
    0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
 import litellm
 class TestBedrockTestSuite(BaseLLMChatTest):
    """Runs the shared ``BaseLLMChatTest`` checks against Bedrock Llama 3.3 70B."""

    def get_base_completion_call_args(self) -> dict:
        """Return the completion kwargs identifying the model under test.

        Debug logging is switched on every time the arguments are requested.
        """
        litellm._turn_on_debug()
        call_args = {
            "model": "bedrock/converse/us.meta.llama3-3-70b-instruct-v1:0",
        }
        return call_args

    def test_tool_call_no_arguments(self, tool_call_no_arguments):
        """No-op override: the inherited no-argument tool-call test is not run here."""
        return None