(Feat) - Add /bedrock/meta.llama3-3-70b-instruct-v1:0 tool calling support + cost tracking + base llm unit test for tool calling (#8545)

* Add support for bedrock meta.llama3-3-70b-instruct-v1:0 tool calling (#8512)
* fix(converse_transformation.py): fixing bedrock meta.llama3-3-70b tool calling
* test(test_bedrock_completion.py): adding llama3.3 tool compatibility check
* add TestBedrockTestSuite
* add bedrock llama 3.3 to base llm class
* us.meta.llama3-3-70b-instruct-v1:0
* test_basic_tool_calling
* TestAzureOpenAIO1
* test_basic_tool_calling
* test_basic_tool_calling

---------

Co-authored-by: miraclebakelaser <65143272+miraclebakelaser@users.noreply.github.com>
2025-04-25 02:34:29 +00:00 · 2025-02-14 14:15:25 -08:00 · 2025-02-14 14:15:25 -08:00 · 125f6fff67
commit 125f6fff67
parent ce2c618aad
7 changed files with 154 additions and 2 deletions
--- a/litellm/llms/bedrock/chat/converse_transformation.py
+++ b/litellm/llms/bedrock/chat/converse_transformation.py
@ -105,6 +105,7 @@ class AmazonConverseConfig(BaseConfig):
            or base_model.startswith("cohere")
            or base_model.startswith("meta.llama3-1")
            or base_model.startswith("meta.llama3-2")
+            or base_model.startswith("meta.llama3-3")
            or base_model.startswith("amazon.nova")
        ):
            supported_params.append("tools")
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -7095,7 +7095,9 @@
        "input_cost_per_token": 0.00000072,
        "output_cost_per_token": 0.00000072,
        "litellm_provider": "bedrock_converse",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_function_calling": true, 
+        "supports_tool_choice": false
    },
    "meta.llama2-13b-chat-v1": {
        "max_tokens": 4096, 
@ -7435,6 +7437,17 @@
        "supports_function_calling": true, 
        "supports_tool_choice": false
    },
+    "us.meta.llama3-3-70b-instruct-v1:0": {
+        "max_tokens": 4096, 
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000072,
+        "output_cost_per_token": 0.00000072,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true, 
+        "supports_tool_choice": false
+    },
    "512-x-512/50-steps/stability.stable-diffusion-xl-v0": {
        "max_tokens": 77, 
        "max_input_tokens": 77, 
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -7095,7 +7095,9 @@
        "input_cost_per_token": 0.00000072,
        "output_cost_per_token": 0.00000072,
        "litellm_provider": "bedrock_converse",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_function_calling": true, 
+        "supports_tool_choice": false
    },
    "meta.llama2-13b-chat-v1": {
        "max_tokens": 4096, 
@ -7435,6 +7437,17 @@
        "supports_function_calling": true, 
        "supports_tool_choice": false
    },
+    "us.meta.llama3-3-70b-instruct-v1:0": {
+        "max_tokens": 4096, 
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000072,
+        "output_cost_per_token": 0.00000072,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true, 
+        "supports_tool_choice": false
+    },
    "512-x-512/50-steps/stability.stable-diffusion-xl-v0": {
        "max_tokens": 77, 
        "max_input_tokens": 77, 
--- a/tests/llm_translation/base_llm_unit_tests.py
+++ b/tests/llm_translation/base_llm_unit_tests.py
@ -634,6 +634,107 @@ class BaseLLMChatTest(ABC):

        return url

+    def test_basic_tool_calling(self):
+        """Round-trip a single tool call through the model under test.
+
+        Sends a weather question together with one function tool. When the
+        model answers with a tool call, a canned tool result is appended to the
+        conversation and a second completion is requested; that response must
+        carry either content or further tool calls.
+
+        The test is skipped when ``supports_function_calling`` reports that the
+        model returned by ``get_base_completion_call_args()`` has no
+        function-calling support. ``litellm.RateLimitError`` is tolerated; any
+        other exception (including a failed assertion) fails the test.
+        """
+        try:
+            from litellm import completion, ModelResponse
+
+            litellm.set_verbose = True
+            litellm._turn_on_debug()
+            from litellm.utils import supports_function_calling
+
+            # Reload litellm.model_cost with the local-cost-map flag set
+            # (presumably so the in-repo JSON is used instead of a remote copy).
+            os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+            litellm.model_cost = litellm.get_model_cost_map(url="")
+
+            # Fetched once and reused for the capability check and both requests.
+            base_completion_call_args = self.get_base_completion_call_args()
+            if not supports_function_calling(base_completion_call_args["model"], None):
+                print("Model does not support function calling")
+                pytest.skip("Model does not support function calling")
+
+            tools = [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "get_current_weather",
+                        "description": "Get the current weather in a given location",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "location": {
+                                    "type": "string",
+                                    "description": "The city and state, e.g. San Francisco, CA",
+                                },
+                                "unit": {
+                                    "type": "string",
+                                    "enum": ["celsius", "fahrenheit"],
+                                },
+                            },
+                            "required": ["location"],
+                        },
+                    },
+                }
+            ]
+            messages = [
+                {
+                    "role": "user",
+                    "content": "What's the weather like in Boston today in fahrenheit?",
+                }
+            ]
+            # Provider-specific args are unpacked last so they override the defaults.
+            request_args = {
+                "messages": messages,
+                "tools": tools,
+                **base_completion_call_args,
+            }
+            response: ModelResponse = completion(**request_args)  # type: ignore
+            print(f"response: {response}")
+
+            assert response is not None
+
+            # If the provider returned no tool calls (None or an empty list),
+            # there is nothing to feed back, so skip the follow-up request.
+            tool_calls = response.choices[0].message.tool_calls
+            if not tool_calls:
+                return
+
+            first_tool_call = tool_calls[0]
+            assert isinstance(first_tool_call.function.name, str)
+            assert isinstance(first_tool_call.function.arguments, str)
+
+            # Add the assistant message that carries the tool invocation.
+            messages.append(response.choices[0].message.model_dump())
+            tool_result = (
+                '{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
+            )
+            # Add the tool result in the OpenAI message format.
+            messages.append(
+                {
+                    "tool_call_id": first_tool_call.id,
+                    "role": "tool",
+                    "name": first_tool_call.function.name,
+                    "content": tool_result,
+                }
+            )
+            # In the second response, the model should answer from the tool result.
+            request_2_args = {
+                "messages": messages,
+                "tools": tools,
+                **base_completion_call_args,
+            }
+            second_response: ModelResponse = completion(**request_2_args)  # type: ignore
+            print(f"second response: {second_response}")
+            assert second_response is not None
+
+            # Either content or tool calls should be present.
+            assert (
+                second_response.choices[0].message.content is not None
+                or second_response.choices[0].message.tool_calls is not None
+            )
+        except litellm.RateLimitError:
+            pass
+        except Exception as e:
+            pytest.fail(f"Error occurred: {e}")
+
    @pytest.mark.asyncio
    async def test_completion_cost(self):
        from litellm import completion_cost
--- a/tests/llm_translation/test_azure_o_series.py
+++ b/tests/llm_translation/test_azure_o_series.py
@ -39,6 +39,9 @@ class TestAzureOpenAIO1(BaseOSeriesModelsTest, BaseLLMChatTest):
        """Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
        pass

+    def test_basic_tool_calling(self):
+        """No-op override of the inherited ``test_basic_tool_calling``.
+
+        NOTE(review): the reason this check is disabled for this class is not
+        recorded here — confirm and document it, as the sibling overrides do.
+        """
+        pass
+
    def test_prompt_caching(self):
        """Temporary override. o1 prompt caching is not working."""
        pass
--- a/tests/llm_translation/test_bedrock_completion.py
+++ b/tests/llm_translation/test_bedrock_completion.py
@ -2092,6 +2092,7 @@ def test_bedrock_prompt_caching_message(messages, expected_cache_control):
        ("bedrock/mistral.mistral-7b-instruct-v0.1:0", True),
        ("bedrock/meta.llama3-1-8b-instruct:0", True),
        ("bedrock/meta.llama3-2-70b-instruct:0", True),
+        ("bedrock/meta.llama3-3-70b-instruct-v1:0", True),
        ("bedrock/amazon.titan-embed-text-v1:0", False),
    ],
 )
--- a/tests/llm_translation/test_bedrock_llama.py
+++ b/tests/llm_translation/test_bedrock_llama.py
@ -0,0 +1,20 @@
+from base_llm_unit_tests import BaseLLMChatTest
+import pytest
+import sys
+import os
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import litellm
+
+
+class TestBedrockTestSuite(BaseLLMChatTest):
+    """Runs the ``BaseLLMChatTest`` checks against a Bedrock Converse Llama 3.3 70B model id."""
+
+    def get_base_completion_call_args(self) -> dict:
+        """Return the completion kwargs for this suite (only the model id).
+
+        Calls ``litellm._turn_on_debug()`` before building the dict.
+        """
+        litellm._turn_on_debug()
+        base_args = {
+            "model": "bedrock/converse/us.meta.llama3-3-70b-instruct-v1:0",
+        }
+        return base_args
+
+    def test_tool_call_no_arguments(self, tool_call_no_arguments):
+        """No-op test body.
+
+        NOTE(review): presumably overrides a base-class test of the same name
+        to disable it for this model — confirm and record the reason.
+        """
+        pass