From 125f6fff675d66a04d37b89c2dec9ddf5528edaf Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 14 Feb 2025 14:15:25 -0800
Subject: [PATCH] (Feat) - Add `/bedrock/meta.llama3-3-70b-instruct-v1:0` tool
 calling support + cost tracking + base llm unit test for tool calling
 (#8545)

* Add support for bedrock meta.llama3-3-70b-instruct-v1:0 tool calling (#8512)

* fix(converse_transformation.py): fixing bedrock meta.llama3-3-70b tool calling

* test(test_bedrock_completion.py): adding llama3.3 tool compatibility check

* add TestBedrockTestSuite

* add bedrock llama 3.3 to base llm class

* us.meta.llama3-3-70b-instruct-v1:0

* test_basic_tool_calling

* TestAzureOpenAIO1

* test_basic_tool_calling

* test_basic_tool_calling

---------

Co-authored-by: miraclebakelaser <65143272+miraclebakelaser@users.noreply.github.com>
---
 .../bedrock/chat/converse_transformation.py   |   1 +
 ...odel_prices_and_context_window_backup.json |  15 ++-
 model_prices_and_context_window.json          |  15 ++-
 tests/llm_translation/base_llm_unit_tests.py  | 101 ++++++++++++++++++
 tests/llm_translation/test_azure_o_series.py  |   3 +
 .../test_bedrock_completion.py                |   1 +
 tests/llm_translation/test_bedrock_llama.py   |  20 ++++
 7 files changed, 154 insertions(+), 2 deletions(-)
 create mode 100644 tests/llm_translation/test_bedrock_llama.py

diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py
index 548e6f690a..ae79bcb0af 100644
--- a/litellm/llms/bedrock/chat/converse_transformation.py
+++ b/litellm/llms/bedrock/chat/converse_transformation.py
@@ -105,6 +105,7 @@ class AmazonConverseConfig(BaseConfig):
             or base_model.startswith("cohere")
             or base_model.startswith("meta.llama3-1")
             or base_model.startswith("meta.llama3-2")
+            or base_model.startswith("meta.llama3-3")
             or base_model.startswith("amazon.nova")
         ):
             supported_params.append("tools")
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index f6fa0c5b9d..5eec1fcf2b 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -7095,7 +7095,9 @@
         "input_cost_per_token": 0.00000072,
         "output_cost_per_token": 0.00000072,
         "litellm_provider": "bedrock_converse",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
     },
     "meta.llama2-13b-chat-v1": {
         "max_tokens": 4096,
@@ -7435,6 +7437,17 @@
         "supports_function_calling": true,
         "supports_tool_choice": false
     },
+    "us.meta.llama3-3-70b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000072,
+        "output_cost_per_token": 0.00000072,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
+    },
     "512-x-512/50-steps/stability.stable-diffusion-xl-v0": {
         "max_tokens": 77,
         "max_input_tokens": 77,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index f6fa0c5b9d..5eec1fcf2b 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -7095,7 +7095,9 @@
         "input_cost_per_token": 0.00000072,
         "output_cost_per_token": 0.00000072,
         "litellm_provider": "bedrock_converse",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false
     },
     "meta.llama2-13b-chat-v1": {
         "max_tokens": 4096,
@@ -7435,6 +7437,17 @@
         "supports_function_calling": true,
         "supports_tool_choice": false
     },
+ "us.meta.llama3-3-70b-instruct-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000072, + "output_cost_per_token": 0.00000072, + "litellm_provider": "bedrock_converse", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": false + }, "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { "max_tokens": 77, "max_input_tokens": 77, diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py index bc489dde54..07083f4515 100644 --- a/tests/llm_translation/base_llm_unit_tests.py +++ b/tests/llm_translation/base_llm_unit_tests.py @@ -634,6 +634,107 @@ class BaseLLMChatTest(ABC): return url + def test_basic_tool_calling(self): + try: + from litellm import completion, ModelResponse + + litellm.set_verbose = True + litellm._turn_on_debug() + from litellm.utils import supports_function_calling + + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + base_completion_call_args = self.get_base_completion_call_args() + if not supports_function_calling(base_completion_call_args["model"], None): + print("Model does not support function calling") + pytest.skip("Model does not support function calling") + + tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + }, + }, + "required": ["location"], + }, + }, + } + ] + messages = [ + { + "role": "user", + "content": "What's the weather like in Boston today in fahrenheit?", + } + ] + request_args = { + "messages": messages, + "tools": tools, + } + request_args.update(self.get_base_completion_call_args()) + response: ModelResponse = completion(**request_args) # type: ignore + print(f"response: {response}") + + assert response is not None + + # if the provider did not return any tool calls do not make a subsequent llm api call + if response.choices[0].message.tool_calls is None: + return + # Add any assertions here to check the response + + assert isinstance( + response.choices[0].message.tool_calls[0].function.name, str + ) + assert isinstance( + response.choices[0].message.tool_calls[0].function.arguments, str + ) + messages.append( + response.choices[0].message.model_dump() + ) # Add assistant tool invokes + tool_result = ( + '{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}' + ) + # Add user submitted tool results in the OpenAI format + messages.append( + { + "tool_call_id": response.choices[0].message.tool_calls[0].id, + "role": "tool", + "name": response.choices[0].message.tool_calls[0].function.name, + "content": tool_result, + } + ) + # In the second response, Claude should deduce answer from tool results + request_2_args = { + "messages": messages, + "tools": tools, + } + request_2_args.update(self.get_base_completion_call_args()) + second_response: ModelResponse = completion(**request_2_args) # type: ignore + print(f"second response: {second_response}") + assert second_response is not None + + # either content or tool calls should be present + assert ( + second_response.choices[0].message.content is not None + or second_response.choices[0].message.tool_calls is not None + ) + except litellm.RateLimitError: + pass + 
+        except Exception as e:
+            pytest.fail(f"Error occurred: {e}")
+
     @pytest.mark.asyncio
     async def test_completion_cost(self):
         from litellm import completion_cost
diff --git a/tests/llm_translation/test_azure_o_series.py b/tests/llm_translation/test_azure_o_series.py
index aec724afb7..13ba4169ce 100644
--- a/tests/llm_translation/test_azure_o_series.py
+++ b/tests/llm_translation/test_azure_o_series.py
@@ -39,6 +39,9 @@ class TestAzureOpenAIO1(BaseOSeriesModelsTest, BaseLLMChatTest):
         """Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
         pass
 
+    def test_basic_tool_calling(self):
+        pass
+
     def test_prompt_caching(self):
         """Temporary override. o1 prompt caching is not working."""
         pass
diff --git a/tests/llm_translation/test_bedrock_completion.py b/tests/llm_translation/test_bedrock_completion.py
index 17dd013ac3..8823ccdbe3 100644
--- a/tests/llm_translation/test_bedrock_completion.py
+++ b/tests/llm_translation/test_bedrock_completion.py
@@ -2092,6 +2092,7 @@ def test_bedrock_prompt_caching_message(messages, expected_cache_control):
         ("bedrock/mistral.mistral-7b-instruct-v0.1:0", True),
         ("bedrock/meta.llama3-1-8b-instruct:0", True),
         ("bedrock/meta.llama3-2-70b-instruct:0", True),
+        ("bedrock/meta.llama3-3-70b-instruct-v1:0", True),
         ("bedrock/amazon.titan-embed-text-v1:0", False),
     ],
 )
diff --git a/tests/llm_translation/test_bedrock_llama.py b/tests/llm_translation/test_bedrock_llama.py
new file mode 100644
index 0000000000..b18928747e
--- /dev/null
+++ b/tests/llm_translation/test_bedrock_llama.py
@@ -0,0 +1,20 @@
+from base_llm_unit_tests import BaseLLMChatTest
+import pytest
+import sys
+import os
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import litellm
+
+
+class TestBedrockTestSuite(BaseLLMChatTest):
+    def test_tool_call_no_arguments(self, tool_call_no_arguments):
+        pass
+
+    def get_base_completion_call_args(self) -> dict:
+        litellm._turn_on_debug()
+        return {
+            "model": "bedrock/converse/us.meta.llama3-3-70b-instruct-v1:0",
+        }
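
A minimal end-to-end sketch of what this patch enables (not part of the diff itself). It assumes AWS credentials and a region for Bedrock are already configured in the environment, and it reuses the model id and tool schema from the tests above; as in the base unit test, `tool_calls` may come back as None if the model chooses to answer directly.

    import litellm

    # Assumes Bedrock credentials/region are set in the environment,
    # e.g. AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION_NAME.
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]

    response = litellm.completion(
        model="bedrock/converse/us.meta.llama3-3-70b-instruct-v1:0",
        messages=[
            {
                "role": "user",
                "content": "What's the weather like in Boston today in fahrenheit?",
            }
        ],
        tools=tools,
    )
    # The Converse transformation now advertises "tools" for meta.llama3-3,
    # so any tool call arrives in OpenAI format on the message object.
    print(response.choices[0].message.tool_calls)

    # Cost tracking: the new pricing entries ($0.72 per million input and
    # output tokens) let completion_cost() price the call.
    print(litellm.completion_cost(completion_response=response))

Note that both new pricing entries set "supports_tool_choice": false, so this model advertises `tools` but not a forced `tool_choice`.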