diff --git a/litellm/llms/azure/chat/gpt_transformation.py b/litellm/llms/azure/chat/gpt_transformation.py
index 53a7fdd687..b117583bd0 100644
--- a/litellm/llms/azure/chat/gpt_transformation.py
+++ b/litellm/llms/azure/chat/gpt_transformation.py
@@ -113,6 +113,17 @@ class AzureOpenAIConfig(BaseConfig):
 
         return False
 
+    def _is_response_format_supported_api_version(
+        self, api_version_year: str, api_version_month: str
+    ) -> bool:
+        """
+        - check if api_version is supported for response_format
+        """
+
+        is_supported = int(api_version_year) <= 2024 and int(api_version_month) >= 8
+
+        return is_supported
+
     def map_openai_params(
         self,
         non_default_params: dict,
@@ -171,13 +182,20 @@ class AzureOpenAIConfig(BaseConfig):
                 _is_response_format_supported_model = (
                     self._is_response_format_supported_model(model)
                 )
-                should_convert_response_format_to_tool = (
-                    api_version_year <= "2024" and api_version_month < "08"
-                ) or not _is_response_format_supported_model
+
+                is_response_format_supported_api_version = (
+                    self._is_response_format_supported_api_version(
+                        api_version_year, api_version_month
+                    )
+                )
+                is_response_format_supported = (
+                    is_response_format_supported_api_version
+                    and _is_response_format_supported_model
+                )
                 optional_params = self._add_response_format_to_tools(
                     optional_params=optional_params,
                     value=value,
-                    should_convert_response_format_to_tool=should_convert_response_format_to_tool,
+                    is_response_format_supported=is_response_format_supported,
                 )
             elif param == "tools" and isinstance(value, list):
                 optional_params.setdefault("tools", [])
diff --git a/litellm/llms/base_llm/chat/transformation.py b/litellm/llms/base_llm/chat/transformation.py
index 1004cc9012..9d3778ed68 100644
--- a/litellm/llms/base_llm/chat/transformation.py
+++ b/litellm/llms/base_llm/chat/transformation.py
@@ -20,6 +20,7 @@ from pydantic import BaseModel
 
 from litellm._logging import verbose_logger
 from litellm.constants import RESPONSE_FORMAT_TOOL_NAME
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionToolChoiceFunctionParam,
@@ -27,9 +28,6 @@ from litellm.types.llms.openai import (
     ChatCompletionToolParam,
     ChatCompletionToolParamFunctionChunk,
 )
-
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
-
 from litellm.types.utils import ModelResponse
 from litellm.utils import CustomStreamWrapper
 
@@ -163,7 +161,7 @@ class BaseConfig(ABC):
         self,
         optional_params: dict,
         value: dict,
-        should_convert_response_format_to_tool: bool,
+        is_response_format_supported: bool,
     ) -> dict:
         """
         Follow similar approach to anthropic - translate to a single tool call.
@@ -183,7 +181,8 @@ class BaseConfig(ABC):
         elif "json_schema" in value:
             json_schema = value["json_schema"]["schema"]
 
-        if json_schema and should_convert_response_format_to_tool:
+        if json_schema and not is_response_format_supported:
+
             _tool_choice = ChatCompletionToolChoiceObjectParam(
                 type="function",
                 function=ChatCompletionToolChoiceFunctionParam(
diff --git a/tests/llm_translation/test_azure_openai.py b/tests/llm_translation/test_azure_openai.py
index 92d2de7c48..d4715b8906 100644
--- a/tests/llm_translation/test_azure_openai.py
+++ b/tests/llm_translation/test_azure_openai.py
@@ -285,12 +285,27 @@ def test_azure_openai_gpt_4o_naming(monkeypatch):
     assert "tool_calls" not in mock_post.call_args.kwargs
 
 
-def test_azure_gpt_4o_with_tool_call_and_response_format():
+@pytest.mark.parametrize(
+    "api_version",
+    [
+        "2024-10-21",
+        # "2024-02-15-preview",
+    ],
+)
+def test_azure_gpt_4o_with_tool_call_and_response_format(api_version):
     from litellm import completion
     from typing import Optional
     from pydantic import BaseModel
     import litellm
 
+    from openai import AzureOpenAI
+
+    client = AzureOpenAI(
+        api_key="fake-key",
+        base_url="https://fake-azure.openai.azure.com",
+        api_version=api_version,
+    )
+
     class InvestigationOutput(BaseModel):
         alert_explanation: Optional[str] = None
         investigation: Optional[str] = None
@@ -322,25 +337,34 @@ def test_azure_gpt_4o_with_tool_call_and_response_format():
         }
     ]
 
-    response = litellm.completion(
-        model="azure/gpt-4o",
-        messages=[
-            {
-                "role": "system",
-                "content": "You are a tool-calling AI assist provided with common devops and IT tools that you can use to troubleshoot problems or answer questions.\nWhenever possible you MUST first use tools to investigate then answer the question.",
-            },
-            {"role": "user", "content": "What is the current date and time in NYC?"},
-        ],
-        drop_params=True,
-        temperature=0.00000001,
-        tools=tools,
-        tool_choice="auto",
-        response_format=InvestigationOutput,  # commenting this line will cause the output to be correct
-    )
+    with patch.object(client.chat.completions.with_raw_response, "create") as mock_post:
+        response = litellm.completion(
+            model="azure/gpt-4o",
+            messages=[
+                {
+                    "role": "system",
+                    "content": "You are a tool-calling AI assist provided with common devops and IT tools that you can use to troubleshoot problems or answer questions.\nWhenever possible you MUST first use tools to investigate then answer the question.",
+                },
+                {
+                    "role": "user",
+                    "content": "What is the current date and time in NYC?",
+                },
+            ],
+            drop_params=True,
+            temperature=0.00000001,
+            tools=tools,
+            tool_choice="auto",
+            response_format=InvestigationOutput,  # commenting this line will cause the output to be correct
+            api_version=api_version,
+            client=client,
+        )
 
-    assert response.choices[0].finish_reason == "tool_calls"
+    mock_post.assert_called_once()
 
-    print(response.to_json())
+    if api_version == "2024-10-21":
+        assert "response_format" in mock_post.call_args.kwargs
+    else:
+        assert "response_format" not in mock_post.call_args.kwargs
 
 
 def test_map_openai_params():
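
For context, a minimal standalone sketch of the gate this patch introduces (the helper names below are illustrative, not litellm's API; only the version comparison mirrors `_is_response_format_supported_api_version`): `response_format` is now forwarded to Azure unchanged only when both the API version (2024-08 or later) and the model support it, and is otherwise translated into a single tool call by `_add_response_format_to_tools`.

```python
# Standalone sketch (assumed names; only the comparison mirrors the patch).


def _is_supported_api_version(year: str, month: str) -> bool:
    # Mirrors the new helper: native response_format support starts
    # with the 2024-08 Azure OpenAI API versions.
    return int(year) <= 2024 and int(month) >= 8


def forwards_response_format(api_version: str, model_supported: bool) -> bool:
    # litellm splits the "YYYY-MM-DD[-suffix]" version string into
    # year/month before running the check.
    year, month = api_version.split("-")[:2]
    return _is_supported_api_version(year, month) and model_supported


assert forwards_response_format("2024-10-21", True)  # sent through as-is
assert not forwards_response_format("2024-02-15", True)  # converted to a tool call
assert not forwards_response_format("2024-10-21", False)  # model lacks support
```

The parametrized test above exercises exactly this split: for `2024-10-21` it asserts that `response_format` reaches the mocked Azure client untouched, while the (currently commented-out) `2024-02-15-preview` case would assert the tool-call fallback.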