diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index 876d3b899..8a06e0f3b 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -47,6 +47,10 @@ from ..types.llms.openai import (
     AsyncAssistantEventHandler,
     AsyncAssistantStreamManager,
     AsyncCursorPage,
+    ChatCompletionToolChoiceFunctionParam,
+    ChatCompletionToolChoiceObjectParam,
+    ChatCompletionToolParam,
+    ChatCompletionToolParamFunctionChunk,
     HttpxBinaryResponseContent,
     MessageData,
     OpenAICreateThreadParamsMessage,
@@ -204,8 +208,8 @@ class AzureOpenAIConfig:
                         and api_version_day < "01"
                     )
                 ):
-                    if litellm.drop_params == True or (
-                        drop_params is not None and drop_params == True
+                    if litellm.drop_params is True or (
+                        drop_params is not None and drop_params is True
                     ):
                         pass
                     else:
@@ -227,6 +231,41 @@ class AzureOpenAIConfig:
                         )
                 else:
                     optional_params["tool_choice"] = value
+            if param == "response_format" and isinstance(value, dict):
+                json_schema: Optional[dict] = None
+                schema_name: str = ""
+                if "response_schema" in value:
+                    json_schema = value["response_schema"]
+                    schema_name = "json_tool_call"
+                elif "json_schema" in value:
+                    json_schema = value["json_schema"]["schema"]
+                    schema_name = value["json_schema"]["name"]
+                """
+                Follow similar approach to anthropic - translate to a single tool call.
+
+                When using tools in this way: - https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode
+                - You usually want to provide a single tool
+                - You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
+                - Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model’s perspective.
+                """
+                if json_schema is not None:
+                    _tool_choice = ChatCompletionToolChoiceObjectParam(
+                        type="function",
+                        function=ChatCompletionToolChoiceFunctionParam(
+                            name=schema_name
+                        ),
+                    )
+
+                    _tool = ChatCompletionToolParam(
+                        type="function",
+                        function=ChatCompletionToolParamFunctionChunk(
+                            name=schema_name, parameters=json_schema
+                        ),
+                    )
+
+                    optional_params["tools"] = [_tool]
+                    optional_params["tool_choice"] = _tool_choice
+                    optional_params["json_mode"] = True
             elif param in supported_openai_params:
                 optional_params[param] = value
         return optional_params
@@ -513,6 +552,7 @@ class AzureChatCompletion(BaseLLM):
                 )
 
             max_retries = optional_params.pop("max_retries", 2)
+            json_mode: Optional[bool] = optional_params.pop("json_mode", False)
 
             ### CHECK IF CLOUDFLARE AI GATEWAY ###
             ### if so - set the model as part of the base url
@@ -578,6 +618,7 @@ class AzureChatCompletion(BaseLLM):
                     timeout=timeout,
                     client=client,
                     logging_obj=logging_obj,
+                    convert_tool_call_to_json_mode=json_mode,
                 )
             elif "stream" in optional_params and optional_params["stream"] == True:
                 return self.streaming(
@@ -656,6 +697,7 @@ class AzureChatCompletion(BaseLLM):
                 return convert_to_model_response_object(
                     response_object=stringified_response,
                     model_response_object=model_response,
+                    convert_tool_call_to_json_mode=json_mode,
                 )
         except AzureOpenAIError as e:
             exception_mapping_worked = True
@@ -677,6 +719,7 @@ class AzureChatCompletion(BaseLLM):
         model_response: ModelResponse,
         logging_obj: LiteLLMLoggingObj,
         azure_ad_token: Optional[str] = None,
+        convert_tool_call_to_json_mode: Optional[bool] = None,
         client=None,  # this is the AsyncAzureOpenAI
     ):
         response = None
@@ -742,11 +785,13 @@ class AzureChatCompletion(BaseLLM):
                 original_response=stringified_response,
                 additional_args={"complete_input_dict": data},
             )
+
             return convert_to_model_response_object(
                 response_object=stringified_response,
                 model_response_object=model_response,
                 hidden_params={"headers": headers},
                 _response_headers=headers,
+                convert_tool_call_to_json_mode=convert_tool_call_to_json_mode,
             )
         except AzureOpenAIError as e:
             ## LOGGING
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 7687ad1b4..6718f4cde 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -2162,37 +2162,44 @@ def test_completion_openai():
         pytest.fail(f"Error occurred: {e}")
 
 
-def test_completion_openai_pydantic():
+@pytest.mark.parametrize("model", ["gpt-4o-2024-08-06", "azure/chatgpt-v-2"])
+def test_completion_openai_pydantic(model):
     try:
         litellm.set_verbose = True
         from pydantic import BaseModel
 
+        messages = [
+            {"role": "user", "content": "List 5 important events in the XIX century"}
+        ]
+
         class CalendarEvent(BaseModel):
             name: str
             date: str
             participants: list[str]
 
-        print(f"api key: {os.environ['OPENAI_API_KEY']}")
-        litellm.api_key = os.environ["OPENAI_API_KEY"]
-        response = completion(
-            model="gpt-4o-2024-08-06",
-            messages=[{"role": "user", "content": "Hey"}],
-            max_tokens=10,
-            metadata={"hi": "bye"},
-            response_format=CalendarEvent,
-        )
+        class EventsList(BaseModel):
+            events: list[CalendarEvent]
+
+        litellm.enable_json_schema_validation = True
+        for _ in range(3):
+            try:
+                response = completion(
+                    model=model,
+                    messages=messages,
+                    metadata={"hi": "bye"},
+                    response_format=EventsList,
+                )
+                break
+            except litellm.JSONSchemaValidationError:
+                print("ERROR OCCURRED! INVALID JSON")
+
         print("This is the response object\n", response)
 
         response_str = response["choices"][0]["message"]["content"]
-        response_str_2 = response.choices[0].message.content
 
-        cost = completion_cost(completion_response=response)
-        print("Cost for completion call with gpt-3.5-turbo: ", f"${float(cost):.10f}")
-        assert response_str == response_str_2
-        assert type(response_str) == str
-        assert len(response_str) > 1
+        print(f"response_str: {response_str}")
+        json.loads(response_str)  # check valid json is returned
 
-        litellm.api_key = None
     except Timeout as e:
         pass
     except Exception as e:
diff --git a/litellm/utils.py b/litellm/utils.py
index eff3b4346..744a3380c 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -843,13 +843,13 @@ def client(original_function):
                     and str(original_function.__name__)
                     in litellm.cache.supported_call_types
                 ):
-                    print_verbose(f"Checking Cache")
+                    print_verbose("Checking Cache")
                     preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
                     kwargs["preset_cache_key"] = (
                         preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
                     )
                     cached_result = litellm.cache.get_cache(*args, **kwargs)
-                    if cached_result != None:
+                    if cached_result is not None:
                         if "detail" in cached_result:
                             # implies an error occurred
                             pass
@@ -5907,6 +5907,9 @@ def convert_to_model_response_object(
     end_time=None,
     hidden_params: Optional[dict] = None,
     _response_headers: Optional[dict] = None,
+    convert_tool_call_to_json_mode: Optional[
+        bool
+    ] = None,  # used for supporting 'json_schema' on older models
 ):
     received_args = locals()
     if _response_headers is not None:
@@ -5945,7 +5948,7 @@ def convert_to_model_response_object(
         ):
             if response_object is None or model_response_object is None:
                 raise Exception("Error in response object format")
-            if stream == True:
+            if stream is True:
                 # for returning cached responses, we need to yield a generator
                 return convert_to_streaming_response(response_object=response_object)
             choice_list = []
@@ -5955,16 +5958,31 @@ def convert_to_model_response_object(
             )
 
             for idx, choice in enumerate(response_object["choices"]):
-                message = Message(
-                    content=choice["message"].get("content", None),
-                    role=choice["message"]["role"] or "assistant",
-                    function_call=choice["message"].get("function_call", None),
-                    tool_calls=choice["message"].get("tool_calls", None),
-                )
-                finish_reason = choice.get("finish_reason", None)
-                if finish_reason == None:
+                ## HANDLE JSON MODE - anthropic returns single function call]
+                tool_calls = choice["message"].get("tool_calls", None)
+                if (
+                    convert_tool_call_to_json_mode
+                    and tool_calls is not None
+                    and len(tool_calls) == 1
+                ):
+                    # to support 'json_schema' logic on older models
+                    json_mode_content_str: Optional[str] = tool_calls[0][
+                        "function"
+                    ].get("arguments")
+                    if json_mode_content_str is not None:
+                        message = litellm.Message(content=json_mode_content_str)
+                        finish_reason = "stop"
+                else:
+                    message = Message(
+                        content=choice["message"].get("content", None),
+                        role=choice["message"]["role"] or "assistant",
+                        function_call=choice["message"].get("function_call", None),
+                        tool_calls=choice["message"].get("tool_calls", None),
+                    )
+                    finish_reason = choice.get("finish_reason", None)
+                if finish_reason is None:
                     # gpt-4 vision can return 'finish_reason' or 'finish_details'
-                    finish_reason = choice.get("finish_details")
+                    finish_reason = choice.get("finish_details") or "stop"
                 logprobs = choice.get("logprobs", None)
                 enhancements = choice.get("enhancements", None)
                 choice = Choices(
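
For context, a minimal usage sketch of the path this patch adds (not part of the patch itself). It mirrors the parametrized test above; "azure/chatgpt-v-2" is the deployment name used by that test and is an assumption for your environment, as is having Azure credentials (e.g. AZURE_API_KEY, AZURE_API_BASE, AZURE_API_VERSION) already configured.

# Illustrative sketch only. With the patch, AzureOpenAIConfig.map_openai_params
# translates a 'json_schema'-style response_format into a single forced tool call
# and sets json_mode=True; convert_to_model_response_object then surfaces the
# tool-call arguments as message.content with finish_reason "stop".
from pydantic import BaseModel

import litellm


class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]


class EventsList(BaseModel):
    events: list[CalendarEvent]


litellm.enable_json_schema_validation = True

response = litellm.completion(
    model="azure/chatgpt-v-2",  # assumed Azure deployment, taken from the test above
    messages=[
        {"role": "user", "content": "List 5 important events in the XIX century"}
    ],
    response_format=EventsList,
)

print(response.choices[0].message.content)  # JSON string matching EventsList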