diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index 2cf87ea240..122c874adc 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -194,7 +194,6 @@ class AzureChatCompletion(BaseLLM):
                 azure_client_params["azure_ad_token"] = azure_ad_token
             azure_client = AsyncAzureOpenAI(**azure_client_params)
             response = await azure_client.chat.completions.create(**data)
-            response.model = "azure/" + str(response.model)
             return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
         except AzureOpenAIError as e:
             exception_mapping_worked = True
diff --git a/litellm/tests/test_stream_chunk_builder.py b/litellm/tests/test_stream_chunk_builder.py
index 5907d37d06..fc3f12499b 100644
--- a/litellm/tests/test_stream_chunk_builder.py
+++ b/litellm/tests/test_stream_chunk_builder.py
@@ -95,7 +95,7 @@ def test_stream_chunk_builder_litellm_tool_call():
     try:
         litellm.set_verbose = False
         response = litellm.completion(
-            model="gpt-3.5-turbo",
+            model="azure/chatgpt-functioncalling",
            messages=messages,
            tools=tools_schema,
            stream=True,
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index e8bdeea3da..537c8c25fd 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -137,6 +137,30 @@ def streaming_format_tests(idx, chunk):
     print(f"extracted chunk: {extracted_chunk}")
     return extracted_chunk, finished
 
+tools_schema = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA"
+                    },
+                    "unit": {
+                        "type": "string",
+                        "enum": ["celsius", "fahrenheit"]
+                    }
+                },
+                "required": ["location"]
+            }
+        }
+    }
+]
+
 # def test_completion_cohere_stream():
 #     # this is a flaky test due to the cohere API endpoint being unstable
 #     try:
@@ -231,6 +255,26 @@ def test_completion_azure_stream():
         pytest.fail(f"Error occurred: {e}")
 # test_completion_azure_stream()
 
+def test_completion_azure_function_calling_stream():
+    try:
+        litellm.set_verbose = False
+        user_message = "What is the current weather in Boston?"
+        messages = [{"content": user_message, "role": "user"}]
+        response = completion(
+            model="azure/chatgpt-functioncalling", messages=messages, stream=True, tools=tools_schema
+        )
+        # Add any assertions here to check the response
+        for chunk in response:
+            print(chunk)
+            if chunk["choices"][0]["finish_reason"] == "stop":
+                break
+            print(chunk["choices"][0]["finish_reason"])
+            print(chunk["choices"][0]["delta"]["content"])
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+test_completion_azure_function_calling_stream()
+
 def test_completion_claude_stream():
     try:
         messages = [
@@ -347,7 +391,7 @@ def test_completion_nlp_cloud_stream():
     except Exception as e:
         print(f"Error occurred: {e}")
         pytest.fail(f"Error occurred: {e}")
-test_completion_nlp_cloud_stream()
+# test_completion_nlp_cloud_stream()
 
 def test_completion_claude_stream_bad_key():
     try:
diff --git a/litellm/utils.py b/litellm/utils.py
index eb107692c6..e45b345602 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4933,7 +4933,6 @@ class CustomStreamWrapper:
                     is_finished = True
                     finish_reason = str_line.choices[0].finish_reason
-
             return {
                 "text": text,
                 "is_finished": is_finished,
@@ -5173,7 +5172,7 @@ class CustomStreamWrapper:
                 print_verbose(f"completion obj content: {completion_obj['content']}")
                 if response_obj["is_finished"]:
                     model_response.choices[0].finish_reason = response_obj["finish_reason"]
-
+
             model_response.model = self.model
             print_verbose(f"model_response: {model_response}; completion_obj: {completion_obj}")
             print_verbose(f"model_response finish reason 3: {model_response.choices[0].finish_reason}")
@@ -5196,11 +5195,14 @@ class CustomStreamWrapper:
                     # enter this branch when no content has been passed in response
                     original_chunk = response_obj.get("original_chunk", None)
                     model_response.id = original_chunk.id
-                    try:
-                        delta = dict(original_chunk.choices[0].delta)
-                        model_response.choices[0].delta = Delta(**delta)
-                    except:
-                        model_response.choices[0].delta = Delta()
+                    if len(original_chunk.choices) > 0:
+                        try:
+                            delta = dict(original_chunk.choices[0].delta)
+                            model_response.choices[0].delta = Delta(**delta)
+                        except Exception as e:
+                            model_response.choices[0].delta = Delta()
+                    else:
+                        return
                     model_response.system_fingerprint = original_chunk.system_fingerprint
                     if self.sent_first_chunk == False:
                         model_response.choices[0].delta["role"] = "assistant"
@@ -5232,10 +5234,8 @@ class CustomStreamWrapper:
                 else:
                     chunk = next(self.completion_stream)
-                print_verbose(f"chunk in __next__: {chunk}")
                 if chunk is not None and chunk != b'':
                     response = self.chunk_creator(chunk=chunk)
-                    print_verbose(f"response in __next__: {response}")
                     if response is not None:
                         return response
         except StopIteration: