forked from phoenix/litellm-mirror
fix(utils.py): azure tool calling streaming
This commit is contained in:
parent 4cdd930fa2
commit e8331a4647
4 changed files with 55 additions and 12 deletions
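The substance of the fix is in the CustomStreamWrapper hunks near the bottom (utils.py, per the commit title): Azure tool-calling streams can emit chunks whose choices list is empty, and the old code indexed original_chunk.choices[0] unconditionally whenever a chunk carried no content, so such chunks blew up the stream. Below is a minimal standalone sketch of the new guard; the Delta/Choice/StreamChunk dataclasses are stand-ins for litellm's real types, not the library API.

# Sketch only: stand-in types, assuming the OpenAI-style streaming chunk shape.
from dataclasses import dataclass, field
from typing import Optional

@dataclass
class Delta:
    role: Optional[str] = None
    content: Optional[str] = None
    tool_calls: Optional[list] = None

@dataclass
class Choice:
    delta: Delta

@dataclass
class StreamChunk:
    id: str
    choices: list = field(default_factory=list)

def build_delta(chunk: StreamChunk) -> Optional[Delta]:
    # New behavior: only touch choices[0] when the chunk actually has choices.
    if len(chunk.choices) > 0:
        try:
            return Delta(**vars(chunk.choices[0].delta))
        except Exception:
            return Delta()  # malformed delta: fall back to an empty one
    return None  # metadata-only chunk: skip it instead of raising IndexError

print(build_delta(StreamChunk(id="chatcmpl-1")))  # None
print(build_delta(StreamChunk(id="chatcmpl-2", choices=[Choice(Delta(content="hi"))])))

Returning None for a metadata-only chunk mirrors the added `else: return` in the -5196,11 hunk, which drops the chunk instead of raising.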
@@ -194,7 +194,6 @@ class AzureChatCompletion(BaseLLM):
                 azure_client_params["azure_ad_token"] = azure_ad_token
             azure_client = AsyncAzureOpenAI(**azure_client_params)
             response = await azure_client.chat.completions.create(**data)
-            response.model = "azure/" + str(response.model)
             return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
         except AzureOpenAIError as e:
             exception_mapping_worked = True
@@ -95,7 +95,7 @@ def test_stream_chunk_builder_litellm_tool_call():
     try:
         litellm.set_verbose = False
         response = litellm.completion(
-            model="gpt-3.5-turbo",
+            model="azure/chatgpt-functioncalling",
             messages=messages,
             tools=tools_schema,
             stream=True,
@@ -137,6 +137,30 @@ def streaming_format_tests(idx, chunk):
     print(f"extracted chunk: {extracted_chunk}")
     return extracted_chunk, finished
 
+tools_schema = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA"
+                    },
+                    "unit": {
+                        "type": "string",
+                        "enum": ["celsius", "fahrenheit"]
+                    }
+                },
+                "required": ["location"]
+            }
+        }
+    }
+]
+
 # def test_completion_cohere_stream():
 #     # this is a flaky test due to the cohere API endpoint being unstable
 #     try:
@@ -231,6 +255,26 @@ def test_completion_azure_stream():
         pytest.fail(f"Error occurred: {e}")
 # test_completion_azure_stream()
 
+def test_completion_azure_function_calling_stream():
+    try:
+        litellm.set_verbose = False
+        user_message = "What is the current weather in Boston?"
+        messages = [{"content": user_message, "role": "user"}]
+        response = completion(
+            model="azure/chatgpt-functioncalling", messages=messages, stream=True, tools=tools_schema
+        )
+        # Add any assertions here to check the response
+        for chunk in response:
+            print(chunk)
+            if chunk["choices"][0]["finish_reason"] == "stop":
+                break
+            print(chunk["choices"][0]["finish_reason"])
+            print(chunk["choices"][0]["delta"]["content"])
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+test_completion_azure_function_calling_stream()
+
 def test_completion_claude_stream():
     try:
         messages = [
@@ -347,7 +391,7 @@ def test_completion_nlp_cloud_stream():
     except Exception as e:
         print(f"Error occurred: {e}")
         pytest.fail(f"Error occurred: {e}")
-test_completion_nlp_cloud_stream()
+# test_completion_nlp_cloud_stream()
 
 def test_completion_claude_stream_bad_key():
     try:
@@ -4933,7 +4933,6 @@ class CustomStreamWrapper:
                 is_finished = True
                 finish_reason = str_line.choices[0].finish_reason
 
-
             return {
                 "text": text,
                 "is_finished": is_finished,
@@ -5173,7 +5172,7 @@ class CustomStreamWrapper:
             print_verbose(f"completion obj content: {completion_obj['content']}")
             if response_obj["is_finished"]:
                 model_response.choices[0].finish_reason = response_obj["finish_reason"]
 
             model_response.model = self.model
             print_verbose(f"model_response: {model_response}; completion_obj: {completion_obj}")
             print_verbose(f"model_response finish reason 3: {model_response.choices[0].finish_reason}")
@@ -5196,11 +5195,14 @@ class CustomStreamWrapper:
             # enter this branch when no content has been passed in response
             original_chunk = response_obj.get("original_chunk", None)
             model_response.id = original_chunk.id
-            try:
-                delta = dict(original_chunk.choices[0].delta)
-                model_response.choices[0].delta = Delta(**delta)
-            except:
-                model_response.choices[0].delta = Delta()
+            if len(original_chunk.choices) > 0:
+                try:
+                    delta = dict(original_chunk.choices[0].delta)
+                    model_response.choices[0].delta = Delta(**delta)
+                except Exception as e:
+                    model_response.choices[0].delta = Delta()
+            else:
+                return
             model_response.system_fingerprint = original_chunk.system_fingerprint
             if self.sent_first_chunk == False:
                 model_response.choices[0].delta["role"] = "assistant"
@@ -5232,10 +5234,8 @@ class CustomStreamWrapper:
             else:
                 chunk = next(self.completion_stream)
 
-            print_verbose(f"chunk in __next__: {chunk}")
             if chunk is not None and chunk != b'':
                 response = self.chunk_creator(chunk=chunk)
-                print_verbose(f"response in __next__: {response}")
                 if response is not None:
                     return response
         except StopIteration:
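For context on what the new test_completion_azure_function_calling_stream exercises: with streaming tool calls, function-call arguments arrive as string fragments spread across chunks and have to be accumulated client-side. A sketch of that consumption pattern follows; the chunk dicts assume the OpenAI streaming shape, and accumulate_tool_calls is illustrative, not part of this commit.

# Illustrative only: fold streamed tool-call fragments into (name, parsed args).
import json

def accumulate_tool_calls(chunks):
    name, args = None, []
    for chunk in chunks:
        choices = chunk.get("choices", [])
        if not choices:  # metadata-only chunk: exactly the case this commit guards
            continue
        delta = choices[0].get("delta", {})
        for tc in delta.get("tool_calls") or []:
            fn = tc.get("function", {})
            name = fn.get("name") or name
            if fn.get("arguments"):
                args.append(fn["arguments"])
    return name, (json.loads("".join(args)) if args else None)

# Hand-written chunks mimicking an Azure tool-calling stream:
stream = [
    {"choices": []},  # chunk with an empty choices list
    {"choices": [{"delta": {"tool_calls": [
        {"function": {"name": "get_current_weather", "arguments": "{\"location\":"}}]}}]},
    {"choices": [{"delta": {"tool_calls": [
        {"function": {"arguments": " \"Boston, MA\"}"}}]}}]},
]
print(accumulate_tool_calls(stream))
# -> ('get_current_weather', {'location': 'Boston, MA'})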