forked from phoenix/litellm-mirror
fix(utils.py): fix azure streaming bug
This commit is contained in:
parent 90c13d39ac
commit 728b879c33

4 changed files with 91 additions and 7 deletions
@@ -1,3 +1,20 @@
model_list:
  - model_name: "azure-model"
    litellm_params:
      model: "azure/gpt-35-turbo"
      api_key: "os.environ/AZURE_EUROPE_API_KEY"
      api_base: "https://my-endpoint-europe-berri-992.openai.azure.com/"
  - model_name: "azure-model"
    litellm_params:
      model: "azure/gpt-35-turbo"
      api_key: "os.environ/AZURE_CANADA_API_KEY"
      api_base: "https://my-endpoint-canada-berri992.openai.azure.com"
  - model_name: "azure-model"
    litellm_params:
      model: "azure/gpt-turbo"
      api_key: "os.environ/AZURE-FRANCE-API-KEY"
      api_base: "https://openai-france-1234.openai.azure.com"

litellm_settings:
  drop_params: True
  set_verbose: True
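This config registers three Azure deployments under the single group name "azure-model", so requests to that one name can be spread across the Europe, Canada, and France endpoints. Below is a minimal sketch of exercising the same multi-deployment setup programmatically, assuming litellm's Router accepts a model_list of this shape; the Router usage is an illustration, not part of this commit:

import os
from litellm import Router

# Same shape as the proxy config above; one deployment shown, the Canada
# and France entries would be appended the same way.
model_list = [
    {
        "model_name": "azure-model",
        "litellm_params": {
            "model": "azure/gpt-35-turbo",
            # The proxy resolves "os.environ/..." strings itself; in plain
            # Python we read the variable directly.
            "api_key": os.environ.get("AZURE_EUROPE_API_KEY"),
            "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com/",
        },
    },
]

router = Router(model_list=model_list)
response = router.completion(
    model="azure-model",  # group name; the router picks a deployment
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response)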
litellm/tests/test_proxy_server_cost.py (new file, 27 lines)
@@ -0,0 +1,27 @@
# #### What this tests ####
# # This tests the cost tracking function works with consecutive calls (~10 consecutive calls)

# import sys, os
# import traceback
# import pytest
# sys.path.insert(
#     0, os.path.abspath("../..")
# )  # Adds the parent directory to the system path
# import litellm

# async def test_proxy_cost_tracking():
#     """
#     Get expected cost.
#     Create new key.
#     Run 10 parallel calls.
#     Check cost for key at the end.
#     assert it's = expected cost.
#     """
#     model = "gpt-3.5-turbo"
#     messages = [{"role": "user", "content": "Hey, how's it going?"}]
#     number_of_calls = 10
#     expected_cost = litellm.completion_cost(model=model, messages=messages) * number_of_calls
#     async def litellm_acompletion():
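The new test is checked in fully commented out and stops mid-sketch at the litellm_acompletion helper. Below is a hedged sketch of the parallel-call pattern its comments describe, assuming litellm.acompletion with its usual signature and asyncio.gather for the fan-out; the helper body and the final spend check are illustrative, not from the commit:

import asyncio
import litellm

async def proxy_cost_tracking_sketch():
    model = "gpt-3.5-turbo"
    messages = [{"role": "user", "content": "Hey, how's it going?"}]
    number_of_calls = 10
    # Mirrors the commented-out test's expected-cost expression.
    expected_cost = litellm.completion_cost(model=model, messages=messages) * number_of_calls

    async def litellm_acompletion():
        return await litellm.acompletion(model=model, messages=messages)

    # Fire the 10 calls concurrently, as the commented-out test intends.
    responses = await asyncio.gather(*[litellm_acompletion() for _ in range(number_of_calls)])
    # The real test would then read the key's tracked spend from the proxy
    # and assert it equals expected_cost.
    return expected_cost, responses

# asyncio.run(proxy_cost_tracking_sketch())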
@@ -110,4 +110,26 @@ def test_stream_chunk_builder_litellm_tool_call():
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")

-test_stream_chunk_builder_litellm_tool_call()
+# test_stream_chunk_builder_litellm_tool_call()

def test_stream_chunk_builder_litellm_tool_call_regular_message():
    try:
        messages = [{"role": "user", "content": "Hey, how's it going?"}]
        litellm.set_verbose = False
        response = litellm.completion(
            model="azure/gpt-4-nov-release",
            messages=messages,
            tools=tools_schema,
            stream=True,
            api_key="os.environ/AZURE_FRANCE_API_KEY",
            api_base="https://openai-france-1234.openai.azure.com",
            complete_response=True,
        )

        print(f"complete response: {response}")
        print(f"complete response usage: {response.usage}")
        assert response.system_fingerprint is not None
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")

test_stream_chunk_builder_litellm_tool_call_regular_message()
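The new test references tools_schema, which is defined earlier in this test file and does not appear in the hunk. For orientation, an OpenAI-style tool list generally has the following shape; this is an illustrative schema, not the repo's exact definition:

# Illustrative OpenAI-style tools schema (shape only; not the repo's
# actual tools_schema).
tools_schema = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                },
                "required": ["location"],
            },
        },
    }
]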
@@ -5266,9 +5266,27 @@ class CustomStreamWrapper:
        print_verbose(f"model_response: {model_response}; completion_obj: {completion_obj}")
        print_verbose(f"model_response finish reason 3: {model_response.choices[0].finish_reason}")
        if len(completion_obj["content"]) > 0:  # cannot set content of an OpenAI Object to be an empty string
-           hold, model_response_str = self.check_special_tokens(chunk=completion_obj["content"], finish_reason=model_response.choices[0].finish_reason)
+           hold, model_response_str = self.check_special_tokens(chunk=completion_obj["content"], finish_reason=model_response.choices[0].finish_reason)  # filter out bos/eos tokens from openai-compatible hf endpoints
            print_verbose(f"hold - {hold}, model_response_str - {model_response_str}")
            if hold is False:
                ## check if openai/azure chunk
                original_chunk = response_obj.get("original_chunk", None)
                if original_chunk:
                    model_response.id = original_chunk.id
                    if len(original_chunk.choices) > 0:
                        try:
                            delta = dict(original_chunk.choices[0].delta)
                            model_response.choices[0].delta = Delta(**delta)
                        except Exception as e:
                            model_response.choices[0].delta = Delta()
                    else:
                        return
                    model_response.system_fingerprint = original_chunk.system_fingerprint
                    if self.sent_first_chunk == False:
                        model_response.choices[0].delta["role"] = "assistant"
                        self.sent_first_chunk = True
                else:
                    ## else
                    completion_obj["content"] = model_response_str
                    if self.sent_first_chunk == False:
                        completion_obj["role"] = "assistant"
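This is the actual bug fix: when the provider returned an original OpenAI/Azure chunk, CustomStreamWrapper now copies its id, rebuilds the Delta from the first choice (falling back to an empty Delta if construction fails), returns early when the chunk has no choices, and propagates system_fingerprint, which is exactly what the new test asserts on. A minimal consumer sketch follows, assuming Azure credentials are already configured in the environment; the model name and prompt are illustrative:

# Minimal stream-consumer sketch: with this fix, streamed Azure chunks
# should carry the provider's id and system_fingerprint through litellm.
# Assumes AZURE_API_KEY / api_base are configured in the environment.
import litellm

response = litellm.completion(
    model="azure/gpt-35-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    stream=True,
)
for chunk in response:
    # Each chunk is an OpenAI-style object; delta carries incremental content.
    print(chunk.id, chunk.system_fingerprint, chunk.choices[0].delta)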