diff --git a/litellm/tests/test_config.yaml b/litellm/tests/test_config.yaml
index 0e678d2d3b..a38dc76156 100644
--- a/litellm/tests/test_config.yaml
+++ b/litellm/tests/test_config.yaml
@@ -1,3 +1,20 @@
+model_list:
+  - model_name: "azure-model"
+    litellm_params:
+      model: "azure/gpt-35-turbo"
+      api_key: "os.environ/AZURE_EUROPE_API_KEY"
+      api_base: "https://my-endpoint-europe-berri-992.openai.azure.com/"
+  - model_name: "azure-model"
+    litellm_params:
+      model: "azure/gpt-35-turbo"
+      api_key: "os.environ/AZURE_CANADA_API_KEY"
+      api_base: "https://my-endpoint-canada-berri992.openai.azure.com"
+  - model_name: "azure-model"
+    litellm_params:
+      model: "azure/gpt-turbo"
+      api_key: "os.environ/AZURE-FRANCE-API-KEY"
+      api_base: "https://openai-france-1234.openai.azure.com"
+
 litellm_settings:
   drop_params: True
   set_verbose: True
diff --git a/litellm/tests/test_proxy_server_cost.py b/litellm/tests/test_proxy_server_cost.py
new file mode 100644
index 0000000000..7688e58995
--- /dev/null
+++ b/litellm/tests/test_proxy_server_cost.py
@@ -0,0 +1,27 @@
+# #### What this tests ####
+# # This tests that the cost tracking function works across consecutive calls (~10 consecutive calls)
+
+# import sys, os
+# import traceback
+# import pytest
+# sys.path.insert(
+#     0, os.path.abspath("../..")
+# )  # Adds the parent directory to the system path
+# import litellm
+
+# async def test_proxy_cost_tracking():
+#     """
+#     Get expected cost.
+#     Create new key.
+#     Run 10 parallel calls.
+#     Check cost for key at the end.
+#     Assert it equals the expected cost.
+#     """
+#     model = "gpt-3.5-turbo"
+#     messages = [{"role": "user", "content": "Hey, how's it going?"}]
+#     number_of_calls = 10
+#     expected_cost = litellm.completion_cost(model=model, messages=messages) * number_of_calls
+#     async def litellm_acompletion():
+
+
+
diff --git a/litellm/tests/test_stream_chunk_builder.py b/litellm/tests/test_stream_chunk_builder.py
index 807e74cfbe..23f67a2e8b 100644
--- a/litellm/tests/test_stream_chunk_builder.py
+++ b/litellm/tests/test_stream_chunk_builder.py
@@ -110,4 +110,26 @@ def test_stream_chunk_builder_litellm_tool_call():
     except Exception as e:
         pytest.fail(f"An exception occurred - {str(e)}")
 
-test_stream_chunk_builder_litellm_tool_call()
+# test_stream_chunk_builder_litellm_tool_call()
+
+def test_stream_chunk_builder_litellm_tool_call_regular_message():
+    try:
+        messages = [{"role": "user", "content": "Hey, how's it going?"}]
+        litellm.set_verbose = False
+        response = litellm.completion(
+            model="azure/gpt-4-nov-release",
+            messages=messages,
+            tools=tools_schema,
+            stream=True,
+            api_key="os.environ/AZURE_FRANCE_API_KEY",
+            api_base="https://openai-france-1234.openai.azure.com",
+            complete_response=True,
+        )
+
+        print(f"complete response: {response}")
+        print(f"complete response usage: {response.usage}")
+        assert response.system_fingerprint is not None
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {str(e)}")
+
+test_stream_chunk_builder_litellm_tool_call_regular_message()
diff --git a/litellm/utils.py b/litellm/utils.py
index dce2592373..3756337b66 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5266,14 +5266,32 @@ class CustomStreamWrapper:
             print_verbose(f"model_response: {model_response}; completion_obj: {completion_obj}")
             print_verbose(f"model_response finish reason 3: {model_response.choices[0].finish_reason}")
             if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
-                hold, model_response_str = self.check_special_tokens(chunk=completion_obj["content"], finish_reason=model_response.choices[0].finish_reason)
+                hold, model_response_str = self.check_special_tokens(chunk=completion_obj["content"], finish_reason=model_response.choices[0].finish_reason) # filter out bos/eos tokens from openai-compatible hf endpoints
                 print_verbose(f"hold - {hold}, model_response_str - {model_response_str}")
                 if hold is False:
-                    completion_obj["content"] = model_response_str
-                    if self.sent_first_chunk == False:
-                        completion_obj["role"] = "assistant"
-                        self.sent_first_chunk = True
-                    model_response.choices[0].delta = Delta(**completion_obj)
+                    ## check if openai/azure chunk
+                    original_chunk = response_obj.get("original_chunk", None)
+                    if original_chunk:
+                        model_response.id = original_chunk.id
+                        if len(original_chunk.choices) > 0:
+                            try:
+                                delta = dict(original_chunk.choices[0].delta)
+                                model_response.choices[0].delta = Delta(**delta)
+                            except Exception as e:
+                                model_response.choices[0].delta = Delta()
+                        else:
+                            return
+                        model_response.system_fingerprint = original_chunk.system_fingerprint
+                        if self.sent_first_chunk == False:
+                            model_response.choices[0].delta["role"] = "assistant"
+                            self.sent_first_chunk = True
+                    else:
+                        ## else
+                        completion_obj["content"] = model_response_str
+                        if self.sent_first_chunk == False:
+                            completion_obj["role"] = "assistant"
+                            self.sent_first_chunk = True
+                        model_response.choices[0].delta = Delta(**completion_obj)
                     # LOGGING
                     threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                     print_verbose(f"model_response: {model_response}")
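For reference, below is a minimal standalone sketch of the pass-through behavior the litellm/utils.py hunk introduces: when the upstream response carries the provider's original OpenAI/Azure chunk, its id, delta, and system_fingerprint are copied onto the streamed response (which is what lets the new test_stream_chunk_builder_litellm_tool_call_regular_message assert a non-null system_fingerprint); otherwise the delta is rebuilt from the locally assembled content as before. The Provider* classes and merge_chunk helper are hypothetical stand-ins for illustration only, not litellm or OpenAI types.

from dataclasses import dataclass
from typing import Any, Dict, List, Optional

# Hypothetical stand-ins for the provider's streaming objects (not litellm types).
@dataclass
class ProviderDelta:
    role: Optional[str] = None
    content: Optional[str] = None

@dataclass
class ProviderChoice:
    delta: ProviderDelta

@dataclass
class ProviderChunk:
    id: str
    system_fingerprint: Optional[str]
    choices: List[ProviderChoice]

def merge_chunk(out: Dict[str, Any], original_chunk: Optional[ProviderChunk],
                fallback_content: str, sent_first_chunk: bool) -> bool:
    """Prefer the provider's own chunk when present; otherwise fall back to the
    locally assembled content string, mirroring the new if/else in the diff."""
    if original_chunk is not None:
        if not original_chunk.choices:
            return sent_first_chunk  # nothing to forward for this chunk
        out["id"] = original_chunk.id
        out["delta"] = dict(vars(original_chunk.choices[0].delta))
        out["system_fingerprint"] = original_chunk.system_fingerprint
    else:
        out["delta"] = {"content": fallback_content}
    if not sent_first_chunk:  # the first streamed delta carries the assistant role
        out["delta"]["role"] = "assistant"
        sent_first_chunk = True
    return sent_first_chunk

# Example: a chunk that came straight from an OpenAI/Azure endpoint.
response: Dict[str, Any] = {}
chunk = ProviderChunk(id="chatcmpl-123", system_fingerprint="fp_abc",
                      choices=[ProviderChoice(ProviderDelta(content="Hello"))])
print(merge_chunk(response, chunk, fallback_content="Hello", sent_first_chunk=False))
print(response)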