diff --git a/litellm/tests/test_stream_chunk_builder.py b/litellm/tests/test_stream_chunk_builder.py index 3caaf5377..ad72b22e2 100644 --- a/litellm/tests/test_stream_chunk_builder.py +++ b/litellm/tests/test_stream_chunk_builder.py @@ -135,4 +135,40 @@ def test_stream_chunk_builder_litellm_tool_call_regular_message(): pytest.fail(f"An exception occurred - {str(e)}") -test_stream_chunk_builder_litellm_tool_call_regular_message() +# test_stream_chunk_builder_litellm_tool_call_regular_message() + + +def test_stream_chunk_builder_count_prompt_tokens(): + # test the prompt tokens for streamed responses == prompt tokens for non-streamed + # test the model for streamed responses != model for non-streamed (the assembled stream response keeps the requested model name, while the non-stream response reports the model name returned by the Azure API) + try: + messages = [{"role": "user", "content": "Hey, how's it going?"}] + litellm.set_verbose = False + response = litellm.completion( + model="azure/chatgpt-v-2", + messages=messages, + stream=True, + max_tokens=1, + complete_response=True, + ) + print(f"Stream Assembled response: {response}") + + stream_prompt_tokens = response.usage.prompt_tokens + stream_model = response.model + + response = litellm.completion( + model="azure/chatgpt-v-2", messages=messages, max_tokens=1 + ) + print(f"\nNon Stream Response: {response}") + + non_stream_prompt_tokens = response.usage.prompt_tokens + non_stream_model = response.model + + assert stream_prompt_tokens == non_stream_prompt_tokens + assert stream_model != non_stream_model + + except Exception as e: + pytest.fail(f"An exception occurred - {str(e)}") + + +# test_stream_chunk_builder_count_prompt_tokens() diff --git a/litellm/tests/test_utils.py b/litellm/tests/test_utils.py index 904715e2b..a9e1c75fe 100644 --- a/litellm/tests/test_utils.py +++ b/litellm/tests/test_utils.py @@ -317,24 +317,3 @@ def test_token_counter(): # test_token_counter() - - -def test_token_counter_azure(): - # test that prompt_tokens counted == prompt_tokens from Azure API - try: - messages = [{"role": "user", "content": "hi how are you what time is 
it"}] - tokens_counted = token_counter(model="gpt-35-turbo", messages=messages) - print("Tokens Counted=", tokens_counted) - - response = litellm.completion(model="azure/chatgpt-v-2", messages=messages) - - prompt_tokens = response.usage.prompt_tokens - - print("Tokens from Azure API=", prompt_tokens) - - assert prompt_tokens == tokens_counted - except: - pytest.fail(f"Error occurred: {e}") - - -# test_token_counter_azure()