diff --git a/tests/test_keys.py b/tests/test_keys.py index f06b6721e..a0bf7387d 100644 --- a/tests/test_keys.py +++ b/tests/test_keys.py @@ -98,6 +98,8 @@ async def chat_completion_streaming(session, key, model="gpt-4"): {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}, ] + prompt_tokens = litellm.token_counter(model="gpt-35-turbo", messages=messages) + assert prompt_tokens == 19 data = { "model": model, "messages": messages, @@ -110,7 +112,7 @@ async def chat_completion_streaming(session, key, model="gpt-4"): content += chunk.choices[0].delta.content or "" print(f"content: {content}") - prompt_tokens = litellm.token_counter(model="azure/gpt-35-turbo", messages=messages) + completion_tokens = litellm.token_counter( model="azure/gpt-35-turbo", text=content, count_response_tokens=True ) @@ -249,7 +251,7 @@ async def test_key_info_spend_values(): ) print(f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}") prompt_cost, completion_cost = litellm.cost_per_token( - model="azure/gpt-35-turbo", + model="gpt-35-turbo", prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, )