test(test_keys.py): fix streaming test

Author: Krrish Dholakia
Date: 2024-01-23 16:27:45 -08:00
Parent: 7e0adbb9bd
Commit: f47db44b4f

@@ -98,6 +98,8 @@ async def chat_completion_streaming(session, key, model="gpt-4"):
         {"role": "system", "content": "You are a helpful assistant."},
         {"role": "user", "content": "Hello!"},
     ]
+    prompt_tokens = litellm.token_counter(model="gpt-35-turbo", messages=messages)
+    assert prompt_tokens == 19
     data = {
         "model": model,
         "messages": messages,
@@ -110,7 +112,7 @@ async def chat_completion_streaming(session, key, model="gpt-4"):
         content += chunk.choices[0].delta.content or ""
     print(f"content: {content}")
-    prompt_tokens = litellm.token_counter(model="azure/gpt-35-turbo", messages=messages)
+
     completion_tokens = litellm.token_counter(
         model="azure/gpt-35-turbo", text=content, count_response_tokens=True
     )
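
The fix counts prompt tokens once, up front, from the fixed two-message prompt (pinning the known value of 19 for gpt-35-turbo), rather than recounting after the stream has been consumed. A minimal sketch of the same token_counter calls outside the test harness; the streamed content string here is a hypothetical stand-in:

import litellm

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# Prompt tokens are counted from the chat messages themselves;
# the test pins this at 19 for gpt-35-turbo.
prompt_tokens = litellm.token_counter(model="gpt-35-turbo", messages=messages)
assert prompt_tokens == 19

# Completion tokens are counted over the reassembled streamed text;
# count_response_tokens=True tells the counter this is a bare response,
# not a chat prompt.
content = "Hi there! How can I help you today?"  # hypothetical streamed output
completion_tokens = litellm.token_counter(
    model="azure/gpt-35-turbo", text=content, count_response_tokens=True
)
print(f"prompt_tokens={prompt_tokens}, completion_tokens={completion_tokens}")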
@@ -249,7 +251,7 @@ async def test_key_info_spend_values():
     )
     print(f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}")
     prompt_cost, completion_cost = litellm.cost_per_token(
-        model="azure/gpt-35-turbo",
+        model="gpt-35-turbo",
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
    )
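
The spend-values test then converts those counts to dollars: litellm.cost_per_token looks the model up in litellm's cost map and returns a (prompt_cost, completion_cost) tuple in USD, and the fix queries it as gpt-35-turbo instead of azure/gpt-35-turbo. A sketch of the resulting spend arithmetic, with a hypothetical completion count:

import litellm

prompt_tokens = 19      # pinned by the assertion above
completion_tokens = 28  # hypothetical count for illustration

# cost_per_token returns per-call dollar costs split by direction.
prompt_cost, completion_cost = litellm.cost_per_token(
    model="gpt-35-turbo",
    prompt_tokens=prompt_tokens,
    completion_tokens=completion_tokens,
)
expected_spend = prompt_cost + completion_cost
print(f"expected spend: ${expected_spend:.6f}")

The sum is what the test compares against the spend reported for the key.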