Commit d6844f43c8 (parent 4ca4913468) in BerriAI/litellm, a mirror of https://github.com/BerriAI/litellm.git

test(test_keys.py): use correct model name for token counting

3 changed files with 24 additions and 11 deletions
@@ -556,7 +556,7 @@ class PrismaClient:
                     where={"token": token},  # type: ignore
                     data={**db_data},  # type: ignore
                 )
-                print_verbose(
+                verbose_proxy_logger.debug(
                     "\033[91m"
                     + f"DB Token Table update succeeded {response}"
                     + "\033[0m"
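
This swap from print_verbose to a module logger repeats throughout the commit. A minimal sketch of the pattern, assuming verbose_proxy_logger behaves like a standard logging.Logger (the name and wiring below are illustrative, not litellm's actual setup):

import logging

# Illustrative stand-in for litellm's verbose_proxy_logger; assumed here to be
# a plain logging.Logger, whose records only surface once debug logging is on.
verbose_proxy_logger = logging.getLogger("litellm.proxy.sketch")


def log_db_update(response: dict) -> None:
    # Same pattern as the hunk above: wrap the message in ANSI red
    # ("\033[91m" ... "\033[0m") so it stands out in terminal output.
    verbose_proxy_logger.debug(
        "\033[91m" + f"DB Token Table update succeeded {response}" + "\033[0m"
    )


logging.basicConfig(level=logging.DEBUG)  # opt in; debug records are silent otherwise
log_db_update({"token": "hashed-token", "spend": 0.0})

Unlike print_verbose, the logger call costs almost nothing when debug logging is off, and the output can be routed or filtered like any other log record.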
@@ -2938,17 +2938,25 @@ def cost_per_token(
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model_with_provider in model_cost_ref:
-        print_verbose(f"Looking up model={model_with_provider} in model_cost_map")
+        verbose_logger.debug(
+            f"Looking up model={model_with_provider} in model_cost_map"
+        )
+        verbose_logger.debug(
+            f"applying cost={model_cost_ref[model_with_provider]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
+        )
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model_with_provider]["input_cost_per_token"] * prompt_tokens
         )
+        verbose_logger.debug(
+            f"applying cost={model_cost_ref[model_with_provider]['output_cost_per_token']} for completion_tokens={completion_tokens}"
+        )
         completion_tokens_cost_usd_dollar = (
             model_cost_ref[model_with_provider]["output_cost_per_token"]
             * completion_tokens
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif "ft:gpt-3.5-turbo" in model:
-        print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
+        verbose_logger.debug(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
         # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
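
For readers unfamiliar with the function, a simplified sketch of the lookup this hunk instruments, using a hypothetical cost entry (the figures are placeholders, not litellm's pricing data):

# Hypothetical cost table, keyed the way model_cost_ref is keyed above.
model_cost_ref = {
    "azure/gpt-35-turbo": {
        "input_cost_per_token": 1.5e-06,
        "output_cost_per_token": 2.0e-06,
    }
}


def cost_per_token_sketch(model: str, prompt_tokens: int, completion_tokens: int):
    # Price each side of the exchange from the per-token rates in the table.
    entry = model_cost_ref[model]
    prompt_cost = entry["input_cost_per_token"] * prompt_tokens
    completion_cost = entry["output_cost_per_token"] * completion_tokens
    return prompt_cost, completion_cost


print(cost_per_token_sketch("azure/gpt-35-turbo", 19, 34))  # (2.85e-05, 6.8e-05)

The new debug lines log exactly these two multiplications, which makes cost-tracking discrepancies traceable to a specific rate and token count.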
@@ -2959,17 +2967,23 @@ def cost_per_token(
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model in litellm.azure_llms:
-        print_verbose(f"Cost Tracking: {model} is an Azure LLM")
+        verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM")
         model = litellm.azure_llms[model]
+        verbose_logger.debug(
+            f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
+        )
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
         )
+        verbose_logger.debug(
+            f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
+        )
         completion_tokens_cost_usd_dollar = (
             model_cost_ref[model]["output_cost_per_token"] * completion_tokens
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model in litellm.azure_embedding_models:
-        print_verbose(f"Cost Tracking: {model} is an Azure Embedding Model")
+        verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
         model = litellm.azure_embedding_models[model]
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
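
Note the alias hop before the lookup: model = litellm.azure_llms[model] remaps an Azure model name onto the key the cost table uses. Sketched with an assumed mapping (litellm's real table may differ):

# Assumed alias table, for illustration only.
azure_llms = {"gpt-35-turbo": "azure/gpt-35-turbo"}

model = "gpt-35-turbo"
if model in azure_llms:
    model = azure_llms[model]  # remapped to the key the cost table uses
print(model)  # azure/gpt-35-turbo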
@@ -2,7 +2,7 @@
 ## Tests /key endpoints.

 import pytest
-import asyncio
+import asyncio, time
 import aiohttp
 from openai import AsyncOpenAI
 import sys, os
@@ -95,11 +95,10 @@ async def chat_completion(session, key, model="gpt-4"):
 async def chat_completion_streaming(session, key, model="gpt-4"):
     client = AsyncOpenAI(api_key=key, base_url="http://0.0.0.0:4000")
     messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Hello!"},
+        {"role": "system", "content": "You are a helpful assistant"},
+        {"role": "user", "content": f"Hello! {time.time()}"},
     ]
     prompt_tokens = litellm.token_counter(model="gpt-35-turbo", messages=messages)
-    assert prompt_tokens == 19
     data = {
         "model": model,
         "messages": messages,
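
With the timestamped user message introduced above, every run's prompt is unique, which is why the hard-coded assert prompt_tokens == 19 was dropped. A standalone sketch of the counting step (requires litellm installed; the calls mirror the test):

import time

import litellm

messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": f"Hello! {time.time()}"},
]

# The timestamp tokenizes differently on every run, so the count drifts and a
# fixed equality assertion would be flaky.
prompt_tokens = litellm.token_counter(model="gpt-35-turbo", messages=messages)
print(f"prompt_tokens: {prompt_tokens}")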
@@ -114,7 +113,7 @@ async def chat_completion_streaming(session, key, model="gpt-4"):
     print(f"content: {content}")

     completion_tokens = litellm.token_counter(
-        model="azure/gpt-35-turbo", text=content, count_response_tokens=True
+        model="gpt-35-turbo", text=content, count_response_tokens=True
     )

     return prompt_tokens, completion_tokens
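
Note the mirror-image rename here: token counting drops the azure/ prefix (the tokenizer only needs the base model), while the cost lookup below gains it. count_response_tokens=True appears to have token_counter count the raw completion text rather than a chat-formatted message list. A standalone sketch of the same call:

import litellm

# Counting response-side tokens from raw text, as the test does for the
# content it accumulates from the stream.
completion_tokens = litellm.token_counter(
    model="gpt-35-turbo", text="Hi there! How can I help?", count_response_tokens=True
)
print(f"completion_tokens: {completion_tokens}")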
@@ -251,7 +250,7 @@ async def test_key_info_spend_values():
     )
     print(f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}")
     prompt_cost, completion_cost = litellm.cost_per_token(
-        model="gpt-35-turbo",
+        model="azure/gpt-35-turbo",
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
    )
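
This last hunk is the fix the commit message names: the cost lookup now uses the provider-prefixed model name, so it matches the azure entry in the cost map (presumably via the model_with_provider branch shown earlier) and prices tokens the same way the proxy does. Mirroring the test's call, with sample token counts standing in for the measured ones:

import litellm

# Sample counts for illustration; the test passes in the counts it measured.
prompt_cost, completion_cost = litellm.cost_per_token(
    model="azure/gpt-35-turbo",
    prompt_tokens=19,
    completion_tokens=34,
)
print(f"prompt_cost: {prompt_cost}, completion_cost: {completion_cost}")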