fix(llm_cost_calc/google.py): fix google embedding cost calculation

Fixes https://github.com/BerriAI/litellm/issues/4630

parent db7d417727
commit 3f965df68b

6 changed files with 133 additions and 18 deletions
@@ -15,10 +15,12 @@ from litellm.litellm_core_utils.llm_cost_calc.google import (
 from litellm.litellm_core_utils.llm_cost_calc.google import (
     cost_per_token as google_cost_per_token,
 )
+from litellm.litellm_core_utils.llm_cost_calc.google import (
+    cost_router as google_cost_router,
+)
 from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character
 from litellm.types.llms.openai import HttpxBinaryResponseContent
 from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
 
 from litellm.utils import (
     CallTypes,
     CostPerToken,
@@ -160,22 +162,32 @@ def cost_per_token(
 
     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
-    if custom_llm_provider == "vertex_ai" and "claude" in model:
-        return google_cost_per_token(
-            model=model_without_prefix,
-            custom_llm_provider=custom_llm_provider,
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-        )
     if custom_llm_provider == "vertex_ai":
-        return google_cost_per_character(
+        cost_router = google_cost_router(
             model=model_without_prefix,
             custom_llm_provider=custom_llm_provider,
             prompt_characters=prompt_characters,
             completion_characters=completion_characters,
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
+            call_type=call_type,
         )
+        if cost_router == "cost_per_character":
+            return google_cost_per_character(
+                model=model_without_prefix,
+                custom_llm_provider=custom_llm_provider,
+                prompt_characters=prompt_characters,
+                completion_characters=completion_characters,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+            )
+        elif cost_router == "cost_per_token":
+            return google_cost_per_token(
+                model=model_without_prefix,
+                custom_llm_provider=custom_llm_provider,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+            )
     elif custom_llm_provider == "gemini":
         return google_cost_per_token(
             model=model_without_prefix,
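Note on the hunk above: previously every vertex_ai call (outside the hard-coded claude branch) went straight to the character-based calculator. Judging by the new test further down, embedding calls carry token counts but no completion characters, so that path blew up and logged an error. The fix asks google_cost_router which calculator applies and dispatches on its answer. A minimal, self-contained sketch of that dispatch pattern, with illustrative names and rates (pick_route, calc_cost, and the per-unit prices are assumptions, not the litellm source):

from typing import Literal

Route = Literal["cost_per_character", "cost_per_token"]


def pick_route(provider: str, model: str, call_type: str) -> Route:
    # Embedding calls report token counts but no completion characters,
    # so they must be costed per token rather than per character.
    if provider == "vertex_ai" and "claude" in model:
        return "cost_per_token"
    if provider == "gemini" or call_type in ("embedding", "aembedding"):
        return "cost_per_token"
    return "cost_per_character"


def calc_cost(provider: str, model: str, call_type: str, tokens: float, chars: float) -> float:
    # Illustrative per-unit rates; real rates come from the model cost map.
    if pick_route(provider, model, call_type) == "cost_per_token":
        return tokens * 1e-7
    return chars * 2.5e-8


print(calc_cost("vertex_ai", "textembedding-gecko", "aembedding", tokens=11, chars=0))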
@@ -1530,9 +1530,9 @@ Model Info:
         """Log deployment latency"""
         try:
             if "daily_reports" in self.alert_types:
-                model_id = (
-                    kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "")
-                )
+                litellm_params = kwargs.get("litellm_params", {}) or {}
+                model_info = litellm_params.get("model_info", {}) or {}
+                model_id = model_info.get("id", "") or ""
                 response_s: timedelta = end_time - start_time
 
                 final_value = response_s
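The rewritten lookup above is a None-safety fix as much as a readability one: dict.get only falls back to its default when the key is absent, not when the stored value is explicitly None. A quick standalone illustration of the failure the `or {}` guards prevent:

kwargs = {"litellm_params": None}  # key present, value explicitly None

# The old chained form would raise AttributeError here, because
# kwargs.get("litellm_params", {}) returns None, not {}.
litellm_params = kwargs.get("litellm_params", {}) or {}  # -> {}
model_info = litellm_params.get("model_info", {}) or {}  # -> {}
model_id = model_info.get("id", "") or ""                # -> ""
print(repr(model_id))  # ''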
@@ -1275,7 +1275,7 @@ class Logging:
                     f"Model={self.model}; cost={self.model_call_details['response_cost']}"
                 )
             except litellm.NotFoundError as e:
-                verbose_logger.error(
+                verbose_logger.warning(
                     f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None"
                 )
                 self.model_call_details["response_cost"] = None
@@ -1,7 +1,7 @@
 # What is this?
 ## Cost calculation for Google AI Studio / Vertex AI models
 import traceback
-from typing import List, Literal, Optional, Tuple
+from typing import List, Literal, Optional, Tuple, Union
 
 import litellm
 from litellm import verbose_logger
@@ -29,6 +29,32 @@ def _is_above_128k(tokens: float) -> bool:
     return False
 
 
+def cost_router(
+    model: str,
+    custom_llm_provider: str,
+    prompt_tokens: float,
+    completion_tokens: float,
+    prompt_characters: float,
+    completion_characters: float,
+    call_type: Union[Literal["embedding", "aembedding"], str],
+) -> Literal["cost_per_character", "cost_per_token"]:
+    """
+    Route the cost calc to the right place, based on model/call_type/etc.
+
+    Returns
+    - str, the specific google cost calc function it should route to.
+    """
+    if custom_llm_provider == "vertex_ai" and "claude" in model:
+        return "cost_per_token"
+    elif custom_llm_provider == "gemini":
+        return "cost_per_token"
+    elif custom_llm_provider == "vertex_ai" and (
+        call_type == "embedding" or call_type == "aembedding"
+    ):
+        return "cost_per_token"
+    return "cost_per_character"
+
+
 def cost_per_character(
     model: str,
     custom_llm_provider: str,
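A small usage sketch of the new cost_router, assuming the module above is importable; the numeric arguments are illustrative (the current routing rules only inspect model, provider, and call_type):

route = cost_router(
    model="textembedding-gecko",
    custom_llm_provider="vertex_ai",
    prompt_tokens=11.0,
    completion_tokens=0.0,
    prompt_characters=44.0,
    completion_characters=0.0,
    call_type="aembedding",
)
assert route == "cost_per_token"  # embeddings now avoid the character-based path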
@@ -1,10 +1,14 @@
 model_list:
-  - model_name: "*"
+  - model_name: azure-ai-mistral
     litellm_params:
-      model: "openai/*"
-  - model_name: claude-3-5-sonnet-20240620
+      api_base: os.environ/AZURE_AI_MISTRAL_API_BASE
+      api_key: os.environ/AZURE_AI_MISTRAL_API_KEY
+      model: azure_ai/Mistral-large-nmefg
+  - model_name: azure-ai-phi
     litellm_params:
-      model: gpt-3.5-turbo
+      api_base: os.environ/AZURE_AI_PHI_API_BASE
+      api_key: os.environ/AZURE_AI_PHI_API_KEY
+      model: azure_ai/Phi-3-medium-128k-instruct-fpmvj
 
 
 general_settings:
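For context, a hypothetical client call against a litellm proxy started with the config above. The base_url and api_key values are assumptions for local testing, not part of the commit; "azure-ai-mistral" is the model_name declared in the config's model_list:

import openai

# Point any OpenAI-compatible client at the proxy and select a configured model.
client = openai.OpenAI(base_url="http://localhost:4000", api_key="sk-anything")
resp = client.chat.completions.create(
    model="azure-ai-mistral",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(resp.choices[0].message.content)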
@@ -712,6 +712,79 @@ def test_vertex_ai_claude_completion_cost():
     assert cost == predicted_cost
 
 
+def test_vertex_ai_embedding_completion_cost(caplog):
+    """
+    Relevant issue - https://github.com/BerriAI/litellm/issues/4630
+    """
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    text = "The quick brown fox jumps over the lazy dog."
+    input_tokens = litellm.token_counter(
+        model="vertex_ai/textembedding-gecko", text=text
+    )
+
+    model_info = litellm.get_model_info(model="vertex_ai/textembedding-gecko")
+
+    print("\nExpected model info:\n{}\n\n".format(model_info))
+
+    expected_input_cost = input_tokens * model_info["input_cost_per_token"]
+
+    ## CALCULATED COST
+    calculated_input_cost, calculated_output_cost = cost_per_token(
+        model="textembedding-gecko",
+        custom_llm_provider="vertex_ai",
+        prompt_tokens=input_tokens,
+        call_type="aembedding",
+    )
+
+    assert round(expected_input_cost, 6) == round(calculated_input_cost, 6)
+    print("expected_input_cost: {}".format(expected_input_cost))
+    print("calculated_input_cost: {}".format(calculated_input_cost))
+
+    captured_logs = [rec.message for rec in caplog.records]
+    for item in captured_logs:
+        print("\nitem:{}\n".format(item))
+        if (
+            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Exception occured "
+            in item
+        ):
+            raise Exception("Error log raised for calculating embedding cost")
+
+
+# def test_vertex_ai_embedding_completion_cost_e2e():
+#     """
+#     Relevant issue - https://github.com/BerriAI/litellm/issues/4630
+#     """
+#     from litellm.tests.test_amazing_vertex_completion import load_vertex_ai_credentials
+
+#     load_vertex_ai_credentials()
+#     os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+#     litellm.model_cost = litellm.get_model_cost_map(url="")
+
+#     text = "The quick brown fox jumps over the lazy dog."
+#     input_tokens = litellm.token_counter(
+#         model="vertex_ai/textembedding-gecko", text=text
+#     )
+
+#     model_info = litellm.get_model_info(model="vertex_ai/textembedding-gecko")
+
+#     print("\nExpected model info:\n{}\n\n".format(model_info))
+
+#     expected_input_cost = input_tokens * model_info["input_cost_per_token"]
+
+#     ## CALCULATED COST
+#     resp = litellm.embedding(model="textembedding-gecko", input=[text])
+
+#     calculated_input_cost = resp._hidden_params["response_cost"]
+
+#     assert round(expected_input_cost, 6) == round(calculated_input_cost, 6)
+#     print("expected_input_cost: {}".format(expected_input_cost))
+#     print("calculated_input_cost: {}".format(calculated_input_cost))
+
+#     assert False
+
+
 @pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
 async def test_completion_cost_hidden_params(sync_mode):
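For intuition, the assertion in the new test reduces to simple arithmetic. A worked example with assumed numbers (the real per-token rate comes from litellm's model cost map, and the token count depends on the tokenizer, so both values below are illustrative):

input_cost_per_token = 6.25e-08  # assumed rate, not the real map value
input_tokens = 11                # tokenizer-dependent count for the sample sentence

expected_input_cost = input_tokens * input_cost_per_token  # 6.875e-07
print(round(expected_input_cost, 6))  # 1e-06, compared against the calculated cost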