litellm-mirror/tests/litellm/test_cost_calculator.py
Krish Dholakia 4351c77253
Support Gemini audio token cost tracking + fix openai audio input token cost tracking (#9535)
* fix(vertex_and_google_ai_studio_gemini.py): log gemini audio tokens in usage object

enables accurate cost tracking

* refactor(vertex_ai/cost_calculator.py): refactor 128k+ token cost calculation to only run if model info has it

Google has moved away from this for gemini-2.0 models

* refactor(vertex_ai/cost_calculator.py): migrate to usage object for more flexible data passthrough

* fix(llm_cost_calc/utils.py): support audio token cost tracking in generic cost per token

enables vertex ai cost tracking to work with audio tokens

* fix(llm_cost_calc/utils.py): default to total prompt tokens if text tokens field not set

* refactor(llm_cost_calc/utils.py): move openai cost tracking to generic cost per token

more consistent behaviour across providers

* test: add unit test for gemini audio token cost calculation

* ci: bump ci config

* test: fix test
2025-03-26 17:26:25 -07:00
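
For context, here is a minimal sketch of the pricing logic the commit bullets above describe, assuming a usage object shaped like litellm's Usage/PromptTokensDetailsWrapper and a model_info dict holding per-token prices. The function name and the fallback handling are illustrative assumptions, not the actual generic cost-per-token implementation in llm_cost_calc/utils.py:

def sketch_generic_cost_per_token(usage, model_info: dict) -> float:
    details = getattr(usage, "prompt_tokens_details", None)
    audio_tokens = getattr(details, "audio_tokens", None) or 0
    text_tokens = getattr(details, "text_tokens", None)
    if text_tokens is None:
        # Fallback described above: if the provider did not report a separate
        # text-token count, bill all prompt tokens at the text rate. (A real
        # implementation may also need to guard against double-counting any
        # audio tokens here.)
        text_tokens = usage.prompt_tokens
    audio_rate = model_info.get(
        "input_cost_per_audio_token", model_info["input_cost_per_token"]
    )
    # The commit also gates the long-context (128k+) pricing tier on the model
    # info actually defining it (a key like
    # "input_cost_per_token_above_128k_tokens"), since gemini-2.0 models have
    # moved away from that tier; that branch is omitted from this sketch.
    return (
        text_tokens * model_info["input_cost_per_token"]
        + audio_tokens * audio_rate
        + usage.completion_tokens * model_info["output_cost_per_token"]
    )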


import os
import sys

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

from pydantic import BaseModel

import litellm
from litellm.cost_calculator import response_cost_calculator
from litellm.types.utils import ModelResponse, PromptTokensDetailsWrapper, Usage


def test_cost_calculator_with_response_cost_in_additional_headers():
    """A provider-reported cost in the response headers should be used as-is."""

    class MockResponse(BaseModel):
        _hidden_params = {
            "additional_headers": {"llm_provider-x-litellm-response-cost": 1000}
        }

    result = response_cost_calculator(
        response_object=MockResponse(),
        model="",
        custom_llm_provider=None,
        call_type="",
        optional_params={},
        cache_hit=None,
        base_model=None,
    )

    assert result == 1000
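

# A minimal sketch of the behaviour the test above exercises (an assumption
# about litellm's internals, not the actual implementation): when the provider
# response already carries a pre-computed cost header, that value wins over any
# per-token math. The helper name is hypothetical.
def _cost_from_headers(hidden_params: dict):
    headers = hidden_params.get("additional_headers") or {}
    cost = headers.get("llm_provider-x-litellm-response-cost")
    return float(cost) if cost is not None else None


assert (
    _cost_from_headers(
        {"additional_headers": {"llm_provider-x-litellm-response-cost": 1000}}
    )
    == 1000.0
)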


def test_cost_calculator_with_usage():
    """Cost should be computed from the audio/text token split in the usage object."""
    # Use the locally bundled model cost map so prices are deterministic and
    # the test does not depend on a network fetch.
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    usage = Usage(
        prompt_tokens=100,
        completion_tokens=100,
        prompt_tokens_details=PromptTokensDetailsWrapper(
            text_tokens=10, audio_tokens=90
        ),
    )
    mr = ModelResponse(usage=usage, model="gemini-2.0-flash-001")

    result = response_cost_calculator(
        response_object=mr,
        model="",
        custom_llm_provider="vertex_ai",
        call_type="acompletion",
        optional_params={},
        cache_hit=None,
        base_model=None,
    )

    model_info = litellm.model_cost["gemini-2.0-flash-001"]
    expected_cost = (
        usage.prompt_tokens_details.audio_tokens
        * model_info["input_cost_per_audio_token"]
        + usage.prompt_tokens_details.text_tokens * model_info["input_cost_per_token"]
        + usage.completion_tokens * model_info["output_cost_per_token"]
    )

    assert result == expected_cost, f"Got {result}, Expected {expected_cost}"
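

# Worked example of the expected-cost formula above with illustrative prices
# (made-up numbers, not Gemini's actual rates):
#   input_cost_per_audio_token = 7e-7, input_cost_per_token = 1e-7,
#   output_cost_per_token = 4e-7
#   expected_cost = 90 * 7e-7 + 10 * 1e-7 + 100 * 4e-7
#                 = 6.3e-5 + 1.0e-6 + 4.0e-5
#                 = 1.04e-4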