Revert avglogprobs change + Add azure/gpt-4o-realtime-audio cost tracking (#9893)

* test: initial commit fixing gemini logprobs

Fixes https://github.com/BerriAI/litellm/issues/9888

* fix(vertex_and_google_ai_studio.py): Revert avglogprobs change

Fixes https://github.com/BerriAI/litellm/issues/8890

* build(model_prices_and_context_window.json): add gpt-4o-realtime-preview cost to model cost map

Fixes https://github.com/BerriAI/litellm/issues/9814

* test: add cost calculation unit testing

* test: fix test

* test: update test
This commit is contained in:
Krish Dholakia 2025-04-10 21:23:55 -07:00 committed by GitHub
parent 892964272f
commit 78879c68a9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 214 additions and 133 deletions

View file

@ -748,9 +748,6 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
chat_completion_logprobs = self._transform_logprobs(
logprobs_result=candidate["logprobsResult"]
)
# Handle avgLogprobs for Gemini Flash 2.0
elif "avgLogprobs" in candidate:
chat_completion_logprobs = candidate["avgLogprobs"]
if tools:
chat_completion_message["tool_calls"] = tools

View file

@ -1288,6 +1288,68 @@
"supports_system_messages": true,
"supports_tool_choice": true
},
"azure/gpt-4o-realtime-preview-2024-12-17": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000005,
"input_cost_per_audio_token": 0.00004,
"cache_read_input_token_cost": 0.0000025,
"output_cost_per_token": 0.00002,
"output_cost_per_audio_token": 0.00008,
"litellm_provider": "azure",
"mode": "chat",
"supported_modalities": ["text", "audio"],
"supported_output_modalities": ["text", "audio"],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true,
"supports_system_messages": true,
"supports_tool_choice": true
},
"azure/us/gpt-4o-realtime-preview-2024-12-17": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 5.5e-6,
"input_cost_per_audio_token": 44e-6,
"cache_read_input_token_cost": 2.75e-6,
"cache_read_input_audio_token_cost": 2.5e-6,
"output_cost_per_token": 22e-6,
"output_cost_per_audio_token": 80e-6,
"litellm_provider": "azure",
"mode": "chat",
"supported_modalities": ["text", "audio"],
"supported_output_modalities": ["text", "audio"],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true,
"supports_system_messages": true,
"supports_tool_choice": true
},
"azure/eu/gpt-4o-realtime-preview-2024-12-17": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 5.5e-6,
"input_cost_per_audio_token": 44e-6,
"cache_read_input_token_cost": 2.75e-6,
"cache_read_input_audio_token_cost": 2.5e-6,
"output_cost_per_token": 22e-6,
"output_cost_per_audio_token": 80e-6,
"litellm_provider": "azure",
"mode": "chat",
"supported_modalities": ["text", "audio"],
"supported_output_modalities": ["text", "audio"],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true,
"supports_system_messages": true,
"supports_tool_choice": true
},
"azure/gpt-4o-realtime-preview-2024-10-01": {
"max_tokens": 4096,
"max_input_tokens": 128000,

View file

@ -724,7 +724,7 @@ class Choices(OpenAIObject):
finish_reason=None,
index=0,
message: Optional[Union[Message, dict]] = None,
logprobs=None,
logprobs: Optional[Union[ChoiceLogprobs, dict, Any]] = None,
enhancements=None,
**params,
):

View file

@ -1288,6 +1288,68 @@
"supports_system_messages": true,
"supports_tool_choice": true
},
"azure/gpt-4o-realtime-preview-2024-12-17": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000005,
"input_cost_per_audio_token": 0.00004,
"cache_read_input_token_cost": 0.0000025,
"output_cost_per_token": 0.00002,
"output_cost_per_audio_token": 0.00008,
"litellm_provider": "azure",
"mode": "chat",
"supported_modalities": ["text", "audio"],
"supported_output_modalities": ["text", "audio"],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true,
"supports_system_messages": true,
"supports_tool_choice": true
},
"azure/us/gpt-4o-realtime-preview-2024-12-17": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 5.5e-6,
"input_cost_per_audio_token": 44e-6,
"cache_read_input_token_cost": 2.75e-6,
"cache_read_input_audio_token_cost": 2.5e-6,
"output_cost_per_token": 22e-6,
"output_cost_per_audio_token": 80e-6,
"litellm_provider": "azure",
"mode": "chat",
"supported_modalities": ["text", "audio"],
"supported_output_modalities": ["text", "audio"],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true,
"supports_system_messages": true,
"supports_tool_choice": true
},
"azure/eu/gpt-4o-realtime-preview-2024-12-17": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 5.5e-6,
"input_cost_per_audio_token": 44e-6,
"cache_read_input_token_cost": 2.75e-6,
"cache_read_input_audio_token_cost": 2.5e-6,
"output_cost_per_token": 22e-6,
"output_cost_per_audio_token": 80e-6,
"litellm_provider": "azure",
"mode": "chat",
"supported_modalities": ["text", "audio"],
"supported_output_modalities": ["text", "audio"],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_audio_input": true,
"supports_audio_output": true,
"supports_system_messages": true,
"supports_tool_choice": true
},
"azure/gpt-4o-realtime-preview-2024-10-01": {
"max_tokens": 4096,
"max_input_tokens": 128000,

View file

@ -1,68 +0,0 @@
import json
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
VertexGeminiConfig,
)
def test_top_logprobs():
    """Check that OpenAI logprob params are renamed by ``map_openai_params``.

    The OpenAI ``logprobs`` flag is expected under the ``responseLogprobs``
    key, and the OpenAI ``top_logprobs`` count under the ``logprobs`` key of
    the returned mapping.
    """
    non_default_params = {
        "top_logprobs": 2,
        "logprobs": True,
    }
    optional_params = {}
    model = "gemini"

    v = VertexGeminiConfig().map_openai_params(
        non_default_params=non_default_params,
        optional_params=optional_params,
        model=model,
        drop_params=False,
    )

    # ``True`` is a singleton, so an identity check is safe and strict here.
    assert v["responseLogprobs"] is non_default_params["logprobs"]
    # Compare the integer by value: identity on ints only holds because of
    # CPython's small-integer cache and is not guaranteed by the language.
    assert v["logprobs"] == non_default_params["top_logprobs"]
def test_get_model_for_vertex_ai_url():
    """Verify the model name that ``get_model_for_vertex_ai_url`` returns.

    A plain model name must come back unchanged, while a ``gemini/``-prefixed
    name must come back without the prefix.
    """
    expectations = (
        # Regular model name
        ("gemini-pro", "gemini-pro"),
        # Gemini spec model with UUID
        ("gemini/ft-uuid-123", "ft-uuid-123"),
    )
    for model_name, expected in expectations:
        resolved = VertexGeminiConfig.get_model_for_vertex_ai_url(model_name)
        assert resolved == expected
def test_is_model_gemini_spec_model():
    """Check ``_is_model_gemini_spec_model`` for None, plain and spec models.

    The results are compared with ``is`` rather than ``==`` so that the test
    pins an actual boolean return value and not merely something that
    compares equal to one (e.g. ``0`` or ``1``).
    """
    # Test case 1: None input
    assert VertexGeminiConfig._is_model_gemini_spec_model(None) is False

    # Test case 2: Regular model name
    assert VertexGeminiConfig._is_model_gemini_spec_model("gemini-pro") is False

    # Test case 3: Gemini spec model
    assert (
        VertexGeminiConfig._is_model_gemini_spec_model("gemini/custom-model")
        is True
    )
def test_get_model_name_from_gemini_spec_model():
    """Verify ``_get_model_name_from_gemini_spec_model`` for both name shapes.

    A plain model name must be returned as-is; a ``gemini/``-prefixed name
    must be returned without the prefix.
    """
    extract = VertexGeminiConfig._get_model_name_from_gemini_spec_model
    cases = {
        # Regular model name
        "gemini-pro": "gemini-pro",
        # Gemini spec model
        "gemini/ft-uuid-123": "ft-uuid-123",
    }
    for raw_name, wanted in cases.items():
        assert extract(raw_name) == wanted

View file

@ -1,66 +1,64 @@
import pytest
import asyncio
from unittest.mock import MagicMock
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexGeminiConfig
import pytest
import litellm
from litellm import ModelResponse
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
VertexGeminiConfig,
)
from litellm.types.utils import ChoiceLogprobs
@pytest.mark.asyncio
async def test_transform_response_with_avglogprobs():
"""
Test that the transform_response method correctly handles the avgLogprobs key
from Gemini Flash 2.0 responses.
"""
# Create a mock response with avgLogprobs
response_json = {
"candidates": [{
"content": {"parts": [{"text": "Test response"}], "role": "model"},
"finishReason": "STOP",
"avgLogprobs": -0.3445799010140555
}],
"usageMetadata": {
"promptTokenCount": 10,
"candidatesTokenCount": 5,
"totalTokenCount": 15
}
def test_top_logprobs():
non_default_params = {
"top_logprobs": 2,
"logprobs": True,
}
# Create a mock HTTP response
mock_response = MagicMock()
mock_response.json.return_value = response_json
# Create a mock logging object
mock_logging = MagicMock()
# Create an instance of VertexGeminiConfig
config = VertexGeminiConfig()
# Create a ModelResponse object
model_response = ModelResponse(
id="test-id",
choices=[],
created=1234567890,
model="gemini-2.0-flash",
usage={
"prompt_tokens": 10,
"completion_tokens": 5,
"total_tokens": 15
}
optional_params = {}
model = "gemini"
v = VertexGeminiConfig().map_openai_params(
non_default_params=non_default_params,
optional_params=optional_params,
model=model,
drop_params=False,
)
# Call the transform_response method
transformed_response = config.transform_response(
model="gemini-2.0-flash",
raw_response=mock_response,
model_response=model_response,
logging_obj=mock_logging,
request_data={},
messages=[],
optional_params={},
litellm_params={},
encoding=None
)
# Assert that the avgLogprobs was correctly added to the model response
assert len(transformed_response.choices) == 1
assert transformed_response.choices[0].logprobs == -0.3445799010140555
assert v["responseLogprobs"] is non_default_params["logprobs"]
assert v["logprobs"] is non_default_params["top_logprobs"]
def test_get_model_for_vertex_ai_url():
    """Verify the model name that ``get_model_for_vertex_ai_url`` returns.

    A plain model name must come back unchanged, while a ``gemini/``-prefixed
    name must come back without the prefix.
    """
    expectations = (
        # Regular model name
        ("gemini-pro", "gemini-pro"),
        # Gemini spec model with UUID
        ("gemini/ft-uuid-123", "ft-uuid-123"),
    )
    for model_name, expected in expectations:
        resolved = VertexGeminiConfig.get_model_for_vertex_ai_url(model_name)
        assert resolved == expected
def test_is_model_gemini_spec_model():
    """Check ``_is_model_gemini_spec_model`` for None, plain and spec models.

    The results are compared with ``is`` rather than ``==`` so that the test
    pins an actual boolean return value and not merely something that
    compares equal to one (e.g. ``0`` or ``1``).
    """
    # Test case 1: None input
    assert VertexGeminiConfig._is_model_gemini_spec_model(None) is False

    # Test case 2: Regular model name
    assert VertexGeminiConfig._is_model_gemini_spec_model("gemini-pro") is False

    # Test case 3: Gemini spec model
    assert (
        VertexGeminiConfig._is_model_gemini_spec_model("gemini/custom-model")
        is True
    )
def test_get_model_name_from_gemini_spec_model():
    """Verify ``_get_model_name_from_gemini_spec_model`` for both name shapes.

    A plain model name must be returned as-is; a ``gemini/``-prefixed name
    must be returned without the prefix.
    """
    extract = VertexGeminiConfig._get_model_name_from_gemini_spec_model
    cases = {
        # Regular model name
        "gemini-pro": "gemini-pro",
        # Gemini spec model
        "gemini/ft-uuid-123": "ft-uuid-123",
    }
    for raw_name, wanted in cases.items():
        assert extract(raw_name) == wanted

View file

@ -211,3 +211,30 @@ def test_custom_pricing_with_router_model_id():
assert model_info["output_cost_per_token"] == 0.00003
assert model_info["cache_creation_input_token_cost"] == 0.0000075
assert model_info["cache_read_input_token_cost"] == 0.0000006
def test_azure_realtime_cost_calculator():
    """Check that a realtime stream on a custom Azure deployment gets a cost.

    The deployment name passed as ``litellm_model_name`` is a custom one, while
    the ``session.created`` event carries the underlying model name. The test
    asserts only that the computed cost is positive, not its exact value.
    """
    # NOTE(review): presumably this makes litellm load its bundled cost map
    # instead of fetching one remotely -- confirm against get_model_cost_map.
    # Both assignments mutate process-wide state and are not restored here.
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    cost = handle_realtime_stream_cost_calculation(
        results=[
            {
                "type": "session.created",
                "session": {"model": "gpt-4o-realtime-preview-2024-12-17"},
            },
        ],
        combined_usage_object=Usage(
            prompt_tokens=100,
            completion_tokens=100,
            # The prompt is split into text and audio tokens (10 + 90 = 100).
            prompt_tokens_details=PromptTokensDetailsWrapper(
                text_tokens=10, audio_tokens=90
            ),
        ),
        custom_llm_provider="azure",
        litellm_model_name="my-custom-azure-deployment",
    )

    assert cost > 0

View file

@ -82,4 +82,6 @@ def test_gemini_image_generation():
messages=[{"role": "user", "content": "Generate an image of a cat"}],
modalities=["image", "text"],
)
assert response.choices[0].message.content is not None
assert response.choices[0].message.content is not None

View file

@ -425,6 +425,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
"cache_creation_input_audio_token_cost": {"type": "number"},
"cache_creation_input_token_cost": {"type": "number"},
"cache_read_input_token_cost": {"type": "number"},
"cache_read_input_audio_token_cost": {"type": "number"},
"deprecation_date": {"type": "string"},
"input_cost_per_audio_per_second": {"type": "number"},
"input_cost_per_audio_per_second_above_128k_tokens": {"type": "number"},
@ -549,7 +550,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
"type": "array",
"items": {
"type": "string",
"enum": ["text", "image"],
"enum": ["text", "image", "audio"],
},
},
"supports_native_streaming": {"type": "boolean"},