Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00
Revert avglogprobs change + Add azure/gpt-4o-realtime-audio cost tracking (#9893)
* test: initial commit fixing gemini logprobs
  Fixes https://github.com/BerriAI/litellm/issues/9888
* fix(vertex_and_google_ai_studio.py): Revert avglogprobs change
  Fixes https://github.com/BerriAI/litellm/issues/8890
* build(model_prices_and_context_window.json): add gpt-4o-realtime-preview cost to model cost map
  Fixes https://github.com/BerriAI/litellm/issues/9814
* test: add cost calculation unit testing
* test: fix test
* test: update test
This commit is contained in:
parent 892964272f
commit 78879c68a9

9 changed files with 214 additions and 133 deletions
```diff
@@ -748,9 +748,6 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                 chat_completion_logprobs = self._transform_logprobs(
                     logprobs_result=candidate["logprobsResult"]
                 )
-            # Handle avgLogprobs for Gemini Flash 2.0
-            elif "avgLogprobs" in candidate:
-                chat_completion_logprobs = candidate["avgLogprobs"]
 
             if tools:
                 chat_completion_message["tool_calls"] = tools
```
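Why the revert matters: Gemini's `avgLogprobs` is a bare float, while the OpenAI-compatible `choices[].logprobs` field is expected to carry a structured logprobs object, so passing the float through broke downstream consumers (see the `Choices` typing fix below). A minimal illustrative sketch of the post-revert behavior; `extract_logprobs` is a hypothetical helper, not litellm's actual code:

```python
# Illustrative sketch only: extract_logprobs is a hypothetical helper,
# not litellm's implementation.
from typing import Any, Optional


def extract_logprobs(candidate: dict) -> Optional[Any]:
    # Only the structured logprobsResult is surfaced; a bare avgLogprobs
    # float is ignored rather than being stuffed into choices[].logprobs.
    if "logprobsResult" in candidate:
        return candidate["logprobsResult"]
    return None


print(extract_logprobs({"avgLogprobs": -0.34}))  # None after the revert
print(extract_logprobs({"logprobsResult": {"topCandidates": []}}))  # passed through
```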
```diff
@@ -1288,6 +1288,68 @@
         "supports_system_messages": true,
         "supports_tool_choice": true
     },
+    "azure/gpt-4o-realtime-preview-2024-12-17": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000005,
+        "input_cost_per_audio_token": 0.00004,
+        "cache_read_input_token_cost": 0.0000025,
+        "output_cost_per_token": 0.00002,
+        "output_cost_per_audio_token": 0.00008,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_modalities": ["text", "audio"],
+        "supported_output_modalities": ["text", "audio"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_audio_input": true,
+        "supports_audio_output": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true
+    },
+    "azure/us/gpt-4o-realtime-preview-2024-12-17": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5.5e-6,
+        "input_cost_per_audio_token": 44e-6,
+        "cache_read_input_token_cost": 2.75e-6,
+        "cache_read_input_audio_token_cost": 2.5e-6,
+        "output_cost_per_token": 22e-6,
+        "output_cost_per_audio_token": 80e-6,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_modalities": ["text", "audio"],
+        "supported_output_modalities": ["text", "audio"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_audio_input": true,
+        "supports_audio_output": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true
+    },
+    "azure/eu/gpt-4o-realtime-preview-2024-12-17": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5.5e-6,
+        "input_cost_per_audio_token": 44e-6,
+        "cache_read_input_token_cost": 2.75e-6,
+        "cache_read_input_audio_token_cost": 2.5e-6,
+        "output_cost_per_token": 22e-6,
+        "output_cost_per_audio_token": 80e-6,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_modalities": ["text", "audio"],
+        "supported_output_modalities": ["text", "audio"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_audio_input": true,
+        "supports_audio_output": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true
+    },
     "azure/gpt-4o-realtime-preview-2024-10-01": {
         "max_tokens": 4096,
         "max_input_tokens": 128000,
```
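To make the new prices concrete, here is rough illustrative arithmetic for how the per-token realtime prices compose into a request cost (plain Python, not litellm's internal cost function):

```python
# Prices copied from the azure/gpt-4o-realtime-preview-2024-12-17 entry above.
PRICES = {
    "input_cost_per_token": 0.000005,        # text input
    "input_cost_per_audio_token": 0.00004,   # audio input
    "output_cost_per_token": 0.00002,        # text output
    "output_cost_per_audio_token": 0.00008,  # audio output
}


def realtime_cost(text_in: int, audio_in: int, text_out: int, audio_out: int) -> float:
    """Illustrative cost composition; not litellm's implementation."""
    return (
        text_in * PRICES["input_cost_per_token"]
        + audio_in * PRICES["input_cost_per_audio_token"]
        + text_out * PRICES["output_cost_per_token"]
        + audio_out * PRICES["output_cost_per_audio_token"]
    )


# e.g. 10 text + 90 audio prompt tokens, 100 text completion tokens:
print(realtime_cost(10, 90, 100, 0))  # ~0.00565 USD
```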
```diff
@@ -724,7 +724,7 @@ class Choices(OpenAIObject):
         finish_reason=None,
         index=0,
         message: Optional[Union[Message, dict]] = None,
-        logprobs=None,
+        logprobs: Optional[Union[ChoiceLogprobs, dict, Any]] = None,
         enhancements=None,
         **params,
     ):
```
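The widened annotation documents that `logprobs` may arrive either as a structured `ChoiceLogprobs` or as a plain dict. A small usage sketch, assuming a current litellm install where `Choices`, `Message`, and `ChoiceLogprobs` are exported from `litellm.types.utils`:

```python
from litellm.types.utils import ChoiceLogprobs, Choices, Message

# Both of these forms are now covered by the annotation:
structured = Choices(
    finish_reason="stop",
    index=0,
    message=Message(content="hello", role="assistant"),
    logprobs=ChoiceLogprobs(content=[]),
)
from_dict = Choices(
    finish_reason="stop",
    index=0,
    message={"role": "assistant", "content": "hello"},
    logprobs={"content": []},
)
print(type(structured.logprobs), type(from_dict.logprobs))
```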
```diff
@@ -1,68 +0,0 @@
-import json
-import os
-import sys
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import httpx
-import pytest
-
-sys.path.insert(
-    0, os.path.abspath("../../../../..")
-)  # Adds the parent directory to the system path
-
-from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
-    VertexGeminiConfig,
-)
-
-
-def test_top_logprobs():
-    non_default_params = {
-        "top_logprobs": 2,
-        "logprobs": True,
-    }
-    optional_params = {}
-    model = "gemini"
-
-    v = VertexGeminiConfig().map_openai_params(
-        non_default_params=non_default_params,
-        optional_params=optional_params,
-        model=model,
-        drop_params=False,
-    )
-    assert v["responseLogprobs"] is non_default_params["logprobs"]
-    assert v["logprobs"] is non_default_params["top_logprobs"]
-
-
-def test_get_model_for_vertex_ai_url():
-    # Test case 1: Regular model name
-    model = "gemini-pro"
-    result = VertexGeminiConfig.get_model_for_vertex_ai_url(model)
-    assert result == "gemini-pro"
-
-    # Test case 2: Gemini spec model with UUID
-    model = "gemini/ft-uuid-123"
-    result = VertexGeminiConfig.get_model_for_vertex_ai_url(model)
-    assert result == "ft-uuid-123"
-
-
-def test_is_model_gemini_spec_model():
-    # Test case 1: None input
-    assert VertexGeminiConfig._is_model_gemini_spec_model(None) == False
-
-    # Test case 2: Regular model name
-    assert VertexGeminiConfig._is_model_gemini_spec_model("gemini-pro") == False
-
-    # Test case 3: Gemini spec model
-    assert VertexGeminiConfig._is_model_gemini_spec_model("gemini/custom-model") == True
-
-
-def test_get_model_name_from_gemini_spec_model():
-    # Test case 1: Regular model name
-    model = "gemini-pro"
-    result = VertexGeminiConfig._get_model_name_from_gemini_spec_model(model)
-    assert result == "gemini-pro"
-
-    # Test case 2: Gemini spec model
-    model = "gemini/ft-uuid-123"
-    result = VertexGeminiConfig._get_model_name_from_gemini_spec_model(model)
-    assert result == "ft-uuid-123"
```
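Note that the tests deleted above are not dropped from the suite: the rewrite in the next hunk re-adds them (alongside a new `ChoiceLogprobs` import) in place of the avgLogprobs transform test.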
```diff
@@ -1,66 +1,64 @@
-import pytest
 import asyncio
 from unittest.mock import MagicMock
-from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexGeminiConfig
+
+import pytest
 
 import litellm
 from litellm import ModelResponse
+from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
+    VertexGeminiConfig,
+)
+from litellm.types.utils import ChoiceLogprobs
 
-@pytest.mark.asyncio
-async def test_transform_response_with_avglogprobs():
-    """
-    Test that the transform_response method correctly handles the avgLogprobs key
-    from Gemini Flash 2.0 responses.
-    """
-    # Create a mock response with avgLogprobs
-    response_json = {
-        "candidates": [{
-            "content": {"parts": [{"text": "Test response"}], "role": "model"},
-            "finishReason": "STOP",
-            "avgLogprobs": -0.3445799010140555
-        }],
-        "usageMetadata": {
-            "promptTokenCount": 10,
-            "candidatesTokenCount": 5,
-            "totalTokenCount": 15
-        }
-    }
-
-    # Create a mock HTTP response
-    mock_response = MagicMock()
-    mock_response.json.return_value = response_json
-
-    # Create a mock logging object
-    mock_logging = MagicMock()
-
-    # Create an instance of VertexGeminiConfig
-    config = VertexGeminiConfig()
-
-    # Create a ModelResponse object
-    model_response = ModelResponse(
-        id="test-id",
-        choices=[],
-        created=1234567890,
-        model="gemini-2.0-flash",
-        usage={
-            "prompt_tokens": 10,
-            "completion_tokens": 5,
-            "total_tokens": 15
-        }
-    )
-
-    # Call the transform_response method
-    transformed_response = config.transform_response(
-        model="gemini-2.0-flash",
-        raw_response=mock_response,
-        model_response=model_response,
-        logging_obj=mock_logging,
-        request_data={},
-        messages=[],
-        optional_params={},
-        litellm_params={},
-        encoding=None
-    )
-
-    # Assert that the avgLogprobs was correctly added to the model response
-    assert len(transformed_response.choices) == 1
-    assert transformed_response.choices[0].logprobs == -0.3445799010140555
+
+def test_top_logprobs():
+    non_default_params = {
+        "top_logprobs": 2,
+        "logprobs": True,
+    }
+    optional_params = {}
+    model = "gemini"
+
+    v = VertexGeminiConfig().map_openai_params(
+        non_default_params=non_default_params,
+        optional_params=optional_params,
+        model=model,
+        drop_params=False,
+    )
+    assert v["responseLogprobs"] is non_default_params["logprobs"]
+    assert v["logprobs"] is non_default_params["top_logprobs"]
+
+
+def test_get_model_for_vertex_ai_url():
+    # Test case 1: Regular model name
+    model = "gemini-pro"
+    result = VertexGeminiConfig.get_model_for_vertex_ai_url(model)
+    assert result == "gemini-pro"
+
+    # Test case 2: Gemini spec model with UUID
+    model = "gemini/ft-uuid-123"
+    result = VertexGeminiConfig.get_model_for_vertex_ai_url(model)
+    assert result == "ft-uuid-123"
+
+
+def test_is_model_gemini_spec_model():
+    # Test case 1: None input
+    assert VertexGeminiConfig._is_model_gemini_spec_model(None) == False
+
+    # Test case 2: Regular model name
+    assert VertexGeminiConfig._is_model_gemini_spec_model("gemini-pro") == False
+
+    # Test case 3: Gemini spec model
+    assert VertexGeminiConfig._is_model_gemini_spec_model("gemini/custom-model") == True
+
+
+def test_get_model_name_from_gemini_spec_model():
+    # Test case 1: Regular model name
+    model = "gemini-pro"
+    result = VertexGeminiConfig._get_model_name_from_gemini_spec_model(model)
+    assert result == "gemini-pro"
+
+    # Test case 2: Gemini spec model
+    model = "gemini/ft-uuid-123"
+    result = VertexGeminiConfig._get_model_name_from_gemini_spec_model(model)
+    assert result == "ft-uuid-123"
```
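For reference, the parameter mapping exercised by `test_top_logprobs` corresponds to end-user calls along these lines (a sketch; assumes a configured `GEMINI_API_KEY` and network access):

```python
import litellm

response = litellm.completion(
    model="gemini/gemini-2.0-flash",
    messages=[{"role": "user", "content": "hi"}],
    logprobs=True,   # mapped to responseLogprobs in the Gemini request
    top_logprobs=2,  # mapped to logprobs in the Gemini request
)
print(response.choices[0].logprobs)
```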
```diff
@@ -211,3 +211,30 @@ def test_custom_pricing_with_router_model_id():
     assert model_info["output_cost_per_token"] == 0.00003
     assert model_info["cache_creation_input_token_cost"] == 0.0000075
     assert model_info["cache_read_input_token_cost"] == 0.0000006
+
+
+def test_azure_realtime_cost_calculator():
+    from litellm import get_model_info
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    cost = handle_realtime_stream_cost_calculation(
+        results=[
+            {
+                "type": "session.created",
+                "session": {"model": "gpt-4o-realtime-preview-2024-12-17"},
+            },
+        ],
+        combined_usage_object=Usage(
+            prompt_tokens=100,
+            completion_tokens=100,
+            prompt_tokens_details=PromptTokensDetailsWrapper(
+                text_tokens=10, audio_tokens=90
+            ),
+        ),
+        custom_llm_provider="azure",
+        litellm_model_name="my-custom-azure-deployment",
+    )
+
+    assert cost > 0
```
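A quick sanity check on that assertion: with the 2024-12-17 prices added above, the usage object here (10 text + 90 audio prompt tokens, 100 completion tokens) works out to roughly 10 × 0.000005 + 90 × 0.00004 + 100 × 0.00002 ≈ $0.00565, assuming completion tokens are billed at the text output rate, so `cost > 0` holds with room to spare.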
```diff
@@ -82,4 +82,6 @@ def test_gemini_image_generation():
         messages=[{"role": "user", "content": "Generate an image of a cat"}],
         modalities=["image", "text"],
     )
     assert response.choices[0].message.content is not None
+
+
```
```diff
@@ -425,6 +425,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
             "cache_creation_input_audio_token_cost": {"type": "number"},
             "cache_creation_input_token_cost": {"type": "number"},
             "cache_read_input_token_cost": {"type": "number"},
+            "cache_read_input_audio_token_cost": {"type": "number"},
             "deprecation_date": {"type": "string"},
             "input_cost_per_audio_per_second": {"type": "number"},
             "input_cost_per_audio_per_second_above_128k_tokens": {"type": "number"},
@@ -549,7 +550,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
                 "type": "array",
                 "items": {
                     "type": "string",
-                    "enum": ["text", "image"],
+                    "enum": ["text", "image", "audio"],
                 },
             },
             "supports_native_streaming": {"type": "boolean"},
```
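The two schema tweaks above can be exercised in isolation; a minimal sketch using the `jsonschema` package (the trimmed-down schema here is illustrative, not the full test schema):

```python
import jsonschema  # pip install jsonschema

schema = {
    "type": "object",
    "properties": {
        "cache_read_input_audio_token_cost": {"type": "number"},
        "supported_output_modalities": {
            "type": "array",
            "items": {"type": "string", "enum": ["text", "image", "audio"]},
        },
    },
}

entry = {
    "cache_read_input_audio_token_cost": 2.5e-6,
    "supported_output_modalities": ["text", "audio"],
}

# Passes only because "audio" is in the enum and the new cost key is typed.
jsonschema.validate(entry, schema)
print("valid")
```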