diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
index b3b7857ea1..c57090093b 100644
--- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
+++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
@@ -748,9 +748,6 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                 chat_completion_logprobs = self._transform_logprobs(
                     logprobs_result=candidate["logprobsResult"]
                 )
-            # Handle avgLogprobs for Gemini Flash 2.0
-            elif "avgLogprobs" in candidate:
-                chat_completion_logprobs = candidate["avgLogprobs"]
 
             if tools:
                 chat_completion_message["tool_calls"] = tools
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index c67c3b85af..e7485f7378 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1288,6 +1288,68 @@
         "supports_system_messages": true,
         "supports_tool_choice": true
     },
+    "azure/gpt-4o-realtime-preview-2024-12-17": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000005,
+        "input_cost_per_audio_token": 0.00004,
+        "cache_read_input_token_cost": 0.0000025,
+        "output_cost_per_token": 0.00002,
+        "output_cost_per_audio_token": 0.00008,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_modalities": ["text", "audio"],
+        "supported_output_modalities": ["text", "audio"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_audio_input": true,
+        "supports_audio_output": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true
+    },
+    "azure/us/gpt-4o-realtime-preview-2024-12-17": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5.5e-6,
+        "input_cost_per_audio_token": 44e-6,
+        "cache_read_input_token_cost": 2.75e-6,
+        "cache_read_input_audio_token_cost": 2.5e-6,
+        "output_cost_per_token": 22e-6,
+        "output_cost_per_audio_token": 80e-6,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_modalities": ["text", "audio"],
+        "supported_output_modalities": ["text", "audio"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_audio_input": true,
+        "supports_audio_output": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true
+    },
+    "azure/eu/gpt-4o-realtime-preview-2024-12-17": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5.5e-6,
+        "input_cost_per_audio_token": 44e-6,
+        "cache_read_input_token_cost": 2.75e-6,
+        "cache_read_input_audio_token_cost": 2.5e-6,
+        "output_cost_per_token": 22e-6,
+        "output_cost_per_audio_token": 80e-6,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_modalities": ["text", "audio"],
+        "supported_output_modalities": ["text", "audio"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_audio_input": true,
+        "supports_audio_output": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true
+    },
     "azure/gpt-4o-realtime-preview-2024-10-01": {
         "max_tokens": 4096,
         "max_input_tokens": 128000,
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 475aa8aa6a..6ca7b33a31 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -724,7 +724,7 @@ class Choices(OpenAIObject):
         finish_reason=None,
         index=0,
         message: Optional[Union[Message, dict]] = None,
-        logprobs=None,
+        logprobs: Optional[Union[ChoiceLogprobs, dict, Any]] = None,
         enhancements=None,
         **params,
     ):
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index c67c3b85af..e7485f7378 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1288,6 +1288,68 @@
         "supports_system_messages": true,
         "supports_tool_choice": true
     },
+    "azure/gpt-4o-realtime-preview-2024-12-17": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000005,
+        "input_cost_per_audio_token": 0.00004,
+        "cache_read_input_token_cost": 0.0000025,
+        "output_cost_per_token": 0.00002,
+        "output_cost_per_audio_token": 0.00008,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_modalities": ["text", "audio"],
+        "supported_output_modalities": ["text", "audio"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_audio_input": true,
+        "supports_audio_output": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true
+    },
+    "azure/us/gpt-4o-realtime-preview-2024-12-17": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5.5e-6,
+        "input_cost_per_audio_token": 44e-6,
+        "cache_read_input_token_cost": 2.75e-6,
+        "cache_read_input_audio_token_cost": 2.5e-6,
+        "output_cost_per_token": 22e-6,
+        "output_cost_per_audio_token": 80e-6,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_modalities": ["text", "audio"],
+        "supported_output_modalities": ["text", "audio"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_audio_input": true,
+        "supports_audio_output": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true
+    },
+    "azure/eu/gpt-4o-realtime-preview-2024-12-17": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 5.5e-6,
+        "input_cost_per_audio_token": 44e-6,
+        "cache_read_input_token_cost": 2.75e-6,
+        "cache_read_input_audio_token_cost": 2.5e-6,
+        "output_cost_per_token": 22e-6,
+        "output_cost_per_audio_token": 80e-6,
+        "litellm_provider": "azure",
+        "mode": "chat",
+        "supported_modalities": ["text", "audio"],
+        "supported_output_modalities": ["text", "audio"],
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_audio_input": true,
+        "supports_audio_output": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true
+    },
     "azure/gpt-4o-realtime-preview-2024-10-01": {
         "max_tokens": 4096,
         "max_input_tokens": 128000,
diff --git a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio.py b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio.py
deleted file mode 100644
index 18d965200c..0000000000
--- a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import json
-import os
-import sys
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import httpx
-import pytest
-
-sys.path.insert(
-    0, os.path.abspath("../../../../..")
-)  # Adds the parent directory to the system path
-
-from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
-    VertexGeminiConfig,
-)
-
-
-def test_top_logprobs():
-    non_default_params = {
-        "top_logprobs": 2,
-        "logprobs": True,
-    }
-    optional_params = {}
-    model = "gemini"
-
-    v = VertexGeminiConfig().map_openai_params(
-        non_default_params=non_default_params,
-        optional_params=optional_params,
-        model=model,
-        drop_params=False,
-    )
-    assert v["responseLogprobs"] is non_default_params["logprobs"]
-    assert v["logprobs"] is non_default_params["top_logprobs"]
-
-
-def test_get_model_for_vertex_ai_url():
-    # Test case 1: Regular model name
-    model = "gemini-pro"
-    result = VertexGeminiConfig.get_model_for_vertex_ai_url(model)
-    assert result == "gemini-pro"
-
-    # Test case 2: Gemini spec model with UUID
-    model = "gemini/ft-uuid-123"
-    result = VertexGeminiConfig.get_model_for_vertex_ai_url(model)
-    assert result == "ft-uuid-123"
-
-
-def test_is_model_gemini_spec_model():
-    # Test case 1: None input
-    assert VertexGeminiConfig._is_model_gemini_spec_model(None) == False
-
-    # Test case 2: Regular model name
-    assert VertexGeminiConfig._is_model_gemini_spec_model("gemini-pro") == False
-
-    # Test case 3: Gemini spec model
-    assert VertexGeminiConfig._is_model_gemini_spec_model("gemini/custom-model") == True
-
-
-def test_get_model_name_from_gemini_spec_model():
-    # Test case 1: Regular model name
-    model = "gemini-pro"
-    result = VertexGeminiConfig._get_model_name_from_gemini_spec_model(model)
-    assert result == "gemini-pro"
-
-    # Test case 2: Gemini spec model
-    model = "gemini/ft-uuid-123"
-    result = VertexGeminiConfig._get_model_name_from_gemini_spec_model(model)
-    assert result == "ft-uuid-123"
diff --git a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
index 7ef34b095e..d2169c299e 100644
--- a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
+++ b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
@@ -1,66 +1,64 @@
-import pytest
 import asyncio
 from unittest.mock import MagicMock
-from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexGeminiConfig
+
+import pytest
+
 import litellm
 from litellm import ModelResponse
+from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
+    VertexGeminiConfig,
+)
+from litellm.types.utils import ChoiceLogprobs
 
-@pytest.mark.asyncio
-async def test_transform_response_with_avglogprobs():
-    """
-    Test that the transform_response method correctly handles the avgLogprobs key
-    from Gemini Flash 2.0 responses.
- """ - # Create a mock response with avgLogprobs - response_json = { - "candidates": [{ - "content": {"parts": [{"text": "Test response"}], "role": "model"}, - "finishReason": "STOP", - "avgLogprobs": -0.3445799010140555 - }], - "usageMetadata": { - "promptTokenCount": 10, - "candidatesTokenCount": 5, - "totalTokenCount": 15 - } + +def test_top_logprobs(): + non_default_params = { + "top_logprobs": 2, + "logprobs": True, } - - # Create a mock HTTP response - mock_response = MagicMock() - mock_response.json.return_value = response_json - - # Create a mock logging object - mock_logging = MagicMock() - - # Create an instance of VertexGeminiConfig - config = VertexGeminiConfig() - - # Create a ModelResponse object - model_response = ModelResponse( - id="test-id", - choices=[], - created=1234567890, - model="gemini-2.0-flash", - usage={ - "prompt_tokens": 10, - "completion_tokens": 5, - "total_tokens": 15 - } + optional_params = {} + model = "gemini" + + v = VertexGeminiConfig().map_openai_params( + non_default_params=non_default_params, + optional_params=optional_params, + model=model, + drop_params=False, ) - - # Call the transform_response method - transformed_response = config.transform_response( - model="gemini-2.0-flash", - raw_response=mock_response, - model_response=model_response, - logging_obj=mock_logging, - request_data={}, - messages=[], - optional_params={}, - litellm_params={}, - encoding=None - ) - - # Assert that the avgLogprobs was correctly added to the model response - assert len(transformed_response.choices) == 1 - assert transformed_response.choices[0].logprobs == -0.3445799010140555 + assert v["responseLogprobs"] is non_default_params["logprobs"] + assert v["logprobs"] is non_default_params["top_logprobs"] + + +def test_get_model_for_vertex_ai_url(): + # Test case 1: Regular model name + model = "gemini-pro" + result = VertexGeminiConfig.get_model_for_vertex_ai_url(model) + assert result == "gemini-pro" + + # Test case 2: Gemini spec model with UUID + model = "gemini/ft-uuid-123" + result = VertexGeminiConfig.get_model_for_vertex_ai_url(model) + assert result == "ft-uuid-123" + + +def test_is_model_gemini_spec_model(): + # Test case 1: None input + assert VertexGeminiConfig._is_model_gemini_spec_model(None) == False + + # Test case 2: Regular model name + assert VertexGeminiConfig._is_model_gemini_spec_model("gemini-pro") == False + + # Test case 3: Gemini spec model + assert VertexGeminiConfig._is_model_gemini_spec_model("gemini/custom-model") == True + + +def test_get_model_name_from_gemini_spec_model(): + # Test case 1: Regular model name + model = "gemini-pro" + result = VertexGeminiConfig._get_model_name_from_gemini_spec_model(model) + assert result == "gemini-pro" + + # Test case 2: Gemini spec model + model = "gemini/ft-uuid-123" + result = VertexGeminiConfig._get_model_name_from_gemini_spec_model(model) + assert result == "ft-uuid-123" diff --git a/tests/litellm/test_cost_calculator.py b/tests/litellm/test_cost_calculator.py index 4621dd722a..870906845a 100644 --- a/tests/litellm/test_cost_calculator.py +++ b/tests/litellm/test_cost_calculator.py @@ -211,3 +211,30 @@ def test_custom_pricing_with_router_model_id(): assert model_info["output_cost_per_token"] == 0.00003 assert model_info["cache_creation_input_token_cost"] == 0.0000075 assert model_info["cache_read_input_token_cost"] == 0.0000006 + + +def test_azure_realtime_cost_calculator(): + from litellm import get_model_info + + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = 
+
+    cost = handle_realtime_stream_cost_calculation(
+        results=[
+            {
+                "type": "session.created",
+                "session": {"model": "gpt-4o-realtime-preview-2024-12-17"},
+            },
+        ],
+        combined_usage_object=Usage(
+            prompt_tokens=100,
+            completion_tokens=100,
+            prompt_tokens_details=PromptTokensDetailsWrapper(
+                text_tokens=10, audio_tokens=90
+            ),
+        ),
+        custom_llm_provider="azure",
+        litellm_model_name="my-custom-azure-deployment",
+    )
+
+    assert cost > 0
diff --git a/tests/llm_translation/test_gemini.py b/tests/llm_translation/test_gemini.py
index 2763f451f6..9e6105e39a 100644
--- a/tests/llm_translation/test_gemini.py
+++ b/tests/llm_translation/test_gemini.py
@@ -82,4 +82,6 @@ def test_gemini_image_generation():
         messages=[{"role": "user", "content": "Generate an image of a cat"}],
         modalities=["image", "text"],
     )
-    assert response.choices[0].message.content is not None
\ No newline at end of file
+    assert response.choices[0].message.content is not None
+
+
diff --git a/tests/local_testing/test_get_model_info.py b/tests/local_testing/test_get_model_info.py
index eae1b7ef65..15e80e831f 100644
--- a/tests/local_testing/test_get_model_info.py
+++ b/tests/local_testing/test_get_model_info.py
@@ -425,6 +425,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
             "cache_creation_input_audio_token_cost": {"type": "number"},
             "cache_creation_input_token_cost": {"type": "number"},
             "cache_read_input_token_cost": {"type": "number"},
+            "cache_read_input_audio_token_cost": {"type": "number"},
             "deprecation_date": {"type": "string"},
             "input_cost_per_audio_per_second": {"type": "number"},
             "input_cost_per_audio_per_second_above_128k_tokens": {"type": "number"},
@@ -549,7 +550,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
                 "type": "array",
                 "items": {
                     "type": "string",
-                    "enum": ["text", "image"],
+                    "enum": ["text", "image", "audio"],
                 },
             },
             "supports_native_streaming": {"type": "boolean"},
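
Usage note (not part of the diff above): a minimal sketch of how the pricing entries added here are surfaced at runtime, assuming litellm's public get_model_cost_map and get_model_info helpers (the same ones the diff's own tests call); the asserted numbers simply mirror the JSON values added above.

    import os

    import litellm

    # Read the bundled local cost map instead of fetching the hosted one,
    # mirroring the setup used in test_azure_realtime_cost_calculator above.
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    # Look up the newly added realtime deployment; the "azure/" prefix keys
    # the entry added to model_prices_and_context_window.json in this diff.
    info = litellm.get_model_info(model="azure/gpt-4o-realtime-preview-2024-12-17")
    assert info["input_cost_per_token"] == 0.000005
    assert info["output_cost_per_token"] == 0.00002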