diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 04050abf7b..d6e47b87ce 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -1560,10 +1560,18 @@ class PrometheusLogger(CustomLogger): - Max Budget - Budget Reset At """ - self.litellm_remaining_team_budget_metric.labels( - team.team_id, - team.team_alias or "", - ).set( + enum_values = UserAPIKeyLabelValues( + team=team.team_id, + team_alias=team.team_alias or "", + ) + + _labels = prometheus_label_factory( + supported_enum_labels=PrometheusMetricLabels.get_labels( + label_name="litellm_remaining_team_budget_metric" + ), + enum_values=enum_values, + ) + self.litellm_remaining_team_budget_metric.labels(**_labels).set( self._safe_get_remaining_budget( max_budget=team.max_budget, spend=team.spend, @@ -1571,16 +1579,22 @@ class PrometheusLogger(CustomLogger): ) if team.max_budget is not None: - self.litellm_team_max_budget_metric.labels( - team.team_id, - team.team_alias or "", - ).set(team.max_budget) + _labels = prometheus_label_factory( + supported_enum_labels=PrometheusMetricLabels.get_labels( + label_name="litellm_team_max_budget_metric" + ), + enum_values=enum_values, + ) + self.litellm_team_max_budget_metric.labels(**_labels).set(team.max_budget) if team.budget_reset_at is not None: - self.litellm_team_budget_remaining_hours_metric.labels( - team.team_id, - team.team_alias or "", - ).set( + _labels = prometheus_label_factory( + supported_enum_labels=PrometheusMetricLabels.get_labels( + label_name="litellm_team_budget_remaining_hours_metric" + ), + enum_values=enum_values, + ) + self.litellm_team_budget_remaining_hours_metric.labels(**_labels).set( self._get_remaining_hours_for_budget_reset( budget_reset_at=team.budget_reset_at ) diff --git a/litellm/llms/openai/chat/gpt_transformation.py b/litellm/llms/openai/chat/gpt_transformation.py index 9c1f177fc1..13b591e715 100644 --- a/litellm/llms/openai/chat/gpt_transformation.py +++ b/litellm/llms/openai/chat/gpt_transformation.py @@ -20,7 +20,7 @@ from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator from litellm.llms.base_llm.base_utils import BaseLLMModelInfo from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException from litellm.secret_managers.main import get_secret_str -from litellm.types.llms.openai import AllMessageValues +from litellm.types.llms.openai import AllMessageValues, ChatCompletionImageObject from litellm.types.utils import ModelResponse, ModelResponseStream from litellm.utils import convert_to_model_response_object @@ -178,6 +178,17 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig): def _transform_messages( self, messages: List[AllMessageValues], model: str ) -> List[AllMessageValues]: + """OpenAI no longer supports image_url as a string, so we need to convert it to a dict""" + for message in messages: + message_content = message.get("content") + if message_content and isinstance(message_content, list): + for content_item in message_content: + if content_item.get("type") == "image_url": + content_item = cast(ChatCompletionImageObject, content_item) + if isinstance(content_item["image_url"], str): + content_item["image_url"] = { + "url": content_item["image_url"], + } return messages def transform_request( diff --git a/litellm/llms/openai/chat/o_series_transformation.py b/litellm/llms/openai/chat/o_series_transformation.py index b74c7440b5..b2ffda6e7d 100644 --- a/litellm/llms/openai/chat/o_series_transformation.py +++ b/litellm/llms/openai/chat/o_series_transformation.py @@ -152,4 +152,5 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig): ) messages[i] = new_message # Replace the old message with the new one + messages = super()._transform_messages(messages, model) return messages diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 42ebef110e..bd5738ca7b 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -6499,6 +6499,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { @@ -6512,6 +6513,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "anthropic.claude-3-7-sonnet-20250219-v1:0": { @@ -6539,6 +6541,7 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, + "supports_pdf_input": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_response_schema": true, @@ -6555,6 +6558,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "anthropic.claude-3-5-haiku-20241022-v1:0": { @@ -6566,6 +6570,7 @@ "litellm_provider": "bedrock", "mode": "chat", "supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_function_calling": true, "supports_response_schema": true, "supports_prompt_caching": true, @@ -6595,6 +6600,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { @@ -6608,6 +6614,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { @@ -6620,6 +6627,7 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, + "supports_pdf_input": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_response_schema": true, @@ -6651,6 +6659,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "us.anthropic.claude-3-5-haiku-20241022-v1:0": { @@ -6662,6 +6671,7 @@ "litellm_provider": "bedrock", "mode": "chat", "supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_function_calling": true, "supports_prompt_caching": true, "supports_response_schema": true, @@ -6691,6 +6701,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { @@ -6704,6 +6715,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { @@ -6716,6 +6728,7 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, + "supports_pdf_input": true, "supports_assistant_prefill": true, "supports_prompt_caching": true, "supports_response_schema": true, @@ -6732,6 +6745,7 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_vision": true, + "supports_pdf_input": true, "supports_tool_choice": true }, "eu.anthropic.claude-3-5-haiku-20241022-v1:0": { @@ -6744,6 +6758,7 @@ "mode": "chat", "supports_function_calling": true, "supports_assistant_prefill": true, + "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, "supports_tool_choice": true diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html deleted file mode 100644 index 11decf6639..0000000000 --- a/litellm/proxy/_experimental/out/onboarding.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index bf0d780c4d..c3be5aa1d9 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -23,6 +23,7 @@ model_list: - model_name: bedrock-nova litellm_params: model: bedrock/us.amazon.nova-pro-v1:0 + - model_name: gpt-4o + litellm_params: + model: openai/gpt-4o -litellm_settings: - callbacks: ["langfuse"] \ No newline at end of file diff --git a/litellm/router.py b/litellm/router.py index 84946c6b43..4e93baf328 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -629,37 +629,6 @@ class Router: self.aget_messages = self.factory_function(litellm.aget_messages) self.arun_thread = self.factory_function(litellm.arun_thread) - def validate_fallbacks(self, fallback_param: Optional[List]): - """ - Validate the fallbacks parameter. - """ - if fallback_param is None: - return - for fallback_dict in fallback_param: - if not isinstance(fallback_dict, dict): - raise ValueError(f"Item '{fallback_dict}' is not a dictionary.") - if len(fallback_dict) != 1: - raise ValueError( - f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys." - ) - - def add_optional_pre_call_checks( - self, optional_pre_call_checks: Optional[OptionalPreCallChecks] - ): - if optional_pre_call_checks is not None: - for pre_call_check in optional_pre_call_checks: - _callback: Optional[CustomLogger] = None - if pre_call_check == "prompt_caching": - _callback = PromptCachingDeploymentCheck(cache=self.cache) - elif pre_call_check == "router_budget_limiting": - _callback = RouterBudgetLimiting( - dual_cache=self.cache, - provider_budget_config=self.provider_budget_config, - model_list=self.model_list, - ) - if _callback is not None: - litellm.logging_callback_manager.add_litellm_callback(_callback) - def routing_strategy_init( self, routing_strategy: Union[RoutingStrategy, str], routing_strategy_args: dict ): @@ -725,6 +694,37 @@ class Router: else: pass + def validate_fallbacks(self, fallback_param: Optional[List]): + """ + Validate the fallbacks parameter. + """ + if fallback_param is None: + return + for fallback_dict in fallback_param: + if not isinstance(fallback_dict, dict): + raise ValueError(f"Item '{fallback_dict}' is not a dictionary.") + if len(fallback_dict) != 1: + raise ValueError( + f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys." + ) + + def add_optional_pre_call_checks( + self, optional_pre_call_checks: Optional[OptionalPreCallChecks] + ): + if optional_pre_call_checks is not None: + for pre_call_check in optional_pre_call_checks: + _callback: Optional[CustomLogger] = None + if pre_call_check == "prompt_caching": + _callback = PromptCachingDeploymentCheck(cache=self.cache) + elif pre_call_check == "router_budget_limiting": + _callback = RouterBudgetLimiting( + dual_cache=self.cache, + provider_budget_config=self.provider_budget_config, + model_list=self.model_list, + ) + if _callback is not None: + litellm.logging_callback_manager.add_litellm_callback(_callback) + def print_deployment(self, deployment: dict): """ returns a copy of the deployment with the api key masked diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py index 49510d2894..f91ef0eae9 100644 --- a/tests/llm_translation/base_llm_unit_tests.py +++ b/tests/llm_translation/base_llm_unit_tests.py @@ -254,7 +254,6 @@ class BaseLLMChatTest(ABC): # relevant issue: https://github.com/BerriAI/litellm/issues/6741 assert response.choices[0].message.content is not None - @pytest.mark.parametrize( "response_format", [ @@ -337,7 +336,6 @@ class BaseLLMChatTest(ABC): print(f"translated_params={translated_params}") - @pytest.mark.flaky(retries=6, delay=1) def test_json_response_pydantic_obj(self): litellm.set_verbose = True @@ -613,6 +611,46 @@ class BaseLLMChatTest(ABC): assert response is not None + def test_image_url_string(self): + litellm.set_verbose = True + from litellm.utils import supports_vision + + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + + base_completion_call_args = self.get_base_completion_call_args() + if not supports_vision(base_completion_call_args["model"], None): + pytest.skip("Model does not support image input") + elif "http://" in image_url and "fireworks_ai" in base_completion_call_args.get( + "model" + ): + pytest.skip("Model does not support http:// input") + + image_url_param = image_url + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": image_url_param, + }, + ], + } + ] + + try: + response = self.completion_function( + **base_completion_call_args, messages=messages + ) + except litellm.InternalServerError: + pytest.skip("Model is overloaded") + + assert response is not None + @pytest.mark.flaky(retries=4, delay=1) def test_prompt_caching(self): litellm.set_verbose = True diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py index 641d121fa9..6bc5b42c45 100644 --- a/tests/logging_callback_tests/test_prometheus_unit_tests.py +++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py @@ -28,7 +28,7 @@ from litellm.types.utils import ( ) import pytest from unittest.mock import MagicMock, patch, call -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from litellm.integrations.prometheus import PrometheusLogger from litellm.proxy._types import UserAPIKeyAuth @@ -302,7 +302,7 @@ async def test_increment_remaining_budget_metrics(prometheus_logger): # Test remaining budget metrics prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with( - "team1", "team_alias1" + team="team1", team_alias="team_alias1" ) prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with( 40 # 100 - (50 + 10) @@ -317,7 +317,7 @@ async def test_increment_remaining_budget_metrics(prometheus_logger): # Test max budget metrics prometheus_logger.litellm_team_max_budget_metric.labels.assert_called_once_with( - "team1", "team_alias1" + team="team1", team_alias="team_alias1" ) prometheus_logger.litellm_team_max_budget_metric.labels().set.assert_called_once_with( 100 @@ -332,7 +332,7 @@ async def test_increment_remaining_budget_metrics(prometheus_logger): # Test remaining hours metrics prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.assert_called_once_with( - "team1", "team_alias1" + team="team1", team_alias="team_alias1" ) # The remaining hours should be approximately 10 (with some small difference due to test execution time) remaining_hours_call = prometheus_logger.litellm_team_budget_remaining_hours_metric.labels().set.call_args[ @@ -1159,9 +1159,9 @@ async def test_initialize_remaining_budget_metrics(prometheus_logger): # Verify the labels were called with correct team information label_calls = [ - call.labels("team1", "alias1"), - call.labels("team2", "alias2"), - call.labels("team3", ""), + call.labels(team="team1", team_alias="alias1"), + call.labels(team="team2", team_alias="alias2"), + call.labels(team="team3", team_alias=""), ] prometheus_logger.litellm_team_budget_remaining_hours_metric.assert_has_calls( label_calls, any_order=True @@ -1334,3 +1334,169 @@ async def test_initialize_api_key_budget_metrics(prometheus_logger): prometheus_logger.litellm_api_key_max_budget_metric.assert_has_calls( expected_max_budget_calls, any_order=True ) + + +def test_set_team_budget_metrics_multiple_teams(prometheus_logger): + """ + Test that _set_team_budget_metrics correctly handles multiple teams with different budgets and reset times + """ + # Create test teams with different budgets and reset times + teams = [ + MagicMock( + team_id="team1", + team_alias="alias1", + spend=50.0, + max_budget=100.0, + budget_reset_at=datetime(2024, 12, 31, tzinfo=timezone.utc), + ), + MagicMock( + team_id="team2", + team_alias="alias2", + spend=75.0, + max_budget=150.0, + budget_reset_at=datetime(2024, 6, 30, tzinfo=timezone.utc), + ), + MagicMock( + team_id="team3", + team_alias="alias3", + spend=25.0, + max_budget=200.0, + budget_reset_at=datetime(2024, 3, 31, tzinfo=timezone.utc), + ), + ] + + # Mock the metrics + prometheus_logger.litellm_remaining_team_budget_metric = MagicMock() + prometheus_logger.litellm_team_max_budget_metric = MagicMock() + prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock() + + # Set metrics for each team + for team in teams: + prometheus_logger._set_team_budget_metrics(team) + + # Verify remaining budget metric calls + expected_remaining_budget_calls = [ + call.labels(team="team1", team_alias="alias1").set(50.0), # 100 - 50 + call.labels(team="team2", team_alias="alias2").set(75.0), # 150 - 75 + call.labels(team="team3", team_alias="alias3").set(175.0), # 200 - 25 + ] + prometheus_logger.litellm_remaining_team_budget_metric.assert_has_calls( + expected_remaining_budget_calls, any_order=True + ) + + # Verify max budget metric calls + expected_max_budget_calls = [ + call.labels("team1", "alias1").set(100.0), + call.labels("team2", "alias2").set(150.0), + call.labels("team3", "alias3").set(200.0), + ] + prometheus_logger.litellm_team_max_budget_metric.assert_has_calls( + expected_max_budget_calls, any_order=True + ) + + # Verify budget reset metric calls + # Note: The exact hours will depend on the current time, so we'll just verify the structure + assert ( + prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.call_count + == 3 + ) + assert ( + prometheus_logger.litellm_team_budget_remaining_hours_metric.labels().set.call_count + == 3 + ) + + +def test_set_team_budget_metrics_null_values(prometheus_logger): + """ + Test that _set_team_budget_metrics correctly handles null/None values + """ + # Create test team with null values + team = MagicMock( + team_id="team_null", + team_alias=None, # Test null alias + spend=None, # Test null spend + max_budget=None, # Test null max_budget + budget_reset_at=None, # Test null reset time + ) + + # Mock the metrics + prometheus_logger.litellm_remaining_team_budget_metric = MagicMock() + prometheus_logger.litellm_team_max_budget_metric = MagicMock() + prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock() + + # Set metrics for the team + prometheus_logger._set_team_budget_metrics(team) + + # Verify remaining budget metric is set to infinity when max_budget is None + prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with( + team="team_null", team_alias="" + ) + prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with( + float("inf") + ) + + # Verify max budget metric is not set when max_budget is None + prometheus_logger.litellm_team_max_budget_metric.assert_not_called() + + # Verify reset metric is not set when budget_reset_at is None + prometheus_logger.litellm_team_budget_remaining_hours_metric.assert_not_called() + + +def test_set_team_budget_metrics_with_custom_labels(prometheus_logger, monkeypatch): + """ + Test that _set_team_budget_metrics correctly handles custom prometheus labels + """ + # Set custom prometheus labels + custom_labels = ["metadata.organization", "metadata.environment"] + monkeypatch.setattr("litellm.custom_prometheus_metadata_labels", custom_labels) + + # Create test team with custom metadata + team = MagicMock( + team_id="team1", + team_alias="alias1", + spend=50.0, + max_budget=100.0, + budget_reset_at=datetime(2024, 12, 31, tzinfo=timezone.utc), + ) + + # Mock the metrics + prometheus_logger.litellm_remaining_team_budget_metric = MagicMock() + prometheus_logger.litellm_team_max_budget_metric = MagicMock() + prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock() + + # Set metrics for the team + prometheus_logger._set_team_budget_metrics(team) + + # Verify remaining budget metric includes custom labels + prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with( + team="team1", + team_alias="alias1", + metadata_organization=None, + metadata_environment=None, + ) + prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with( + 50.0 + ) # 100 - 50 + + # Verify max budget metric includes custom labels + prometheus_logger.litellm_team_max_budget_metric.labels.assert_called_once_with( + team="team1", + team_alias="alias1", + metadata_organization=None, + metadata_environment=None, + ) + prometheus_logger.litellm_team_max_budget_metric.labels().set.assert_called_once_with( + 100.0 + ) + + # Verify budget reset metric includes custom labels + budget_reset_calls = ( + prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.call_args_list + ) + assert len(budget_reset_calls) == 1 + assert budget_reset_calls[0][1] == { + "team": "team1", + "team_alias": "alias1", + "metadata_organization": None, + "metadata_environment": None, + }