diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 04050abf7b..d6e47b87ce 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -1560,10 +1560,18 @@ class PrometheusLogger(CustomLogger):
- Max Budget
- Budget Reset At
"""
- self.litellm_remaining_team_budget_metric.labels(
- team.team_id,
- team.team_alias or "",
- ).set(
+ enum_values = UserAPIKeyLabelValues(
+ team=team.team_id,
+ team_alias=team.team_alias or "",
+ )
+
+ _labels = prometheus_label_factory(
+ supported_enum_labels=PrometheusMetricLabels.get_labels(
+ label_name="litellm_remaining_team_budget_metric"
+ ),
+ enum_values=enum_values,
+ )
+ self.litellm_remaining_team_budget_metric.labels(**_labels).set(
self._safe_get_remaining_budget(
max_budget=team.max_budget,
spend=team.spend,
@@ -1571,16 +1579,22 @@ class PrometheusLogger(CustomLogger):
)
if team.max_budget is not None:
- self.litellm_team_max_budget_metric.labels(
- team.team_id,
- team.team_alias or "",
- ).set(team.max_budget)
+ _labels = prometheus_label_factory(
+ supported_enum_labels=PrometheusMetricLabels.get_labels(
+ label_name="litellm_team_max_budget_metric"
+ ),
+ enum_values=enum_values,
+ )
+ self.litellm_team_max_budget_metric.labels(**_labels).set(team.max_budget)
if team.budget_reset_at is not None:
- self.litellm_team_budget_remaining_hours_metric.labels(
- team.team_id,
- team.team_alias or "",
- ).set(
+ _labels = prometheus_label_factory(
+ supported_enum_labels=PrometheusMetricLabels.get_labels(
+ label_name="litellm_team_budget_remaining_hours_metric"
+ ),
+ enum_values=enum_values,
+ )
+ self.litellm_team_budget_remaining_hours_metric.labels(**_labels).set(
self._get_remaining_hours_for_budget_reset(
budget_reset_at=team.budget_reset_at
)
diff --git a/litellm/llms/openai/chat/gpt_transformation.py b/litellm/llms/openai/chat/gpt_transformation.py
index 9c1f177fc1..13b591e715 100644
--- a/litellm/llms/openai/chat/gpt_transformation.py
+++ b/litellm/llms/openai/chat/gpt_transformation.py
@@ -20,7 +20,7 @@ from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.secret_managers.main import get_secret_str
-from litellm.types.llms.openai import AllMessageValues
+from litellm.types.llms.openai import AllMessageValues, ChatCompletionImageObject
from litellm.types.utils import ModelResponse, ModelResponseStream
from litellm.utils import convert_to_model_response_object
@@ -178,6 +178,17 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):
def _transform_messages(
self, messages: List[AllMessageValues], model: str
) -> List[AllMessageValues]:
+ """OpenAI no longer supports image_url as a string, so we need to convert it to a dict"""
+ for message in messages:
+ message_content = message.get("content")
+ if message_content and isinstance(message_content, list):
+ for content_item in message_content:
+ if content_item.get("type") == "image_url":
+ content_item = cast(ChatCompletionImageObject, content_item)
+ if isinstance(content_item["image_url"], str):
+ content_item["image_url"] = {
+ "url": content_item["image_url"],
+ }
return messages
def transform_request(
diff --git a/litellm/llms/openai/chat/o_series_transformation.py b/litellm/llms/openai/chat/o_series_transformation.py
index b74c7440b5..b2ffda6e7d 100644
--- a/litellm/llms/openai/chat/o_series_transformation.py
+++ b/litellm/llms/openai/chat/o_series_transformation.py
@@ -152,4 +152,5 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig):
)
messages[i] = new_message # Replace the old message with the new one
+ messages = super()._transform_messages(messages, model)
return messages
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 42ebef110e..bd5738ca7b 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -6499,6 +6499,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
+ "supports_pdf_input": true,
"supports_tool_choice": true
},
"anthropic.claude-3-5-sonnet-20240620-v1:0": {
@@ -6512,6 +6513,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
+ "supports_pdf_input": true,
"supports_tool_choice": true
},
"anthropic.claude-3-7-sonnet-20250219-v1:0": {
@@ -6539,6 +6541,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
+ "supports_pdf_input": true,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
@@ -6555,6 +6558,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
+ "supports_pdf_input": true,
"supports_tool_choice": true
},
"anthropic.claude-3-5-haiku-20241022-v1:0": {
@@ -6566,6 +6570,7 @@
"litellm_provider": "bedrock",
"mode": "chat",
"supports_assistant_prefill": true,
+ "supports_pdf_input": true,
"supports_function_calling": true,
"supports_response_schema": true,
"supports_prompt_caching": true,
@@ -6595,6 +6600,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
+ "supports_pdf_input": true,
"supports_tool_choice": true
},
"us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
@@ -6608,6 +6614,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
+ "supports_pdf_input": true,
"supports_tool_choice": true
},
"us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
@@ -6620,6 +6627,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
+ "supports_pdf_input": true,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
@@ -6651,6 +6659,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
+ "supports_pdf_input": true,
"supports_tool_choice": true
},
"us.anthropic.claude-3-5-haiku-20241022-v1:0": {
@@ -6662,6 +6671,7 @@
"litellm_provider": "bedrock",
"mode": "chat",
"supports_assistant_prefill": true,
+ "supports_pdf_input": true,
"supports_function_calling": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
@@ -6691,6 +6701,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
+ "supports_pdf_input": true,
"supports_tool_choice": true
},
"eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
@@ -6704,6 +6715,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
+ "supports_pdf_input": true,
"supports_tool_choice": true
},
"eu.anthropic.claude-3-5-sonnet-20241022-v2:0": {
@@ -6716,6 +6728,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
+ "supports_pdf_input": true,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
@@ -6732,6 +6745,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
+ "supports_pdf_input": true,
"supports_tool_choice": true
},
"eu.anthropic.claude-3-5-haiku-20241022-v1:0": {
@@ -6744,6 +6758,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_assistant_prefill": true,
+ "supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_tool_choice": true
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index 11decf6639..0000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-
LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index bf0d780c4d..c3be5aa1d9 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -23,6 +23,7 @@ model_list:
- model_name: bedrock-nova
litellm_params:
model: bedrock/us.amazon.nova-pro-v1:0
+ - model_name: gpt-4o
+ litellm_params:
+ model: openai/gpt-4o
-litellm_settings:
- callbacks: ["langfuse"]
\ No newline at end of file
diff --git a/litellm/router.py b/litellm/router.py
index 84946c6b43..4e93baf328 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -629,37 +629,6 @@ class Router:
self.aget_messages = self.factory_function(litellm.aget_messages)
self.arun_thread = self.factory_function(litellm.arun_thread)
- def validate_fallbacks(self, fallback_param: Optional[List]):
- """
- Validate the fallbacks parameter.
- """
- if fallback_param is None:
- return
- for fallback_dict in fallback_param:
- if not isinstance(fallback_dict, dict):
- raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
- if len(fallback_dict) != 1:
- raise ValueError(
- f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys."
- )
-
- def add_optional_pre_call_checks(
- self, optional_pre_call_checks: Optional[OptionalPreCallChecks]
- ):
- if optional_pre_call_checks is not None:
- for pre_call_check in optional_pre_call_checks:
- _callback: Optional[CustomLogger] = None
- if pre_call_check == "prompt_caching":
- _callback = PromptCachingDeploymentCheck(cache=self.cache)
- elif pre_call_check == "router_budget_limiting":
- _callback = RouterBudgetLimiting(
- dual_cache=self.cache,
- provider_budget_config=self.provider_budget_config,
- model_list=self.model_list,
- )
- if _callback is not None:
- litellm.logging_callback_manager.add_litellm_callback(_callback)
-
def routing_strategy_init(
self, routing_strategy: Union[RoutingStrategy, str], routing_strategy_args: dict
):
@@ -725,6 +694,37 @@ class Router:
else:
pass
+ def validate_fallbacks(self, fallback_param: Optional[List]):
+ """
+ Validate the fallbacks parameter.
+ """
+ if fallback_param is None:
+ return
+ for fallback_dict in fallback_param:
+ if not isinstance(fallback_dict, dict):
+ raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
+ if len(fallback_dict) != 1:
+ raise ValueError(
+ f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys."
+ )
+
+ def add_optional_pre_call_checks(
+ self, optional_pre_call_checks: Optional[OptionalPreCallChecks]
+ ):
+ if optional_pre_call_checks is not None:
+ for pre_call_check in optional_pre_call_checks:
+ _callback: Optional[CustomLogger] = None
+ if pre_call_check == "prompt_caching":
+ _callback = PromptCachingDeploymentCheck(cache=self.cache)
+ elif pre_call_check == "router_budget_limiting":
+ _callback = RouterBudgetLimiting(
+ dual_cache=self.cache,
+ provider_budget_config=self.provider_budget_config,
+ model_list=self.model_list,
+ )
+ if _callback is not None:
+ litellm.logging_callback_manager.add_litellm_callback(_callback)
+
def print_deployment(self, deployment: dict):
"""
returns a copy of the deployment with the api key masked
diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py
index 49510d2894..f91ef0eae9 100644
--- a/tests/llm_translation/base_llm_unit_tests.py
+++ b/tests/llm_translation/base_llm_unit_tests.py
@@ -254,7 +254,6 @@ class BaseLLMChatTest(ABC):
# relevant issue: https://github.com/BerriAI/litellm/issues/6741
assert response.choices[0].message.content is not None
-
@pytest.mark.parametrize(
"response_format",
[
@@ -337,7 +336,6 @@ class BaseLLMChatTest(ABC):
print(f"translated_params={translated_params}")
-
@pytest.mark.flaky(retries=6, delay=1)
def test_json_response_pydantic_obj(self):
litellm.set_verbose = True
@@ -613,6 +611,46 @@ class BaseLLMChatTest(ABC):
assert response is not None
+ def test_image_url_string(self):
+ litellm.set_verbose = True
+ from litellm.utils import supports_vision
+
+ os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+ litellm.model_cost = litellm.get_model_cost_map(url="")
+
+ image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+
+ base_completion_call_args = self.get_base_completion_call_args()
+ if not supports_vision(base_completion_call_args["model"], None):
+ pytest.skip("Model does not support image input")
+ elif "http://" in image_url and "fireworks_ai" in base_completion_call_args.get(
+ "model"
+ ):
+ pytest.skip("Model does not support http:// input")
+
+ image_url_param = image_url
+ messages = [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "What's in this image?"},
+ {
+ "type": "image_url",
+ "image_url": image_url_param,
+ },
+ ],
+ }
+ ]
+
+ try:
+ response = self.completion_function(
+ **base_completion_call_args, messages=messages
+ )
+ except litellm.InternalServerError:
+ pytest.skip("Model is overloaded")
+
+ assert response is not None
+
@pytest.mark.flaky(retries=4, delay=1)
def test_prompt_caching(self):
litellm.set_verbose = True
diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py
index 641d121fa9..6bc5b42c45 100644
--- a/tests/logging_callback_tests/test_prometheus_unit_tests.py
+++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py
@@ -28,7 +28,7 @@ from litellm.types.utils import (
)
import pytest
from unittest.mock import MagicMock, patch, call
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
from litellm.integrations.prometheus import PrometheusLogger
from litellm.proxy._types import UserAPIKeyAuth
@@ -302,7 +302,7 @@ async def test_increment_remaining_budget_metrics(prometheus_logger):
# Test remaining budget metrics
prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
- "team1", "team_alias1"
+ team="team1", team_alias="team_alias1"
)
prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
40 # 100 - (50 + 10)
@@ -317,7 +317,7 @@ async def test_increment_remaining_budget_metrics(prometheus_logger):
# Test max budget metrics
prometheus_logger.litellm_team_max_budget_metric.labels.assert_called_once_with(
- "team1", "team_alias1"
+ team="team1", team_alias="team_alias1"
)
prometheus_logger.litellm_team_max_budget_metric.labels().set.assert_called_once_with(
100
@@ -332,7 +332,7 @@ async def test_increment_remaining_budget_metrics(prometheus_logger):
# Test remaining hours metrics
prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.assert_called_once_with(
- "team1", "team_alias1"
+ team="team1", team_alias="team_alias1"
)
# The remaining hours should be approximately 10 (with some small difference due to test execution time)
remaining_hours_call = prometheus_logger.litellm_team_budget_remaining_hours_metric.labels().set.call_args[
@@ -1159,9 +1159,9 @@ async def test_initialize_remaining_budget_metrics(prometheus_logger):
# Verify the labels were called with correct team information
label_calls = [
- call.labels("team1", "alias1"),
- call.labels("team2", "alias2"),
- call.labels("team3", ""),
+ call.labels(team="team1", team_alias="alias1"),
+ call.labels(team="team2", team_alias="alias2"),
+ call.labels(team="team3", team_alias=""),
]
prometheus_logger.litellm_team_budget_remaining_hours_metric.assert_has_calls(
label_calls, any_order=True
@@ -1334,3 +1334,169 @@ async def test_initialize_api_key_budget_metrics(prometheus_logger):
prometheus_logger.litellm_api_key_max_budget_metric.assert_has_calls(
expected_max_budget_calls, any_order=True
)
+
+
+def test_set_team_budget_metrics_multiple_teams(prometheus_logger):
+ """
+ Test that _set_team_budget_metrics correctly handles multiple teams with different budgets and reset times
+ """
+ # Create test teams with different budgets and reset times
+ teams = [
+ MagicMock(
+ team_id="team1",
+ team_alias="alias1",
+ spend=50.0,
+ max_budget=100.0,
+ budget_reset_at=datetime(2024, 12, 31, tzinfo=timezone.utc),
+ ),
+ MagicMock(
+ team_id="team2",
+ team_alias="alias2",
+ spend=75.0,
+ max_budget=150.0,
+ budget_reset_at=datetime(2024, 6, 30, tzinfo=timezone.utc),
+ ),
+ MagicMock(
+ team_id="team3",
+ team_alias="alias3",
+ spend=25.0,
+ max_budget=200.0,
+ budget_reset_at=datetime(2024, 3, 31, tzinfo=timezone.utc),
+ ),
+ ]
+
+ # Mock the metrics
+ prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
+ prometheus_logger.litellm_team_max_budget_metric = MagicMock()
+ prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()
+
+ # Set metrics for each team
+ for team in teams:
+ prometheus_logger._set_team_budget_metrics(team)
+
+ # Verify remaining budget metric calls
+ expected_remaining_budget_calls = [
+ call.labels(team="team1", team_alias="alias1").set(50.0), # 100 - 50
+ call.labels(team="team2", team_alias="alias2").set(75.0), # 150 - 75
+ call.labels(team="team3", team_alias="alias3").set(175.0), # 200 - 25
+ ]
+ prometheus_logger.litellm_remaining_team_budget_metric.assert_has_calls(
+ expected_remaining_budget_calls, any_order=True
+ )
+
+ # Verify max budget metric calls
+ expected_max_budget_calls = [
+        call.labels(team="team1", team_alias="alias1").set(100.0),
+        call.labels(team="team2", team_alias="alias2").set(150.0),
+        call.labels(team="team3", team_alias="alias3").set(200.0),
+ ]
+ prometheus_logger.litellm_team_max_budget_metric.assert_has_calls(
+ expected_max_budget_calls, any_order=True
+ )
+
+ # Verify budget reset metric calls
+ # Note: The exact hours will depend on the current time, so we'll just verify the structure
+ assert (
+ prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.call_count
+ == 3
+ )
+ assert (
+ prometheus_logger.litellm_team_budget_remaining_hours_metric.labels().set.call_count
+ == 3
+ )
+
+
+def test_set_team_budget_metrics_null_values(prometheus_logger):
+ """
+ Test that _set_team_budget_metrics correctly handles null/None values
+ """
+ # Create test team with null values
+ team = MagicMock(
+ team_id="team_null",
+ team_alias=None, # Test null alias
+ spend=None, # Test null spend
+ max_budget=None, # Test null max_budget
+ budget_reset_at=None, # Test null reset time
+ )
+
+ # Mock the metrics
+ prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
+ prometheus_logger.litellm_team_max_budget_metric = MagicMock()
+ prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()
+
+ # Set metrics for the team
+ prometheus_logger._set_team_budget_metrics(team)
+
+ # Verify remaining budget metric is set to infinity when max_budget is None
+ prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
+ team="team_null", team_alias=""
+ )
+ prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
+ float("inf")
+ )
+
+ # Verify max budget metric is not set when max_budget is None
+ prometheus_logger.litellm_team_max_budget_metric.assert_not_called()
+
+ # Verify reset metric is not set when budget_reset_at is None
+ prometheus_logger.litellm_team_budget_remaining_hours_metric.assert_not_called()
+
+
+def test_set_team_budget_metrics_with_custom_labels(prometheus_logger, monkeypatch):
+ """
+ Test that _set_team_budget_metrics correctly handles custom prometheus labels
+ """
+ # Set custom prometheus labels
+ custom_labels = ["metadata.organization", "metadata.environment"]
+ monkeypatch.setattr("litellm.custom_prometheus_metadata_labels", custom_labels)
+
+ # Create test team with custom metadata
+ team = MagicMock(
+ team_id="team1",
+ team_alias="alias1",
+ spend=50.0,
+ max_budget=100.0,
+ budget_reset_at=datetime(2024, 12, 31, tzinfo=timezone.utc),
+ )
+
+ # Mock the metrics
+ prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
+ prometheus_logger.litellm_team_max_budget_metric = MagicMock()
+ prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()
+
+ # Set metrics for the team
+ prometheus_logger._set_team_budget_metrics(team)
+
+ # Verify remaining budget metric includes custom labels
+ prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
+ team="team1",
+ team_alias="alias1",
+ metadata_organization=None,
+ metadata_environment=None,
+ )
+ prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
+ 50.0
+ ) # 100 - 50
+
+ # Verify max budget metric includes custom labels
+ prometheus_logger.litellm_team_max_budget_metric.labels.assert_called_once_with(
+ team="team1",
+ team_alias="alias1",
+ metadata_organization=None,
+ metadata_environment=None,
+ )
+ prometheus_logger.litellm_team_max_budget_metric.labels().set.assert_called_once_with(
+ 100.0
+ )
+
+ # Verify budget reset metric includes custom labels
+ budget_reset_calls = (
+ prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.call_args_list
+ )
+ assert len(budget_reset_calls) == 1
+ assert budget_reset_calls[0][1] == {
+ "team": "team1",
+ "team_alias": "alias1",
+ "metadata_organization": None,
+ "metadata_environment": None,
+ }