Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)

build: merge branch

This commit is contained in:
parent f63cf00306
commit 4418e6dd14

9 changed files with 301 additions and 56 deletions
@@ -1560,10 +1568,18 @@ class PrometheusLogger(CustomLogger):
         - Max Budget
         - Budget Reset At
         """
-        self.litellm_remaining_team_budget_metric.labels(
-            team.team_id,
-            team.team_alias or "",
-        ).set(
+        enum_values = UserAPIKeyLabelValues(
+            team=team.team_id,
+            team_alias=team.team_alias or "",
+        )
+
+        _labels = prometheus_label_factory(
+            supported_enum_labels=PrometheusMetricLabels.get_labels(
+                label_name="litellm_remaining_team_budget_metric"
+            ),
+            enum_values=enum_values,
+        )
+        self.litellm_remaining_team_budget_metric.labels(**_labels).set(
             self._safe_get_remaining_budget(
                 max_budget=team.max_budget,
                 spend=team.spend,
@@ -1571,16 +1579,22 @@ class PrometheusLogger(CustomLogger):
             )
         )
 
         if team.max_budget is not None:
-            self.litellm_team_max_budget_metric.labels(
-                team.team_id,
-                team.team_alias or "",
-            ).set(team.max_budget)
+            _labels = prometheus_label_factory(
+                supported_enum_labels=PrometheusMetricLabels.get_labels(
+                    label_name="litellm_team_max_budget_metric"
+                ),
+                enum_values=enum_values,
+            )
+            self.litellm_team_max_budget_metric.labels(**_labels).set(team.max_budget)
 
         if team.budget_reset_at is not None:
-            self.litellm_team_budget_remaining_hours_metric.labels(
-                team.team_id,
-                team.team_alias or "",
-            ).set(
+            _labels = prometheus_label_factory(
+                supported_enum_labels=PrometheusMetricLabels.get_labels(
+                    label_name="litellm_team_budget_remaining_hours_metric"
+                ),
+                enum_values=enum_values,
+            )
+            self.litellm_team_budget_remaining_hours_metric.labels(**_labels).set(
                 self._get_remaining_hours_for_budget_reset(
                     budget_reset_at=team.budget_reset_at
                 )
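The team-budget gauges above now go through prometheus_label_factory, so the emitted label set is whatever PrometheusMetricLabels.get_labels(...) declares for each metric rather than a fixed positional pair. The snippet below is a minimal, self-contained sketch of that filtering idea; the dataclass and the label list are illustrative stand-ins, not litellm's actual UserAPIKeyLabelValues or label registry.

from dataclasses import asdict, dataclass
from typing import Dict, List, Optional


@dataclass
class TeamLabelValues:
    # illustrative stand-in for UserAPIKeyLabelValues
    team: Optional[str] = None
    team_alias: Optional[str] = None
    hashed_api_key: Optional[str] = None


def label_factory(supported_enum_labels: List[str], enum_values: TeamLabelValues) -> Dict[str, str]:
    # keep only the labels this metric supports, dropping unset values
    return {
        k: v
        for k, v in asdict(enum_values).items()
        if k in supported_enum_labels and v is not None
    }


supported = ["team", "team_alias"]  # e.g. what get_labels("litellm_remaining_team_budget_metric") could return
values = TeamLabelValues(team="team1", team_alias="alias1", hashed_api_key="abc123")
print(label_factory(supported, values))  # {'team': 'team1', 'team_alias': 'alias1'}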
@@ -20,7 +20,7 @@ from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
 from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
 from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
 from litellm.secret_managers.main import get_secret_str
-from litellm.types.llms.openai import AllMessageValues
+from litellm.types.llms.openai import AllMessageValues, ChatCompletionImageObject
 from litellm.types.utils import ModelResponse, ModelResponseStream
 from litellm.utils import convert_to_model_response_object
 
@@ -178,6 +178,17 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):
     def _transform_messages(
         self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
+        """OpenAI no longer supports image_url as a string, so we need to convert it to a dict"""
+        for message in messages:
+            message_content = message.get("content")
+            if message_content and isinstance(message_content, list):
+                for content_item in message_content:
+                    if content_item.get("type") == "image_url":
+                        content_item = cast(ChatCompletionImageObject, content_item)
+                        if isinstance(content_item["image_url"], str):
+                            content_item["image_url"] = {
+                                "url": content_item["image_url"],
+                            }
         return messages
 
     def transform_request(
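To make the effect of the new _transform_messages body concrete, here is a standalone restatement of the same loop (plain dicts, no litellm typing), showing a string image_url being rewritten into the {"url": ...} object form OpenAI expects:

def normalize_image_urls(messages):
    # same idea as the loop above: convert string image_url values
    # into {"url": ...} dicts, mutating the messages in place
    for message in messages:
        content = message.get("content")
        if content and isinstance(content, list):
            for item in content:
                if item.get("type") == "image_url" and isinstance(item["image_url"], str):
                    item["image_url"] = {"url": item["image_url"]}
    return messages


msgs = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image?"},
            {"type": "image_url", "image_url": "https://example.com/cat.png"},
        ],
    }
]
normalize_image_urls(msgs)
# msgs[0]["content"][1]["image_url"] is now {"url": "https://example.com/cat.png"}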
@@ -152,4 +152,5 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig):
             )
             messages[i] = new_message  # Replace the old message with the new one
 
+        messages = super()._transform_messages(messages, model)
         return messages
@@ -6499,6 +6499,7 @@
     "supports_function_calling": true,
     "supports_response_schema": true,
     "supports_vision": true,
+    "supports_pdf_input": true,
     "supports_tool_choice": true
   },
   "anthropic.claude-3-5-sonnet-20240620-v1:0": {
@@ -6512,6 +6513,7 @@
     "supports_function_calling": true,
     "supports_response_schema": true,
     "supports_vision": true,
+    "supports_pdf_input": true,
     "supports_tool_choice": true
   },
   "anthropic.claude-3-7-sonnet-20250219-v1:0": {
@@ -6539,6 +6541,7 @@
     "mode": "chat",
     "supports_function_calling": true,
     "supports_vision": true,
+    "supports_pdf_input": true,
     "supports_assistant_prefill": true,
     "supports_prompt_caching": true,
     "supports_response_schema": true,
@@ -6555,6 +6558,7 @@
     "supports_function_calling": true,
     "supports_response_schema": true,
     "supports_vision": true,
+    "supports_pdf_input": true,
     "supports_tool_choice": true
   },
   "anthropic.claude-3-5-haiku-20241022-v1:0": {
@@ -6566,6 +6570,7 @@
     "litellm_provider": "bedrock",
     "mode": "chat",
     "supports_assistant_prefill": true,
+    "supports_pdf_input": true,
     "supports_function_calling": true,
     "supports_response_schema": true,
     "supports_prompt_caching": true,
@@ -6595,6 +6600,7 @@
     "supports_function_calling": true,
     "supports_response_schema": true,
     "supports_vision": true,
+    "supports_pdf_input": true,
     "supports_tool_choice": true
   },
   "us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
@@ -6608,6 +6614,7 @@
     "supports_function_calling": true,
     "supports_response_schema": true,
     "supports_vision": true,
+    "supports_pdf_input": true,
     "supports_tool_choice": true
   },
   "us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
@@ -6620,6 +6627,7 @@
     "mode": "chat",
     "supports_function_calling": true,
     "supports_vision": true,
+    "supports_pdf_input": true,
     "supports_assistant_prefill": true,
     "supports_prompt_caching": true,
     "supports_response_schema": true,
@@ -6651,6 +6659,7 @@
     "supports_function_calling": true,
     "supports_response_schema": true,
     "supports_vision": true,
+    "supports_pdf_input": true,
     "supports_tool_choice": true
   },
   "us.anthropic.claude-3-5-haiku-20241022-v1:0": {
@@ -6662,6 +6671,7 @@
     "litellm_provider": "bedrock",
     "mode": "chat",
     "supports_assistant_prefill": true,
+    "supports_pdf_input": true,
     "supports_function_calling": true,
     "supports_prompt_caching": true,
     "supports_response_schema": true,
@@ -6691,6 +6701,7 @@
     "supports_function_calling": true,
     "supports_response_schema": true,
     "supports_vision": true,
+    "supports_pdf_input": true,
     "supports_tool_choice": true
   },
   "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
@@ -6704,6 +6715,7 @@
     "supports_function_calling": true,
     "supports_response_schema": true,
     "supports_vision": true,
+    "supports_pdf_input": true,
     "supports_tool_choice": true
   },
   "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": {
@@ -6716,6 +6728,7 @@
     "mode": "chat",
     "supports_function_calling": true,
     "supports_vision": true,
+    "supports_pdf_input": true,
     "supports_assistant_prefill": true,
     "supports_prompt_caching": true,
     "supports_response_schema": true,
@@ -6732,6 +6745,7 @@
     "supports_function_calling": true,
     "supports_response_schema": true,
     "supports_vision": true,
+    "supports_pdf_input": true,
     "supports_tool_choice": true
   },
   "eu.anthropic.claude-3-5-haiku-20241022-v1:0": {
@@ -6744,6 +6758,7 @@
     "mode": "chat",
     "supports_function_calling": true,
     "supports_assistant_prefill": true,
+    "supports_pdf_input": true,
     "supports_prompt_caching": true,
     "supports_response_schema": true,
     "supports_tool_choice": true
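All of the hunks above add "supports_pdf_input": true to Bedrock Anthropic Claude model metadata. One way to confirm the flag from Python is to read the bundled cost map the same way the new unit test does (url="" loads the local copy); the model key below is just one of the entries touched here:

import litellm

model_map = litellm.get_model_cost_map(url="")  # local copy, as used in the tests
info = model_map["anthropic.claude-3-5-sonnet-20240620-v1:0"]
print(info.get("supports_pdf_input"))  # expected: True after this change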
File diff suppressed because one or more lines are too long
@@ -23,6 +23,7 @@ model_list:
   - model_name: bedrock-nova
     litellm_params:
       model: bedrock/us.amazon.nova-pro-v1:0
+  - model_name: gpt-4o
+    litellm_params:
+      model: openai/gpt-4o
 
-litellm_settings:
-  callbacks: ["langfuse"]
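Assuming this YAML is a proxy config, the new gpt-4o entry becomes callable through the proxy's OpenAI-compatible endpoint once the proxy is started against it (for example with litellm --config <path>). The base URL, port, and key below are the usual local defaults, not values taken from this diff:

import openai

# assumes a locally running LiteLLM proxy started with this config
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
resp = client.chat.completions.create(
    model="gpt-4o",  # the model_name added above
    messages=[{"role": "user", "content": "hello"}],
)
print(resp.choices[0].message.content)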
@@ -629,37 +629,6 @@ class Router:
         self.aget_messages = self.factory_function(litellm.aget_messages)
         self.arun_thread = self.factory_function(litellm.arun_thread)
 
-    def validate_fallbacks(self, fallback_param: Optional[List]):
-        """
-        Validate the fallbacks parameter.
-        """
-        if fallback_param is None:
-            return
-        for fallback_dict in fallback_param:
-            if not isinstance(fallback_dict, dict):
-                raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
-            if len(fallback_dict) != 1:
-                raise ValueError(
-                    f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys."
-                )
-
-    def add_optional_pre_call_checks(
-        self, optional_pre_call_checks: Optional[OptionalPreCallChecks]
-    ):
-        if optional_pre_call_checks is not None:
-            for pre_call_check in optional_pre_call_checks:
-                _callback: Optional[CustomLogger] = None
-                if pre_call_check == "prompt_caching":
-                    _callback = PromptCachingDeploymentCheck(cache=self.cache)
-                elif pre_call_check == "router_budget_limiting":
-                    _callback = RouterBudgetLimiting(
-                        dual_cache=self.cache,
-                        provider_budget_config=self.provider_budget_config,
-                        model_list=self.model_list,
-                    )
-                if _callback is not None:
-                    litellm.logging_callback_manager.add_litellm_callback(_callback)
-
     def routing_strategy_init(
         self, routing_strategy: Union[RoutingStrategy, str], routing_strategy_args: dict
     ):
@@ -725,6 +694,37 @@ class Router:
         else:
             pass
 
+    def validate_fallbacks(self, fallback_param: Optional[List]):
+        """
+        Validate the fallbacks parameter.
+        """
+        if fallback_param is None:
+            return
+        for fallback_dict in fallback_param:
+            if not isinstance(fallback_dict, dict):
+                raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
+            if len(fallback_dict) != 1:
+                raise ValueError(
+                    f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys."
+                )
+
+    def add_optional_pre_call_checks(
+        self, optional_pre_call_checks: Optional[OptionalPreCallChecks]
+    ):
+        if optional_pre_call_checks is not None:
+            for pre_call_check in optional_pre_call_checks:
+                _callback: Optional[CustomLogger] = None
+                if pre_call_check == "prompt_caching":
+                    _callback = PromptCachingDeploymentCheck(cache=self.cache)
+                elif pre_call_check == "router_budget_limiting":
+                    _callback = RouterBudgetLimiting(
+                        dual_cache=self.cache,
+                        provider_budget_config=self.provider_budget_config,
+                        model_list=self.model_list,
+                    )
+                if _callback is not None:
+                    litellm.logging_callback_manager.add_litellm_callback(_callback)
+
     def print_deployment(self, deployment: dict):
         """
         returns a copy of the deployment with the api key masked
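validate_fallbacks (unchanged here, only relocated below routing_strategy_init) enforces a simple shape: each fallback entry must be a dict with exactly one key. A small usage sketch, assuming the standard Router constructor arguments; the model names are illustrative:

from litellm import Router

router = Router(
    model_list=[
        {"model_name": "gpt-4o", "litellm_params": {"model": "openai/gpt-4o"}},
        {"model_name": "bedrock-nova", "litellm_params": {"model": "bedrock/us.amazon.nova-pro-v1:0"}},
    ],
    fallbacks=[{"gpt-4o": ["bedrock-nova"]}],  # exactly one key per dict -> passes validate_fallbacks
)

# A malformed entry would raise from validate_fallbacks, e.g.:
# Router(model_list=[...], fallbacks=[{"gpt-4o": ["bedrock-nova"], "other": ["x"]}])
#   -> ValueError: Dictionary '...' must have exactly one key, but has 2 keys.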
@@ -254,7 +254,6 @@ class BaseLLMChatTest(ABC):
         # relevant issue: https://github.com/BerriAI/litellm/issues/6741
         assert response.choices[0].message.content is not None
 
-
     @pytest.mark.parametrize(
         "response_format",
         [
@@ -337,7 +336,6 @@ class BaseLLMChatTest(ABC):
 
         print(f"translated_params={translated_params}")
 
-
     @pytest.mark.flaky(retries=6, delay=1)
     def test_json_response_pydantic_obj(self):
         litellm.set_verbose = True
@@ -613,6 +611,46 @@ class BaseLLMChatTest(ABC):
 
         assert response is not None
 
+    def test_image_url_string(self):
+        litellm.set_verbose = True
+        from litellm.utils import supports_vision
+
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
+
+        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+
+        base_completion_call_args = self.get_base_completion_call_args()
+        if not supports_vision(base_completion_call_args["model"], None):
+            pytest.skip("Model does not support image input")
+        elif "http://" in image_url and "fireworks_ai" in base_completion_call_args.get(
+            "model"
+        ):
+            pytest.skip("Model does not support http:// input")
+
+        image_url_param = image_url
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "What's in this image?"},
+                    {
+                        "type": "image_url",
+                        "image_url": image_url_param,
+                    },
+                ],
+            }
+        ]
+
+        try:
+            response = self.completion_function(
+                **base_completion_call_args, messages=messages
+            )
+        except litellm.InternalServerError:
+            pytest.skip("Model is overloaded")
+
+        assert response is not None
+
     @pytest.mark.flaky(retries=4, delay=1)
     def test_prompt_caching(self):
         litellm.set_verbose = True
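For context, test_image_url_string runs once per provider test class: subclasses of BaseLLMChatTest supply the completion entry point and base call args it uses. A hypothetical wiring is sketched below (class name, model choice, and the property-based hook are assumptions, not part of this diff; it would live in the same test module alongside BaseLLMChatTest and the litellm import):

class TestOpenAIChat(BaseLLMChatTest):
    # hypothetical subclass showing the two hooks the shared test relies on
    def get_base_completion_call_args(self) -> dict:
        return {"model": "gpt-4o"}  # illustrative model

    @property
    def completion_function(self):
        return litellm.completion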
@@ -28,7 +28,7 @@ from litellm.types.utils import (
 )
 import pytest
 from unittest.mock import MagicMock, patch, call
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from litellm.integrations.prometheus import PrometheusLogger
 from litellm.proxy._types import UserAPIKeyAuth
 
@@ -302,7 +302,7 @@ async def test_increment_remaining_budget_metrics(prometheus_logger):
 
     # Test remaining budget metrics
     prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
-        "team1", "team_alias1"
+        team="team1", team_alias="team_alias1"
     )
     prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
         40  # 100 - (50 + 10)
@@ -317,7 +317,7 @@ async def test_increment_remaining_budget_metrics(prometheus_logger):
 
     # Test max budget metrics
     prometheus_logger.litellm_team_max_budget_metric.labels.assert_called_once_with(
-        "team1", "team_alias1"
+        team="team1", team_alias="team_alias1"
    )
     prometheus_logger.litellm_team_max_budget_metric.labels().set.assert_called_once_with(
         100
@@ -332,7 +332,7 @@ async def test_increment_remaining_budget_metrics(prometheus_logger):
 
     # Test remaining hours metrics
     prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.assert_called_once_with(
-        "team1", "team_alias1"
+        team="team1", team_alias="team_alias1"
    )
     # The remaining hours should be approximately 10 (with some small difference due to test execution time)
     remaining_hours_call = prometheus_logger.litellm_team_budget_remaining_hours_metric.labels().set.call_args[
@@ -1159,9 +1159,9 @@ async def test_initialize_remaining_budget_metrics(prometheus_logger):
 
     # Verify the labels were called with correct team information
     label_calls = [
-        call.labels("team1", "alias1"),
-        call.labels("team2", "alias2"),
-        call.labels("team3", ""),
+        call.labels(team="team1", team_alias="alias1"),
+        call.labels(team="team2", team_alias="alias2"),
+        call.labels(team="team3", team_alias=""),
     ]
    prometheus_logger.litellm_team_budget_remaining_hours_metric.assert_has_calls(
        label_calls, any_order=True
@@ -1334,3 +1334,169 @@ async def test_initialize_api_key_budget_metrics(prometheus_logger):
     prometheus_logger.litellm_api_key_max_budget_metric.assert_has_calls(
         expected_max_budget_calls, any_order=True
     )
+
+
+def test_set_team_budget_metrics_multiple_teams(prometheus_logger):
+    """
+    Test that _set_team_budget_metrics correctly handles multiple teams with different budgets and reset times
+    """
+    # Create test teams with different budgets and reset times
+    teams = [
+        MagicMock(
+            team_id="team1",
+            team_alias="alias1",
+            spend=50.0,
+            max_budget=100.0,
+            budget_reset_at=datetime(2024, 12, 31, tzinfo=timezone.utc),
+        ),
+        MagicMock(
+            team_id="team2",
+            team_alias="alias2",
+            spend=75.0,
+            max_budget=150.0,
+            budget_reset_at=datetime(2024, 6, 30, tzinfo=timezone.utc),
+        ),
+        MagicMock(
+            team_id="team3",
+            team_alias="alias3",
+            spend=25.0,
+            max_budget=200.0,
+            budget_reset_at=datetime(2024, 3, 31, tzinfo=timezone.utc),
+        ),
+    ]
+
+    # Mock the metrics
+    prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
+    prometheus_logger.litellm_team_max_budget_metric = MagicMock()
+    prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()
+
+    # Set metrics for each team
+    for team in teams:
+        prometheus_logger._set_team_budget_metrics(team)
+
+    # Verify remaining budget metric calls
+    expected_remaining_budget_calls = [
+        call.labels(team="team1", team_alias="alias1").set(50.0),  # 100 - 50
+        call.labels(team="team2", team_alias="alias2").set(75.0),  # 150 - 75
+        call.labels(team="team3", team_alias="alias3").set(175.0),  # 200 - 25
+    ]
+    prometheus_logger.litellm_remaining_team_budget_metric.assert_has_calls(
+        expected_remaining_budget_calls, any_order=True
+    )
+
+    # Verify max budget metric calls
+    expected_max_budget_calls = [
+        call.labels("team1", "alias1").set(100.0),
+        call.labels("team2", "alias2").set(150.0),
+        call.labels("team3", "alias3").set(200.0),
+    ]
+    prometheus_logger.litellm_team_max_budget_metric.assert_has_calls(
+        expected_max_budget_calls, any_order=True
+    )
+
+    # Verify budget reset metric calls
+    # Note: The exact hours will depend on the current time, so we'll just verify the structure
+    assert (
+        prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.call_count
+        == 3
+    )
+    assert (
+        prometheus_logger.litellm_team_budget_remaining_hours_metric.labels().set.call_count
+        == 3
+    )
+
+
+def test_set_team_budget_metrics_null_values(prometheus_logger):
+    """
+    Test that _set_team_budget_metrics correctly handles null/None values
+    """
+    # Create test team with null values
+    team = MagicMock(
+        team_id="team_null",
+        team_alias=None,  # Test null alias
+        spend=None,  # Test null spend
+        max_budget=None,  # Test null max_budget
+        budget_reset_at=None,  # Test null reset time
+    )
+
+    # Mock the metrics
+    prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
+    prometheus_logger.litellm_team_max_budget_metric = MagicMock()
+    prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()
+
+    # Set metrics for the team
+    prometheus_logger._set_team_budget_metrics(team)
+
+    # Verify remaining budget metric is set to infinity when max_budget is None
+    prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
+        team="team_null", team_alias=""
+    )
+    prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
+        float("inf")
+    )
+
+    # Verify max budget metric is not set when max_budget is None
+    prometheus_logger.litellm_team_max_budget_metric.assert_not_called()
+
+    # Verify reset metric is not set when budget_reset_at is None
+    prometheus_logger.litellm_team_budget_remaining_hours_metric.assert_not_called()
+
+
+def test_set_team_budget_metrics_with_custom_labels(prometheus_logger, monkeypatch):
+    """
+    Test that _set_team_budget_metrics correctly handles custom prometheus labels
+    """
+    # Set custom prometheus labels
+    custom_labels = ["metadata.organization", "metadata.environment"]
+    monkeypatch.setattr("litellm.custom_prometheus_metadata_labels", custom_labels)
+
+    # Create test team with custom metadata
+    team = MagicMock(
+        team_id="team1",
+        team_alias="alias1",
+        spend=50.0,
+        max_budget=100.0,
+        budget_reset_at=datetime(2024, 12, 31, tzinfo=timezone.utc),
+    )
+
+    # Mock the metrics
+    prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
+    prometheus_logger.litellm_team_max_budget_metric = MagicMock()
+    prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()
+
+    # Set metrics for the team
+    prometheus_logger._set_team_budget_metrics(team)
+
+    # Verify remaining budget metric includes custom labels
+    prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
+        team="team1",
+        team_alias="alias1",
+        metadata_organization=None,
+        metadata_environment=None,
+    )
+    prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
+        50.0
+    )  # 100 - 50
+
+    # Verify max budget metric includes custom labels
+    prometheus_logger.litellm_team_max_budget_metric.labels.assert_called_once_with(
+        team="team1",
+        team_alias="alias1",
+        metadata_organization=None,
+        metadata_environment=None,
+    )
+    prometheus_logger.litellm_team_max_budget_metric.labels().set.assert_called_once_with(
+        100.0
+    )
+
+    # Verify budget reset metric includes custom labels
+    budget_reset_calls = (
+        prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.call_args_list
+    )
+    assert len(budget_reset_calls) == 1
+    assert budget_reset_calls[0][1] == {
+        "team": "team1",
+        "team_alias": "alias1",
+        "metadata_organization": None,
+        "metadata_environment": None,
+    }
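One detail worth calling out from the custom-labels test: a configured label such as "metadata.organization" is asserted on the metric as metadata_organization. The replace below is an inference about how that sanitisation behaves, based only on the assertions above:

custom_labels = ["metadata.organization", "metadata.environment"]
# Prometheus label names cannot contain dots, so the configured names appear
# with underscores in the metric labels asserted in the test above.
prometheus_label_names = [label.replace(".", "_") for label in custom_labels]
print(prometheus_label_names)  # ['metadata_organization', 'metadata_environment']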