build: merge branch

Krrish Dholakia 2025-03-02 08:31:57 -08:00
parent f63cf00306
commit 4418e6dd14
9 changed files with 301 additions and 56 deletions


@@ -1560,10 +1560,18 @@ class PrometheusLogger(CustomLogger):
- Max Budget
- Budget Reset At
"""
self.litellm_remaining_team_budget_metric.labels(
team.team_id,
team.team_alias or "",
).set(
enum_values = UserAPIKeyLabelValues(
team=team.team_id,
team_alias=team.team_alias or "",
)
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_remaining_team_budget_metric"
),
enum_values=enum_values,
)
self.litellm_remaining_team_budget_metric.labels(**_labels).set(
self._safe_get_remaining_budget(
max_budget=team.max_budget,
spend=team.spend,
@@ -1571,16 +1579,22 @@ class PrometheusLogger(CustomLogger):
)
if team.max_budget is not None:
self.litellm_team_max_budget_metric.labels(
team.team_id,
team.team_alias or "",
).set(team.max_budget)
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_team_max_budget_metric"
),
enum_values=enum_values,
)
self.litellm_team_max_budget_metric.labels(**_labels).set(team.max_budget)
if team.budget_reset_at is not None:
self.litellm_team_budget_remaining_hours_metric.labels(
team.team_id,
team.team_alias or "",
).set(
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_team_budget_remaining_hours_metric"
),
enum_values=enum_values,
)
self.litellm_team_budget_remaining_hours_metric.labels(**_labels).set(
self._get_remaining_hours_for_budget_reset(
budget_reset_at=team.budget_reset_at
)
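
For readers unfamiliar with the pattern: the refactor replaces positional `.labels(...)` calls with a factory that filters the populated label values down to whatever each metric declares. The sketch below is an illustrative assumption of that contract, not litellm's actual implementation; `UserAPIKeyLabelValues` is reduced to the two fields used here, and pydantic v2 is assumed.

from typing import Dict, List, Optional
from pydantic import BaseModel

class UserAPIKeyLabelValues(BaseModel):
    # Hypothetical two-field subset of the real model.
    team: Optional[str] = None
    team_alias: Optional[str] = None

def prometheus_label_factory(
    supported_enum_labels: List[str],
    enum_values: UserAPIKeyLabelValues,
) -> Dict[str, Optional[str]]:
    # Keep only the labels this metric declares, so every metric gets
    # a consistent, complete keyword set for .labels(**_labels).
    return {
        k: v
        for k, v in enum_values.model_dump().items()
        if k in supported_enum_labels
    }

labels = prometheus_label_factory(
    supported_enum_labels=["team", "team_alias"],
    enum_values=UserAPIKeyLabelValues(team="team1", team_alias="alias1"),
)
assert labels == {"team": "team1", "team_alias": "alias1"}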


@@ -20,7 +20,7 @@ from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
from litellm.types.llms.openai import AllMessageValues, ChatCompletionImageObject
from litellm.types.utils import ModelResponse, ModelResponseStream
from litellm.utils import convert_to_model_response_object
@@ -178,6 +178,17 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):
def _transform_messages(
self, messages: List[AllMessageValues], model: str
) -> List[AllMessageValues]:
"""OpenAI no longer supports image_url as a string, so we need to convert it to a dict"""
for message in messages:
message_content = message.get("content")
if message_content and isinstance(message_content, list):
for content_item in message_content:
if content_item.get("type") == "image_url":
content_item = cast(ChatCompletionImageObject, content_item)
if isinstance(content_item["image_url"], str):
content_item["image_url"] = {
"url": content_item["image_url"],
}
return messages
def transform_request(
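
To see what `_transform_messages` changes, here is a standalone re-implementation of the same normalization (no litellm imports, so nothing beyond the diff above is assumed): a bare `image_url` string is wrapped into the `{"url": ...}` dict form.

def normalize_image_urls(messages: list) -> list:
    # Same idea as _transform_messages: OpenAI no longer accepts a bare
    # string for image_url, so wrap it in the dict form it expects.
    for message in messages:
        content = message.get("content")
        if isinstance(content, list):
            for item in content:
                if item.get("type") == "image_url" and isinstance(
                    item.get("image_url"), str
                ):
                    item["image_url"] = {"url": item["image_url"]}
    return messages

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image?"},
            {"type": "image_url", "image_url": "https://example.com/cat.png"},
        ],
    }
]
normalize_image_urls(messages)
assert messages[0]["content"][1]["image_url"] == {"url": "https://example.com/cat.png"}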


@@ -152,4 +152,5 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig):
)
messages[i] = new_message # Replace the old message with the new one
messages = super()._transform_messages(messages, model)
return messages


@@ -6499,6 +6499,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_pdf_input": true,
"supports_tool_choice": true
},
"anthropic.claude-3-5-sonnet-20240620-v1:0": {
@@ -6512,6 +6513,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_pdf_input": true,
"supports_tool_choice": true
},
"anthropic.claude-3-7-sonnet-20250219-v1:0": {
@@ -6539,6 +6541,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_pdf_input": true,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
@@ -6555,6 +6558,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_pdf_input": true,
"supports_tool_choice": true
},
"anthropic.claude-3-5-haiku-20241022-v1:0": {
@@ -6566,6 +6570,7 @@
"litellm_provider": "bedrock",
"mode": "chat",
"supports_assistant_prefill": true,
"supports_pdf_input": true,
"supports_function_calling": true,
"supports_response_schema": true,
"supports_prompt_caching": true,
@@ -6595,6 +6600,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_pdf_input": true,
"supports_tool_choice": true
},
"us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
@@ -6608,6 +6614,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_pdf_input": true,
"supports_tool_choice": true
},
"us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
@@ -6620,6 +6627,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_pdf_input": true,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
@@ -6651,6 +6659,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_pdf_input": true,
"supports_tool_choice": true
},
"us.anthropic.claude-3-5-haiku-20241022-v1:0": {
@@ -6662,6 +6671,7 @@
"litellm_provider": "bedrock",
"mode": "chat",
"supports_assistant_prefill": true,
"supports_pdf_input": true,
"supports_function_calling": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
@@ -6691,6 +6701,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_pdf_input": true,
"supports_tool_choice": true
},
"eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
@@ -6704,6 +6715,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_pdf_input": true,
"supports_tool_choice": true
},
"eu.anthropic.claude-3-5-sonnet-20241022-v2:0": {
@@ -6716,6 +6728,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_pdf_input": true,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
@@ -6732,6 +6745,7 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_pdf_input": true,
"supports_tool_choice": true
},
"eu.anthropic.claude-3-5-haiku-20241022-v1:0": {
@@ -6744,6 +6758,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_assistant_prefill": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_tool_choice": true

File diff suppressed because one or more lines are too long


@@ -23,6 +23,7 @@ model_list:
- model_name: bedrock-nova
litellm_params:
model: bedrock/us.amazon.nova-pro-v1:0
- model_name: gpt-4o
litellm_params:
model: openai/gpt-4o
litellm_settings:
callbacks: ["langfuse"]


@@ -629,37 +629,6 @@ class Router:
self.aget_messages = self.factory_function(litellm.aget_messages)
self.arun_thread = self.factory_function(litellm.arun_thread)
def validate_fallbacks(self, fallback_param: Optional[List]):
"""
Validate the fallbacks parameter.
"""
if fallback_param is None:
return
for fallback_dict in fallback_param:
if not isinstance(fallback_dict, dict):
raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
if len(fallback_dict) != 1:
raise ValueError(
f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys."
)
def add_optional_pre_call_checks(
self, optional_pre_call_checks: Optional[OptionalPreCallChecks]
):
if optional_pre_call_checks is not None:
for pre_call_check in optional_pre_call_checks:
_callback: Optional[CustomLogger] = None
if pre_call_check == "prompt_caching":
_callback = PromptCachingDeploymentCheck(cache=self.cache)
elif pre_call_check == "router_budget_limiting":
_callback = RouterBudgetLimiting(
dual_cache=self.cache,
provider_budget_config=self.provider_budget_config,
model_list=self.model_list,
)
if _callback is not None:
litellm.logging_callback_manager.add_litellm_callback(_callback)
def routing_strategy_init(
self, routing_strategy: Union[RoutingStrategy, str], routing_strategy_args: dict
):
@@ -725,6 +694,37 @@ class Router:
else:
pass
def validate_fallbacks(self, fallback_param: Optional[List]):
"""
Validate the fallbacks parameter.
"""
if fallback_param is None:
return
for fallback_dict in fallback_param:
if not isinstance(fallback_dict, dict):
raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
if len(fallback_dict) != 1:
raise ValueError(
f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys."
)
def add_optional_pre_call_checks(
self, optional_pre_call_checks: Optional[OptionalPreCallChecks]
):
if optional_pre_call_checks is not None:
for pre_call_check in optional_pre_call_checks:
_callback: Optional[CustomLogger] = None
if pre_call_check == "prompt_caching":
_callback = PromptCachingDeploymentCheck(cache=self.cache)
elif pre_call_check == "router_budget_limiting":
_callback = RouterBudgetLimiting(
dual_cache=self.cache,
provider_budget_config=self.provider_budget_config,
model_list=self.model_list,
)
if _callback is not None:
litellm.logging_callback_manager.add_litellm_callback(_callback)
def print_deployment(self, deployment: dict):
"""
returns a copy of the deployment with the api key masked
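
For reference, the shapes the relocated `validate_fallbacks` accepts and rejects, as plain data:

# Accepted: each dict maps exactly one primary model to its fallbacks.
valid_fallbacks = [
    {"gpt-4o": ["gpt-4o-mini"]},
    {"bedrock-nova": ["gpt-4o"]},
]

# Rejected with ValueError:
not_a_dict = ["gpt-4o"]                     # item is not a dictionary
too_many_keys = [{"a": ["b"], "c": ["d"]}]  # must have exactly one key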


@@ -254,7 +254,6 @@ class BaseLLMChatTest(ABC):
# relevant issue: https://github.com/BerriAI/litellm/issues/6741
assert response.choices[0].message.content is not None
@pytest.mark.parametrize(
"response_format",
[
@@ -337,7 +336,6 @@ class BaseLLMChatTest(ABC):
print(f"translated_params={translated_params}")
@pytest.mark.flaky(retries=6, delay=1)
def test_json_response_pydantic_obj(self):
litellm.set_verbose = True
@@ -613,6 +611,46 @@ class BaseLLMChatTest(ABC):
assert response is not None
def test_image_url_string(self):
litellm.set_verbose = True
from litellm.utils import supports_vision
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
base_completion_call_args = self.get_base_completion_call_args()
if not supports_vision(base_completion_call_args["model"], None):
pytest.skip("Model does not support image input")
elif "http://" in image_url and "fireworks_ai" in base_completion_call_args.get(
"model"
):
pytest.skip("Model does not support http:// input")
image_url_param = image_url
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{
"type": "image_url",
"image_url": image_url_param,
},
],
}
]
try:
response = self.completion_function(
**base_completion_call_args, messages=messages
)
except litellm.InternalServerError:
pytest.skip("Model is overloaded")
assert response is not None
@pytest.mark.flaky(retries=4, delay=1)
def test_prompt_caching(self):
litellm.set_verbose = True


@@ -28,7 +28,7 @@ from litellm.types.utils import (
)
import pytest
from unittest.mock import MagicMock, patch, call
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from litellm.integrations.prometheus import PrometheusLogger
from litellm.proxy._types import UserAPIKeyAuth
@@ -302,7 +302,7 @@ async def test_increment_remaining_budget_metrics(prometheus_logger):
# Test remaining budget metrics
prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
"team1", "team_alias1"
team="team1", team_alias="team_alias1"
)
prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
40 # 100 - (50 + 10)
@@ -317,7 +317,7 @@ async def test_increment_remaining_budget_metrics(prometheus_logger):
# Test max budget metrics
prometheus_logger.litellm_team_max_budget_metric.labels.assert_called_once_with(
"team1", "team_alias1"
team="team1", team_alias="team_alias1"
)
prometheus_logger.litellm_team_max_budget_metric.labels().set.assert_called_once_with(
100
@@ -332,7 +332,7 @@ async def test_increment_remaining_budget_metrics(prometheus_logger):
# Test remaining hours metrics
prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.assert_called_once_with(
"team1", "team_alias1"
team="team1", team_alias="team_alias1"
)
# The remaining hours should be approximately 10 (with some small difference due to test execution time)
remaining_hours_call = prometheus_logger.litellm_team_budget_remaining_hours_metric.labels().set.call_args[
@@ -1159,9 +1159,9 @@ async def test_initialize_remaining_budget_metrics(prometheus_logger):
# Verify the labels were called with correct team information
label_calls = [
call.labels("team1", "alias1"),
call.labels("team2", "alias2"),
call.labels("team3", ""),
call.labels(team="team1", team_alias="alias1"),
call.labels(team="team2", team_alias="alias2"),
call.labels(team="team3", team_alias=""),
]
prometheus_logger.litellm_team_budget_remaining_hours_metric.assert_has_calls(
label_calls, any_order=True
@@ -1334,3 +1334,169 @@ async def test_initialize_api_key_budget_metrics(prometheus_logger):
prometheus_logger.litellm_api_key_max_budget_metric.assert_has_calls(
expected_max_budget_calls, any_order=True
)
def test_set_team_budget_metrics_multiple_teams(prometheus_logger):
"""
Test that _set_team_budget_metrics correctly handles multiple teams with different budgets and reset times
"""
# Create test teams with different budgets and reset times
teams = [
MagicMock(
team_id="team1",
team_alias="alias1",
spend=50.0,
max_budget=100.0,
budget_reset_at=datetime(2024, 12, 31, tzinfo=timezone.utc),
),
MagicMock(
team_id="team2",
team_alias="alias2",
spend=75.0,
max_budget=150.0,
budget_reset_at=datetime(2024, 6, 30, tzinfo=timezone.utc),
),
MagicMock(
team_id="team3",
team_alias="alias3",
spend=25.0,
max_budget=200.0,
budget_reset_at=datetime(2024, 3, 31, tzinfo=timezone.utc),
),
]
# Mock the metrics
prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
prometheus_logger.litellm_team_max_budget_metric = MagicMock()
prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()
# Set metrics for each team
for team in teams:
prometheus_logger._set_team_budget_metrics(team)
# Verify remaining budget metric calls
expected_remaining_budget_calls = [
call.labels(team="team1", team_alias="alias1").set(50.0), # 100 - 50
call.labels(team="team2", team_alias="alias2").set(75.0), # 150 - 75
call.labels(team="team3", team_alias="alias3").set(175.0), # 200 - 25
]
prometheus_logger.litellm_remaining_team_budget_metric.assert_has_calls(
expected_remaining_budget_calls, any_order=True
)
# Verify max budget metric calls
expected_max_budget_calls = [
call.labels("team1", "alias1").set(100.0),
call.labels("team2", "alias2").set(150.0),
call.labels("team3", "alias3").set(200.0),
]
prometheus_logger.litellm_team_max_budget_metric.assert_has_calls(
expected_max_budget_calls, any_order=True
)
# Verify budget reset metric calls
# Note: The exact hours will depend on the current time, so we'll just verify the structure
assert (
prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.call_count
== 3
)
assert (
prometheus_logger.litellm_team_budget_remaining_hours_metric.labels().set.call_count
== 3
)
def test_set_team_budget_metrics_null_values(prometheus_logger):
"""
Test that _set_team_budget_metrics correctly handles null/None values
"""
# Create test team with null values
team = MagicMock(
team_id="team_null",
team_alias=None, # Test null alias
spend=None, # Test null spend
max_budget=None, # Test null max_budget
budget_reset_at=None, # Test null reset time
)
# Mock the metrics
prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
prometheus_logger.litellm_team_max_budget_metric = MagicMock()
prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()
# Set metrics for the team
prometheus_logger._set_team_budget_metrics(team)
# Verify remaining budget metric is set to infinity when max_budget is None
prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
team="team_null", team_alias=""
)
prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
float("inf")
)
# Verify max budget metric is not set when max_budget is None
prometheus_logger.litellm_team_max_budget_metric.assert_not_called()
# Verify reset metric is not set when budget_reset_at is None
prometheus_logger.litellm_team_budget_remaining_hours_metric.assert_not_called()
def test_set_team_budget_metrics_with_custom_labels(prometheus_logger, monkeypatch):
"""
Test that _set_team_budget_metrics correctly handles custom prometheus labels
"""
# Set custom prometheus labels
custom_labels = ["metadata.organization", "metadata.environment"]
monkeypatch.setattr("litellm.custom_prometheus_metadata_labels", custom_labels)
# Create test team with custom metadata
team = MagicMock(
team_id="team1",
team_alias="alias1",
spend=50.0,
max_budget=100.0,
budget_reset_at=datetime(2024, 12, 31, tzinfo=timezone.utc),
)
# Mock the metrics
prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
prometheus_logger.litellm_team_max_budget_metric = MagicMock()
prometheus_logger.litellm_team_budget_remaining_hours_metric = MagicMock()
# Set metrics for the team
prometheus_logger._set_team_budget_metrics(team)
# Verify remaining budget metric includes custom labels
prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
team="team1",
team_alias="alias1",
metadata_organization=None,
metadata_environment=None,
)
prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
50.0
) # 100 - 50
# Verify max budget metric includes custom labels
prometheus_logger.litellm_team_max_budget_metric.labels.assert_called_once_with(
team="team1",
team_alias="alias1",
metadata_organization=None,
metadata_environment=None,
)
prometheus_logger.litellm_team_max_budget_metric.labels().set.assert_called_once_with(
100.0
)
# Verify budget reset metric includes custom labels
budget_reset_calls = (
prometheus_logger.litellm_team_budget_remaining_hours_metric.labels.call_args_list
)
assert len(budget_reset_calls) == 1
assert budget_reset_calls[0][1] == {
"team": "team1",
"team_alias": "alias1",
"metadata_organization": None,
"metadata_environment": None,
}
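
The custom-label test implies that configured metadata labels like `metadata.organization` surface as Prometheus label names with dots replaced by underscores (Prometheus label names cannot contain dots). A one-line sketch of that assumed mapping:

custom_labels = ["metadata.organization", "metadata.environment"]
prometheus_safe = [label.replace(".", "_") for label in custom_labels]
assert prometheus_safe == ["metadata_organization", "metadata_environment"]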