mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
Provider Budget Routing - Get Budget, Spend Details (#7063)
* add async_get_ttl to dual cache * add ProviderBudgetResponse * add provider_budgets * test_redis_get_ttl * _init_or_get_provider_budget_in_cache * test_init_or_get_provider_budget_in_cache * use _init_provider_budget_in_cache * test_get_current_provider_budget_reset_at * doc Get Budget, Spend Details * doc Provider Budget Routing
This commit is contained in:
parent
aaa4d4178a
commit
87ca62943b
11 changed files with 444 additions and 1 deletions
|
@ -126,6 +126,53 @@ Expected response on failure
|
|||
|
||||
## Monitoring Provider Remaining Budget
|
||||
|
||||
### Get Budget, Spend Details
|
||||
|
||||
Use this endpoint to check the current budget, spend, and budget reset time for each configured provider.
|
||||
|
||||
Example Request
|
||||
|
||||
```bash
|
||||
curl -X GET http://localhost:4000/provider/budgets \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer sk-1234"
|
||||
```
|
||||
|
||||
Example Response
|
||||
|
||||
```json
|
||||
{
|
||||
"providers": {
|
||||
"openai": {
|
||||
"budget_limit": 1e-12,
|
||||
"time_period": "1d",
|
||||
"spend": 0.0,
|
||||
"budget_reset_at": null
|
||||
},
|
||||
"azure": {
|
||||
"budget_limit": 100.0,
|
||||
"time_period": "1d",
|
||||
"spend": 0.0,
|
||||
"budget_reset_at": null
|
||||
},
|
||||
"anthropic": {
|
||||
"budget_limit": 100.0,
|
||||
"time_period": "10d",
|
||||
"spend": 0.0,
|
||||
"budget_reset_at": null
|
||||
},
|
||||
"vertex_ai": {
|
||||
"budget_limit": 100.0,
|
||||
"time_period": "12d",
|
||||
"spend": 0.0,
|
||||
"budget_reset_at": null
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Prometheus Metric
|
||||
|
||||
LiteLLM will emit the following metric on Prometheus to track the remaining budget for each provider
|
||||
|
||||
This metric indicates the remaining budget for a provider in dollars (USD)
|
||||
|
|
|
@ -423,3 +423,12 @@ class DualCache(BaseCache):
|
|||
self.in_memory_cache.delete_cache(key)
|
||||
if self.redis_cache is not None:
|
||||
await self.redis_cache.async_delete_cache(key)
|
||||
|
||||
async def async_get_ttl(self, key: str) -> Optional[int]:
    """
    Look up the TTL recorded for `key`, preferring the in-memory cache.

    Redis is only consulted when the in-memory cache reports no TTL and a
    Redis cache is configured.

    Args:
        key (str): Cache key to look up.

    Returns:
        Optional[int]: The TTL reported by the first cache that has one,
        otherwise None.
    """
    in_memory_ttl = await self.in_memory_cache.async_get_ttl(key)
    # Short-circuit: either we already have an answer or there is no Redis to ask.
    if in_memory_ttl is not None or self.redis_cache is None:
        return in_memory_ttl
    return await self.redis_cache.async_get_ttl(key)
|
||||
|
|
|
@ -145,3 +145,9 @@ class InMemoryCache(BaseCache):
|
|||
def delete_cache(self, key):
    """
    Drop `key` from the in-memory store together with its TTL entry.

    Keys that are not present are ignored.
    """
    for store in (self.cache_dict, self.ttl_dict):
        store.pop(key, None)
|
||||
|
||||
async def async_get_ttl(self, key: str) -> Optional[int]:
    """
    Return the entry stored for `key` in `ttl_dict`, or None when absent.

    NOTE(review): the value is handed back exactly as stored. If `ttl_dict`
    holds absolute expiry timestamps rather than seconds remaining, callers
    that expect a remaining TTL will get the wrong number - confirm against
    the code that writes `ttl_dict`.
    """
    stored_ttl = self.ttl_dict.get(key)
    return stored_ttl
|
||||
|
|
|
@ -980,3 +980,26 @@ class RedisCache(BaseCache):
|
|||
str(e),
|
||||
)
|
||||
raise e
|
||||
|
||||
async def async_get_ttl(self, key: str) -> Optional[int]:
    """
    Ask Redis how many seconds `key` has left before it expires.

    Args:
        key (str): The key to get TTL for

    Returns:
        Optional[int]: The remaining TTL in seconds. None is returned when
        Redis reports a negative TTL (-2: the key does not exist, -1: the key
        exists but has no expiry) or when the lookup raises.

    Redis ref: https://redis.io/docs/latest/commands/ttl/
    """
    try:
        client = await self.init_async_client()
        async with client as redis_client:
            remaining = await redis_client.ttl(key)
            # Negative replies are sentinels, not durations:
            #   -2 -> key does not exist, -1 -> key has no expiry set.
            return None if remaining <= -1 else remaining
    except Exception as e:
        # Best-effort lookup: log at debug level and report "no TTL".
        verbose_logger.debug(f"Redis TTL Error: {e}")
        return None
|
||||
|
|
|
@ -2193,3 +2193,25 @@ LiteLLM_ManagementEndpoint_MetadataFields = [
|
|||
"tags",
|
||||
"enforced_params",
|
||||
]
|
||||
|
||||
|
||||
class ProviderBudgetResponseObject(LiteLLMBase):
    """
    Budget settings and current usage reported for a single provider.
    """

    # Budget limit in USD for the time period
    budget_limit: float
    # Time period for budget (e.g., '1d', '30d', '1mo')
    time_period: str
    # Current spend for this provider
    spend: float = 0.0
    # When the current budget period resets (None when unknown)
    budget_reset_at: Optional[str] = None
|
||||
|
||||
|
||||
class ProviderBudgetResponse(LiteLLMBase):
    """
    Response body listing budget configuration and status per provider.

    Keys of `providers` are provider names; values are that provider's
    budget details.
    """

    # Dictionary mapping provider names to their budget configurations
    providers: Dict[str, ProviderBudgetResponseObject] = {}
|
||||
|
|
|
@ -12,3 +12,27 @@ model_list:
|
|||
|
||||
litellm_settings:
|
||||
callbacks: ["datadog"]
|
||||
|
||||
|
||||
router_settings:
|
||||
provider_budget_config:
|
||||
openai:
|
||||
budget_limit: 0.000000000001 # float of $ value budget for time period
|
||||
time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
|
||||
azure:
|
||||
budget_limit: 100
|
||||
time_period: 1d
|
||||
anthropic:
|
||||
budget_limit: 100
|
||||
time_period: 10d
|
||||
vertex_ai:
|
||||
budget_limit: 100
|
||||
time_period: 12d
|
||||
gemini:
|
||||
budget_limit: 100
|
||||
time_period: 12d
|
||||
|
||||
# OPTIONAL: Set Redis Host, Port, and Password if using multiple instances of LiteLLM
|
||||
redis_host: os.environ/REDIS_HOST
|
||||
redis_port: os.environ/REDIS_PORT
|
||||
redis_password: os.environ/REDIS_PASSWORD
|
|
@ -8,10 +8,12 @@ from fastapi import APIRouter, Depends, Header, HTTPException, Request, status
|
|||
import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy._types import *
|
||||
from litellm.proxy._types import ProviderBudgetResponse, ProviderBudgetResponseObject
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy.spend_tracking.spend_tracking_utils import (
|
||||
get_spend_by_team_and_customer,
|
||||
)
|
||||
from litellm.proxy.utils import handle_exception_on_proxy
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
@ -2464,3 +2466,92 @@ async def global_predict_spend_logs(request: Request):
|
|||
data = await request.json()
|
||||
data = data.get("data")
|
||||
return _forecast_daily_cost(data)
|
||||
|
||||
|
||||
@router.get(
    "/provider/budgets",
    response_model=ProviderBudgetResponse,
    # Require a valid API key: this route exposes budget and spend data.
    dependencies=[Depends(user_api_key_auth)],
)
async def provider_budgets() -> ProviderBudgetResponse:
    """
    Provider Budget Routing - Get Budget, Spend Details https://docs.litellm.ai/docs/proxy/provider_budget_routing

    Use this endpoint to check current budget, spend and budget reset time for each configured provider.
    The request must carry a valid API key in the `Authorization` header.

    Example Request

    ```bash
    curl -X GET http://localhost:4000/provider/budgets \
        -H "Content-Type: application/json" \
        -H "Authorization: Bearer sk-1234"
    ```

    Example Response

    ```json
    {
        "providers": {
            "openai": {
                "budget_limit": 1e-12,
                "time_period": "1d",
                "spend": 0.0,
                "budget_reset_at": null
            },
            "azure": {
                "budget_limit": 100.0,
                "time_period": "1d",
                "spend": 0.0,
                "budget_reset_at": null
            },
            "anthropic": {
                "budget_limit": 100.0,
                "time_period": "10d",
                "spend": 0.0,
                "budget_reset_at": null
            },
            "vertex_ai": {
                "budget_limit": 100.0,
                "time_period": "12d",
                "spend": 0.0,
                "budget_reset_at": null
            }
        }
    }
    ```

    Returns:
        ProviderBudgetResponse: one entry per provider in the router's
        provider budget config.

    Raises:
        Whatever `handle_exception_on_proxy` produces for the underlying error
        (no router configured, no provider budget config set, cache failures).
    """
    # Imported lazily so the router configured at proxy startup is picked up.
    from litellm.proxy.proxy_server import llm_router

    try:
        if llm_router is None:
            raise HTTPException(
                status_code=500, detail={"error": "No llm_router found"}
            )

        provider_budget_config = llm_router.provider_budget_config
        if provider_budget_config is None:
            raise ValueError(
                "No provider budget config found. Please set a provider budget config in the router settings. https://docs.litellm.ai/docs/proxy/provider_budget_routing"
            )

        provider_budget_response_dict: Dict[str, ProviderBudgetResponseObject] = {}
        for _provider, _budget_info in provider_budget_config.items():
            # The helper may return None; report that as zero spend.
            _provider_spend = (
                await llm_router.provider_budget_logger._get_current_provider_spend(
                    _provider
                )
                or 0.0
            )
            _provider_budget_ttl = await llm_router.provider_budget_logger._get_current_provider_budget_reset_at(
                _provider
            )
            provider_budget_response_object = ProviderBudgetResponseObject(
                budget_limit=_budget_info.budget_limit,
                time_period=_budget_info.time_period,
                spend=_provider_spend,
                budget_reset_at=_provider_budget_ttl,
            )
            provider_budget_response_dict[_provider] = provider_budget_response_object
        return ProviderBudgetResponse(providers=provider_budget_response_dict)
    except Exception as e:
        verbose_proxy_logger.exception(
            "/provider/budgets: Exception occured - {}".format(str(e))
        )
        raise handle_exception_on_proxy(e)
|
||||
|
|
|
@ -19,7 +19,7 @@ anthropic:
|
|||
"""
|
||||
|
||||
import asyncio
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union
|
||||
|
||||
import litellm
|
||||
|
@ -68,6 +68,12 @@ class ProviderBudgetLimiting(CustomLogger):
|
|||
budget_limit=config.get("budget_limit"),
|
||||
time_period=config.get("time_period"),
|
||||
)
|
||||
asyncio.create_task(
|
||||
self._init_provider_budget_in_cache(
|
||||
provider=provider,
|
||||
budget_config=provider_budget_config[provider],
|
||||
)
|
||||
)
|
||||
|
||||
self.provider_budget_config: ProviderBudgetConfigType = provider_budget_config
|
||||
verbose_router_logger.debug(
|
||||
|
@ -450,3 +456,74 @@ class ProviderBudgetLimiting(CustomLogger):
|
|||
spend=spend,
|
||||
budget_limit=budget_limit,
|
||||
)
|
||||
|
||||
async def _get_current_provider_spend(self, provider: str) -> Optional[float]:
    """
    Read the spend currently cached for `provider`.

    Used by the GET /provider/budgets endpoint in spend_management_endpoints.py

    Args:
        provider (str): The provider to get spend for (e.g., "openai", "anthropic")

    Returns:
        Optional[float]: None when the provider has no budget config;
        otherwise the cached spend as a float, or 0.0 when nothing is cached.
    """
    budget_config = self._get_budget_config_for_provider(provider)
    if budget_config is None:
        return None

    spend_key = f"provider_spend:{provider}:{budget_config.time_period}"

    # Redis is the source of truth since it has spend across all instances;
    # fall back to the router cache when Redis is not initialized.
    spend_source = self.router_cache.redis_cache or self.router_cache
    cached_spend = await spend_source.async_get_cache(spend_key)

    if cached_spend is None:
        return 0.0
    return float(cached_spend)
|
||||
|
||||
async def _get_current_provider_budget_reset_at(
    self, provider: str
) -> Optional[str]:
    """
    Work out when the current budget window for `provider` resets.

    The reset time is derived from the TTL of the provider's spend key:
    current UTC time plus the TTL in seconds.

    Args:
        provider (str): The provider to look up (e.g., "openai", "anthropic")

    Returns:
        Optional[str]: ISO-8601 timestamp (UTC) of the reset, or None when the
        provider has no budget config or no TTL is reported for its spend key.
    """
    budget_config = self._get_budget_config_for_provider(provider)
    if budget_config is None:
        return None

    spend_key = f"provider_spend:{provider}:{budget_config.time_period}"

    # Prefer Redis when configured, otherwise ask the router cache.
    ttl_source = self.router_cache.redis_cache or self.router_cache
    ttl_seconds = await ttl_source.async_get_ttl(spend_key)
    if ttl_seconds is None:
        return None

    reset_at = datetime.now(timezone.utc) + timedelta(seconds=ttl_seconds)
    return reset_at.isoformat()
|
||||
|
||||
async def _init_provider_budget_in_cache(
    self, provider: str, budget_config: ProviderBudgetInfo
):
    """
    Seed the cache entries that describe a provider's budget window.

    Each key is written only when the cache currently returns None for it:
    - provider_spend:{provider}:{budget_config.time_period} - stores the current spend (seeded with 0.0)
    - provider_budget_start_time:{provider} - stores the start time of the budget window (UTC epoch timestamp)

    Both keys are written with a TTL of
    `duration_in_seconds(budget_config.time_period)`.
    """
    window_ttl = duration_in_seconds(budget_config.time_period)
    start_time_key = f"provider_budget_start_time:{provider}"
    spend_key = f"provider_spend:{provider}:{budget_config.time_period}"

    # Record when the window started, unless one is already being tracked.
    if await self.router_cache.async_get_cache(start_time_key) is None:
        window_start = datetime.now(timezone.utc).timestamp()
        await self.router_cache.async_set_cache(
            key=start_time_key, value=window_start, ttl=window_ttl
        )

    # Start the spend counter at zero, without clobbering existing spend.
    if await self.router_cache.async_get_cache(spend_key) is None:
        await self.router_cache.async_set_cache(
            key=spend_key, value=0.0, ttl=window_ttl
        )
|
||||
|
|
|
@ -646,3 +646,12 @@ ProviderBudgetConfigType = Dict[str, ProviderBudgetInfo]
|
|||
class RouterCacheEnum(enum.Enum):
    """
    Cache key templates used by the router.

    Each value contains `{id}`, `{model}` and `{current_minute}` placeholders.
    """

    # presumably tokens-per-minute / requests-per-minute counters - confirm with callers
    TPM = "global_router:{id}:{model}:tpm:{current_minute}"
    RPM = "global_router:{id}:{model}:rpm:{current_minute}"
|
||||
|
||||
|
||||
class ProviderBudgetWindowDetails(BaseModel):
    """Details about a provider's budget window"""

    # Start of the budget window (presumably an epoch timestamp - confirm with callers)
    budget_start: float
    # Cache key under which the provider's spend is stored
    spend_key: str
    # Cache key under which the window start time is stored
    start_time_key: str
    # Length of the budget window in seconds
    ttl_seconds: int
|
||||
|
|
|
@ -2478,3 +2478,51 @@ async def test_redis_increment_pipeline():
|
|||
except Exception as e:
|
||||
print(f"Error occurred: {str(e)}")
|
||||
raise e
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_redis_get_ttl():
    """
    Test Redis get TTL functionality

    Redis returns -2 if the key does not exist and -1 if the key exists but has no associated expire.

    test that litellm redis caching wrapper handles -1 and -2 values and returns them as None

    Requires a reachable Redis configured through the REDIS_HOST, REDIS_PORT
    and REDIS_PASSWORD environment variables.
    """
    try:
        from litellm.caching.redis_cache import RedisCache

        redis_cache = RedisCache(
            host=os.environ["REDIS_HOST"],
            port=os.environ["REDIS_PORT"],
            password=os.environ["REDIS_PASSWORD"],
        )

        # Test case 1: Key does not exist
        result = await redis_cache.async_get_ttl("nonexistent_key")
        print("ttl for nonexistent key: ", result)
        assert result is None, f"Expected None for nonexistent key, got {result}"

        # Test case 2: Key exists with TTL
        test_key = "test_key_ttl"
        test_value = "test_value"
        ttl = 10  # 10 seconds TTL

        # Set a key with TTL directly through the raw client
        _redis_client = await redis_cache.init_async_client()
        async with _redis_client as redis_client:
            await redis_client.set(test_key, test_value, ex=ttl)

            # Get TTL and verify it's close to what we set
            result = await redis_cache.async_get_ttl(test_key)
            print("ttl for test_key: ", result)
            assert (
                result is not None and 0 <= result <= ttl
            ), f"Expected TTL between 0 and {ttl}, got {result}"

            # Clean up
            await redis_client.delete(test_key)

    except Exception as e:
        # Surface the failure in captured output before re-raising
        print(f"Error occurred: {str(e)}")
        raise e
|
||||
|
|
|
@ -21,6 +21,7 @@ from litellm.caching.caching import DualCache, RedisCache
|
|||
import logging
|
||||
from litellm._logging import verbose_router_logger
|
||||
import litellm
|
||||
from datetime import timezone, timedelta
|
||||
|
||||
verbose_router_logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
@ -476,3 +477,89 @@ async def test_sync_in_memory_spend_with_redis():
|
|||
|
||||
assert float(openai_spend) == 50.0
|
||||
assert float(anthropic_spend) == 75.0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_current_provider_spend():
    """
    Test _get_current_provider_spend helper method

    Scenarios:
    1. Provider with no budget config returns None
    2. Provider with budget config but no spend returns 0.0
    3. Provider with budget config and spend returns correct value
    """
    cleanup_redis()
    # DualCache() is built without a Redis cache here
    provider_budget = ProviderBudgetLimiting(
        router_cache=DualCache(),
        provider_budget_config={
            "openai": ProviderBudgetInfo(time_period="1d", budget_limit=100),
        },
    )

    # Test provider with no budget config
    spend = await provider_budget._get_current_provider_spend("anthropic")
    assert spend is None

    # Test provider with budget config but no spend
    spend = await provider_budget._get_current_provider_spend("openai")
    assert spend == 0.0

    # Test provider with budget config and spend
    # (key format must match the one built by _get_current_provider_spend)
    spend_key = "provider_spend:openai:1d"
    await provider_budget.router_cache.async_set_cache(key=spend_key, value=50.5)

    spend = await provider_budget._get_current_provider_spend("openai")
    assert spend == 50.5
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_current_provider_budget_reset_at():
    """
    Test _get_current_provider_budget_reset_at helper method

    Scenarios:
    1. Provider with no budget config returns None
    2. Provider with a "1d" budget returns an ISO timestamp about 24 hours from now
    3. Provider with a "1h" budget returns an ISO timestamp about 1 hour from now

    Requires a reachable Redis configured through the REDIS_HOST, REDIS_PORT
    and REDIS_PASSWORD environment variables.
    """
    cleanup_redis()
    provider_budget = ProviderBudgetLimiting(
        router_cache=DualCache(
            redis_cache=RedisCache(
                host=os.getenv("REDIS_HOST"),
                port=int(os.getenv("REDIS_PORT")),
                password=os.getenv("REDIS_PASSWORD"),
            )
        ),
        provider_budget_config={
            "openai": ProviderBudgetInfo(time_period="1d", budget_limit=100),
            "vertex_ai": ProviderBudgetInfo(time_period="1h", budget_limit=100),
        },
    )

    # presumably gives the limiter time to write its initial cache keys - confirm
    await asyncio.sleep(2)

    # Test provider with no budget config
    reset_at = await provider_budget._get_current_provider_budget_reset_at("anthropic")
    assert reset_at is None

    # Test provider with a 1d budget: reset time should be ~24 hours away
    reset_at = await provider_budget._get_current_provider_budget_reset_at("openai")
    assert reset_at is not None
    reset_time = datetime.fromisoformat(reset_at.replace("Z", "+00:00"))
    expected_time = datetime.now(timezone.utc) + timedelta(seconds=(24 * 60 * 60))
    time_difference = abs((reset_time - expected_time).total_seconds())
    assert time_difference < 5

    # Test provider with a 1h budget: reset time should be ~1 hour away
    reset_at = await provider_budget._get_current_provider_budget_reset_at("vertex_ai")
    assert reset_at is not None

    # Verify the timestamp format and approximate time
    reset_time = datetime.fromisoformat(reset_at.replace("Z", "+00:00"))
    expected_time = datetime.now(timezone.utc) + timedelta(seconds=3600)

    # Allow for small time differences (within 5 seconds)
    time_difference = abs((reset_time - expected_time).total_seconds())
    assert time_difference < 5
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue