add support for use in multi-instance environments

Ishaan Jaff 2024-11-23 15:46:39 -08:00
parent 94e2e292cd
commit 84395e7a19
3 changed files with 138 additions and 22 deletions


@@ -16,25 +16,27 @@ model_list:
       api_key: os.environ/OPENAI_API_KEY

 router_settings:
-  redis_host: <your-redis-host>
-  redis_password: <your-redis-password>
-  redis_port: <your-redis-port>
   provider_budget_config:
-    openai:
-      budget_limit: 0.000000000001 # float of $ value budget for time period
-      time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
-    azure:
-      budget_limit: 100
-      time_period: 1d
-    anthropic:
-      budget_limit: 100
-      time_period: 10d
-    vertex_ai:
-      budget_limit: 100
-      time_period: 12d
-    gemini:
-      budget_limit: 100
-      time_period: 12d
+    openai:
+      budget_limit: 0.000000000001 # float of $ value budget for time period
+      time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
+    azure:
+      budget_limit: 100
+      time_period: 1d
+    anthropic:
+      budget_limit: 100
+      time_period: 10d
+    vertex_ai:
+      budget_limit: 100
+      time_period: 12d
+    gemini:
+      budget_limit: 100
+      time_period: 12d
+
+  # OPTIONAL: Set Redis Host, Port, and Password if using multiple instances of LiteLLM
+  redis_host: os.environ/REDIS_HOST
+  redis_port: os.environ/REDIS_PORT
+  redis_password: os.environ/REDIS_PASSWORD

 general_settings:
   master_key: sk-1234
@@ -132,6 +134,31 @@ This metric indicates the remaining budget for a provider in dollars (USD)

 litellm_provider_remaining_budget_metric{api_provider="openai"} 10
 ```

+## Multi-instance setup
+
+If you are running multiple instances of LiteLLM, set the Redis host, port, and password in `proxy_config.yaml`. Redis is used to sync provider spend across all LiteLLM instances.
+
+```yaml
+model_list:
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: openai/gpt-3.5-turbo
+      api_key: os.environ/OPENAI_API_KEY
+
+router_settings:
+  provider_budget_config:
+    openai:
+      budget_limit: 0.000000000001 # float of $ value budget for time period
+      time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
+
+  # 👇 Add this: Set Redis Host, Port, and Password if using multiple instances of LiteLLM
+  redis_host: os.environ/REDIS_HOST
+  redis_port: os.environ/REDIS_PORT
+  redis_password: os.environ/REDIS_PASSWORD
+
+general_settings:
+  master_key: sk-1234
+```
+
 ## Spec for provider_budget_config
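The same budgets can also be configured from the Python SDK. A minimal sketch, assuming `litellm.Router` accepts `provider_budget_config` alongside the `redis_*` kwargs shown in the YAML above (the limiter code below casts plain dict entries to `ProviderBudgetInfo`; exact parameter names may vary by version):

```python
# Illustrative sketch, not from this commit: SDK-level equivalent of the YAML config above.
import os

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "openai/gpt-3.5-turbo",
                "api_key": os.environ["OPENAI_API_KEY"],
            },
        }
    ],
    provider_budget_config={
        # budget_limit is a dollar amount; time_period e.g. "1d", "30d", "1mo"
        "openai": {"budget_limit": 0.000000000001, "time_period": "1d"},
    },
    # Point every instance at the same Redis so spend is shared between them
    redis_host=os.environ["REDIS_HOST"],
    redis_port=int(os.environ["REDIS_PORT"]),
    redis_password=os.environ["REDIS_PASSWORD"],
)
```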


@@ -2,8 +2,25 @@ model_list:
   - model_name: gpt-4o
     litellm_params:
       model: openai/gpt-4o
       api_key: os.environ/OPENAI_API_KEY
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
+  - model_name: fake-anthropic-endpoint
+    litellm_params:
+      model: anthropic/fake
+      api_base: https://exampleanthropicendpoint-production.up.railway.app/
+
+router_settings:
+  provider_budget_config:
+    openai:
+      budget_limit: 1 # float of $ value budget for time period
+      time_period: 1d # can be 1d, 2d, 30d
+    anthropic:
+      budget_limit: 5
+      time_period: 1d
+  redis_host: os.environ/REDIS_HOST
+  redis_port: os.environ/REDIS_PORT
+  redis_password: os.environ/REDIS_PASSWORD

 litellm_settings:
   callbacks: ["prometheus"]
+
+default_vertex_config:
+  vertex_project: "adroit-crow-413218"
+  vertex_location: "us-central1"
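For reference, the spend counters this config shares through Redis live under keys of the form `provider_spend:{provider}:{time_period}` (see the sync code below). A sketch for inspecting them with the standard redis-py client; illustrative only, not part of this commit:

```python
# Inspect the per-provider spend counters the proxy keeps in Redis.
# Key format taken from the ProviderBudgetLimiting sync code below.
import os

import redis

r = redis.Redis(
    host=os.environ["REDIS_HOST"],
    port=int(os.environ["REDIS_PORT"]),
    password=os.environ["REDIS_PASSWORD"],
    decode_responses=True,
)

keys = ["provider_spend:openai:1d", "provider_spend:anthropic:1d"]
for key, value in zip(keys, r.mget(keys)):
    # Value is the running USD spend for that provider in the current window
    print(key, "->", value if value is not None else "no spend recorded")
```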


@@ -18,6 +18,7 @@ anthropic:
 ```
 """

+import asyncio
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union

 import litellm
@@ -44,10 +45,13 @@ if TYPE_CHECKING:
 else:
     Span = Any

+DEFAULT_REDIS_SYNC_INTERVAL = 60  # seconds between Redis -> in-memory spend syncs
+

 class ProviderBudgetLimiting(CustomLogger):
     def __init__(self, router_cache: DualCache, provider_budget_config: dict):
         self.router_cache = router_cache
+        asyncio.create_task(self.periodic_sync_in_memory_spend_with_redis())  # start background Redis sync

         # cast elements of provider_budget_config to ProviderBudgetInfo
         for provider, config in provider_budget_config.items():
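One subtlety in the constructor above: `asyncio.create_task` requires an already-running event loop, so this pattern assumes the limiter is constructed inside one (as it is in the proxy). A toy sketch of the pattern with a hypothetical class:

```python
# Toy sketch (hypothetical class, not LiteLLM code): starting a periodic
# background task from __init__ only works inside a running event loop.
import asyncio

class PeriodicSyncer:
    def __init__(self) -> None:
        # Raises RuntimeError if constructed outside a running event loop;
        # keep a reference so the task isn't garbage collected.
        self._task = asyncio.create_task(self._loop())

    async def _loop(self) -> None:
        while True:
            print("sync tick")
            await asyncio.sleep(1)

async def main() -> None:
    PeriodicSyncer()          # safe: called inside asyncio.run's loop
    await asyncio.sleep(2.5)  # observe a few ticks

asyncio.run(main())
```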
@@ -222,6 +226,74 @@ class ProviderBudgetLimiting(CustomLogger):
             f"Incremented spend for {spend_key} by {response_cost}, ttl: {ttl_seconds}"
         )

+    async def periodic_sync_in_memory_spend_with_redis(self):
+        """
+        Handler that triggers _sync_in_memory_spend_with_redis every DEFAULT_REDIS_SYNC_INTERVAL seconds
+
+        Required for multi-instance environment usage of provider budgets
+        """
+        while True:
+            try:
+                await self._sync_in_memory_spend_with_redis()
+                await asyncio.sleep(
+                    DEFAULT_REDIS_SYNC_INTERVAL
+                )  # wait DEFAULT_REDIS_SYNC_INTERVAL seconds before the next sync
+            except Exception as e:
+                verbose_router_logger.error(f"Error in periodic sync task: {str(e)}")
+                await asyncio.sleep(
+                    DEFAULT_REDIS_SYNC_INTERVAL
+                )  # still wait the full interval on error before retrying
+
+    async def _sync_in_memory_spend_with_redis(self):
+        """
+        Ensures the in-memory cache is updated with the latest Redis values for all provider spends.
+
+        Why do we need this?
+        - Redis is our source of truth for provider spend
+        - The in-memory cache goes out of sync if it does not get updated with the values from Redis
+
+        Why not just rely on DualCache?
+        - DualCache does not handle synchronization between in-memory and Redis
+
+        In a multi-instance environment, each instance needs to periodically pull provider spend from Redis to stay consistent across all instances.
+        """
+        try:
+            # No need to sync if Redis cache is not initialized
+            if self.router_cache.redis_cache is None:
+                return
+
+            # Get all providers and their budget configs
+            cache_keys = []
+            for provider, config in self.provider_budget_config.items():
+                if config is None:
+                    continue
+                cache_keys.append(f"provider_spend:{provider}:{config.time_period}")
+
+            # Batch fetch current spend values from Redis
+            redis_values = await self.router_cache.redis_cache.async_batch_get_cache(
+                key_list=cache_keys
+            )
+
+            # Update the in-memory cache with Redis values
+            if isinstance(redis_values, dict):  # expected return type
+                for key, value in redis_values.items():
+                    if value is not None:
+                        self.router_cache.in_memory_cache.set_cache(
+                            key=key, value=float(value)
+                        )
+                        verbose_router_logger.debug(
+                            f"Updated in-memory cache for {key}: {value}"
+                        )
+        except Exception as e:
+            verbose_router_logger.exception(
+                f"Error syncing in-memory cache with Redis: {str(e)}"
+            )

     def _get_budget_config_for_provider(
         self, provider: str
     ) -> Optional[ProviderBudgetInfo]:
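To make the docstring's argument concrete: without the periodic pull, each instance only ever sees its own spend. A toy sketch of the drift problem and the fix (hypothetical classes, not LiteLLM code):

```python
# Two proxy instances each keep an in-memory spend counter; only the shared
# store sees both. The periodic pull makes local counters consistent again.
shared_redis = {"provider_spend:openai:1d": 0.0}  # stand-in for shared Redis

class ProxyInstance:
    def __init__(self) -> None:
        self.in_memory = {"provider_spend:openai:1d": 0.0}

    def record_spend(self, cost: float) -> None:
        # On each request, an instance increments both its local cache and Redis
        self.in_memory["provider_spend:openai:1d"] += cost
        shared_redis["provider_spend:openai:1d"] += cost

    def sync_from_redis(self) -> None:
        # The periodic task: overwrite local values with Redis, the source of truth
        self.in_memory.update(shared_redis)

a, b = ProxyInstance(), ProxyInstance()
a.record_spend(3.0)  # request routed to instance A
b.record_spend(2.0)  # request routed to instance B
print(a.in_memory["provider_spend:openai:1d"])  # 3.0 -- A is blind to B's spend
a.sync_from_redis()
print(a.in_memory["provider_spend:openai:1d"])  # 5.0 -- consistent after sync
```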