forked from phoenix/litellm-mirror
add support for using in multi instance environments
This commit is contained in:
parent
94e2e292cd
commit
84395e7a19
3 changed files with 138 additions and 22 deletions
|
@ -16,9 +16,6 @@ model_list:
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
|
||||||
router_settings:
|
router_settings:
|
||||||
redis_host: <your-redis-host>
|
|
||||||
redis_password: <your-redis-password>
|
|
||||||
redis_port: <your-redis-port>
|
|
||||||
provider_budget_config:
|
provider_budget_config:
|
||||||
openai:
|
openai:
|
||||||
budget_limit: 0.000000000001 # float of $ value budget for time period
|
budget_limit: 0.000000000001 # float of $ value budget for time period
|
||||||
|
@ -36,6 +33,11 @@ router_settings:
|
||||||
budget_limit: 100
|
budget_limit: 100
|
||||||
time_period: 12d
|
time_period: 12d
|
||||||
|
|
||||||
|
# OPTIONAL: Set Redis Host, Port, and Password if using multiple instances of LiteLLM
|
||||||
|
redis_host: os.environ/REDIS_HOST
|
||||||
|
redis_port: os.environ/REDIS_PORT
|
||||||
|
redis_password: os.environ/REDIS_PASSWORD
|
||||||
|
|
||||||
general_settings:
|
general_settings:
|
||||||
master_key: sk-1234
|
master_key: sk-1234
|
||||||
```
|
```
|
||||||
|
@ -132,6 +134,31 @@ This metric indicates the remaining budget for a provider in dollars (USD)
|
||||||
litellm_provider_remaining_budget_metric{api_provider="openai"} 10
|
litellm_provider_remaining_budget_metric{api_provider="openai"} 10
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Multi-instance setup
|
||||||
|
|
||||||
|
If you are using a multi-instance setup, you will need to set the Redis host, port, and password in the `proxy_config.yaml` file. Redis is used to sync the spend across LiteLLM instances.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-3.5-turbo
|
||||||
|
litellm_params:
|
||||||
|
model: openai/gpt-3.5-turbo
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
|
||||||
|
router_settings:
|
||||||
|
provider_budget_config:
|
||||||
|
openai:
|
||||||
|
budget_limit: 0.000000000001 # float of $ value budget for time period
|
||||||
|
time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
|
||||||
|
|
||||||
|
# 👇 Add this: Set Redis Host, Port, and Password if using multiple instances of LiteLLM
|
||||||
|
redis_host: os.environ/REDIS_HOST
|
||||||
|
redis_port: os.environ/REDIS_PORT
|
||||||
|
redis_password: os.environ/REDIS_PASSWORD
|
||||||
|
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
```
|
||||||
|
|
||||||
## Spec for provider_budget_config
|
## Spec for provider_budget_config
|
||||||
|
|
||||||
|
|
|
@ -2,8 +2,25 @@ model_list:
|
||||||
- model_name: gpt-4o
|
- model_name: gpt-4o
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: openai/gpt-4o
|
model: openai/gpt-4o
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||||
|
- model_name: fake-anthropic-endpoint
|
||||||
|
litellm_params:
|
||||||
|
model: anthropic/fake
|
||||||
|
api_base: https://exampleanthropicendpoint-production.up.railway.app/
|
||||||
|
|
||||||
|
router_settings:
|
||||||
|
provider_budget_config:
|
||||||
|
openai:
|
||||||
|
budget_limit: 1 # float of $ value budget for time period
|
||||||
|
time_period: 1d # can be 1d, 2d, 30d
|
||||||
|
anthropic:
|
||||||
|
budget_limit: 5
|
||||||
|
time_period: 1d
|
||||||
|
redis_host: os.environ/REDIS_HOST
|
||||||
|
redis_port: os.environ/REDIS_PORT
|
||||||
|
redis_password: os.environ/REDIS_PASSWORD
|
||||||
|
|
||||||
|
litellm_settings:
|
||||||
|
callbacks: ["prometheus"]
|
||||||
|
|
||||||
|
|
||||||
default_vertex_config:
|
|
||||||
vertex_project: "adroit-crow-413218"
|
|
||||||
vertex_location: "us-central1"
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ anthropic:
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
|
@ -44,10 +45,13 @@ if TYPE_CHECKING:
|
||||||
else:
|
else:
|
||||||
Span = Any
|
Span = Any
|
||||||
|
|
||||||
|
DEFAULT_REDIS_SYNC_INTERVAL = 60
|
||||||
|
|
||||||
|
|
||||||
class ProviderBudgetLimiting(CustomLogger):
|
class ProviderBudgetLimiting(CustomLogger):
|
||||||
def __init__(self, router_cache: DualCache, provider_budget_config: dict):
|
def __init__(self, router_cache: DualCache, provider_budget_config: dict):
|
||||||
self.router_cache = router_cache
|
self.router_cache = router_cache
|
||||||
|
asyncio.create_task(self.periodic_sync_in_memory_spend_with_redis())
|
||||||
|
|
||||||
# cast elements of provider_budget_config to ProviderBudgetInfo
|
# cast elements of provider_budget_config to ProviderBudgetInfo
|
||||||
for provider, config in provider_budget_config.items():
|
for provider, config in provider_budget_config.items():
|
||||||
|
@ -222,6 +226,74 @@ class ProviderBudgetLimiting(CustomLogger):
|
||||||
f"Incremented spend for {spend_key} by {response_cost}, ttl: {ttl_seconds}"
|
f"Incremented spend for {spend_key} by {response_cost}, ttl: {ttl_seconds}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def periodic_sync_in_memory_spend_with_redis(self):
    """
    Handler that triggers `_sync_in_memory_spend_with_redis` every
    DEFAULT_REDIS_SYNC_INTERVAL seconds, forever.

    Required for multi-instance environment usage of provider budgets:
    each instance must periodically pull provider spend from Redis so
    its in-memory view stays consistent with the other instances.
    """
    while True:
        try:
            await self._sync_in_memory_spend_with_redis()
        except Exception as e:
            verbose_router_logger.error(f"Error in periodic sync task: {str(e)}")
        # Wait DEFAULT_REDIS_SYNC_INTERVAL seconds before the next sync,
        # whether the sync succeeded or failed. (Previous comments claimed
        # "5 seconds", which disagreed with the 60-second constant.)
        await asyncio.sleep(DEFAULT_REDIS_SYNC_INTERVAL)
|
||||||
|
|
||||||
|
async def _sync_in_memory_spend_with_redis(self):
    """
    Ensures the in-memory cache is updated with the latest Redis values
    for all provider spends.

    Why do we need this?
    - Redis is our source of truth for provider spend
    - The in-memory cache goes out of sync if it does not get updated
      with the values from Redis

    Why not just rely on DualCache?
    - DualCache does not handle synchronization between in-memory and Redis

    In a multi-instance environment, each instance needs to periodically
    get the provider spend from Redis to ensure it is consistent across
    all instances.
    """
    try:
        # No need to sync if the Redis cache is not initialized
        if self.router_cache.redis_cache is None:
            return

        # Build the spend cache key for every configured provider
        cache_keys = [
            f"provider_spend:{provider}:{config.time_period}"
            for provider, config in self.provider_budget_config.items()
            if config is not None
        ]

        # Batch fetch current spend values from Redis
        redis_values = await self.router_cache.redis_cache.async_batch_get_cache(
            key_list=cache_keys
        )

        # Update the in-memory cache with the Redis values
        if isinstance(redis_values, dict):  # defensive: expected shape is a dict
            for key, value in redis_values.items():
                if value is not None:
                    self.router_cache.in_memory_cache.set_cache(
                        key=key, value=float(value)
                    )
                    verbose_router_logger.debug(
                        f"Updated in-memory cache for {key}: {value}"
                    )

    except Exception as e:
        import traceback

        # Route the full traceback through the logger instead of
        # traceback.print_exc(), which bypassed the logging pipeline
        # by printing directly to stderr.
        verbose_router_logger.error(
            f"Error syncing in-memory cache with Redis: {str(e)}\n"
            f"{traceback.format_exc()}"
        )
|
||||||
|
|
||||||
def _get_budget_config_for_provider(
|
def _get_budget_config_for_provider(
|
||||||
self, provider: str
|
self, provider: str
|
||||||
) -> Optional[ProviderBudgetInfo]:
|
) -> Optional[ProviderBudgetInfo]:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue