diff --git a/docs/my-website/docs/proxy/provider_budget_routing.md b/docs/my-website/docs/proxy/provider_budget_routing.md index 293f9e9d8..1cb75d667 100644 --- a/docs/my-website/docs/proxy/provider_budget_routing.md +++ b/docs/my-website/docs/proxy/provider_budget_routing.md @@ -16,25 +16,27 @@ model_list: api_key: os.environ/OPENAI_API_KEY router_settings: - redis_host: - redis_password: - redis_port: provider_budget_config: - openai: - budget_limit: 0.000000000001 # float of $ value budget for time period - time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo - azure: - budget_limit: 100 - time_period: 1d - anthropic: - budget_limit: 100 - time_period: 10d - vertex_ai: - budget_limit: 100 - time_period: 12d - gemini: - budget_limit: 100 - time_period: 12d + openai: + budget_limit: 0.000000000001 # float of $ value budget for time period + time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo + azure: + budget_limit: 100 + time_period: 1d + anthropic: + budget_limit: 100 + time_period: 10d + vertex_ai: + budget_limit: 100 + time_period: 12d + gemini: + budget_limit: 100 + time_period: 12d + + # OPTIONAL: Set Redis Host, Port, and Password if using multiple instances of LiteLLM + redis_host: os.environ/REDIS_HOST + redis_port: os.environ/REDIS_PORT + redis_password: os.environ/REDIS_PASSWORD general_settings: master_key: sk-1234 @@ -132,6 +134,31 @@ This metric indicates the remaining budget for a provider in dollars (USD) litellm_provider_remaining_budget_metric{api_provider="openai"} 10 ``` +## Multi-instance setup + +If you are using a multi-instance setup, you will need to set the Redis host, port, and password in the `proxy_config.yaml` file. Redis is used to sync the spend across LiteLLM instances. 
+ +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: openai/gpt-3.5-turbo + api_key: os.environ/OPENAI_API_KEY + +router_settings: + provider_budget_config: + openai: + budget_limit: 0.000000000001 # float of $ value budget for time period + time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo + + # 👇 Add this: Set Redis Host, Port, and Password if using multiple instances of LiteLLM + redis_host: os.environ/REDIS_HOST + redis_port: os.environ/REDIS_PORT + redis_password: os.environ/REDIS_PASSWORD + +general_settings: + master_key: sk-1234 +``` ## Spec for provider_budget_config diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 956a17a75..f716585b3 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -2,8 +2,25 @@ model_list: - model_name: gpt-4o litellm_params: model: openai/gpt-4o - api_key: os.environ/OPENAI_API_KEY + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + - model_name: fake-anthropic-endpoint + litellm_params: + model: anthropic/fake + api_base: https://exampleanthropicendpoint-production.up.railway.app/ + +router_settings: + provider_budget_config: + openai: + budget_limit: 1 # float of $ value budget for time period + time_period: 1d # can be 1d, 2d, 30d + anthropic: + budget_limit: 5 + time_period: 1d + redis_host: os.environ/REDIS_HOST + redis_port: os.environ/REDIS_PORT + redis_password: os.environ/REDIS_PASSWORD + +litellm_settings: + callbacks: ["prometheus"] + -default_vertex_config: - vertex_project: "adroit-crow-413218" - vertex_location: "us-central1" diff --git a/litellm/router_strategy/provider_budgets.py b/litellm/router_strategy/provider_budgets.py index ea26d2c0f..42e63b297 100644 --- a/litellm/router_strategy/provider_budgets.py +++ b/litellm/router_strategy/provider_budgets.py @@ -18,6 +18,7 @@ anthropic: ``` """ +import asyncio from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union import litellm @@ 
-44,10 +45,13 @@ if TYPE_CHECKING: else: Span = Any +DEFAULT_REDIS_SYNC_INTERVAL = 60 + class ProviderBudgetLimiting(CustomLogger): def __init__(self, router_cache: DualCache, provider_budget_config: dict): self.router_cache = router_cache + asyncio.create_task(self.periodic_sync_in_memory_spend_with_redis()) # cast elements of provider_budget_config to ProviderBudgetInfo for provider, config in provider_budget_config.items(): @@ -222,6 +226,74 @@ class ProviderBudgetLimiting(CustomLogger): f"Incremented spend for {spend_key} by {response_cost}, ttl: {ttl_seconds}" ) + async def periodic_sync_in_memory_spend_with_redis(self): + """ + Handler that triggers _sync_in_memory_spend_with_redis every DEFAULT_REDIS_SYNC_INTERVAL seconds + + Required for multi-instance environment usage of provider budgets + """ + while True: + try: + await self._sync_in_memory_spend_with_redis() + await asyncio.sleep( + DEFAULT_REDIS_SYNC_INTERVAL + ) # Wait DEFAULT_REDIS_SYNC_INTERVAL seconds before next sync + except Exception as e: + verbose_router_logger.error(f"Error in periodic sync task: {str(e)}") + await asyncio.sleep( + DEFAULT_REDIS_SYNC_INTERVAL + ) # Still wait DEFAULT_REDIS_SYNC_INTERVAL seconds on error before retrying + + async def _sync_in_memory_spend_with_redis(self): + """ + Ensures in-memory cache is updated with latest Redis values for all provider spends. + + Why do we need this? + - Redis is our source of truth for provider spend + - In-memory cache goes out of sync if it does not get updated with the values from Redis + + Why not just rely on DualCache? + - DualCache does not handle synchronization between in-memory and Redis + + In a multi-instance environment, each instance needs to periodically get the provider spend from Redis to ensure it is consistent across all instances. 
+ """ + + try: + # No need to sync if Redis cache is not initialized + if self.router_cache.redis_cache is None: + return + + # Get all providers and their budget configs + cache_keys = [] + for provider, config in self.provider_budget_config.items(): + if config is None: + continue + cache_keys.append(f"provider_spend:{provider}:{config.time_period}") + + # Batch fetch current spend values from Redis + redis_values = await self.router_cache.redis_cache.async_batch_get_cache( + key_list=cache_keys + ) + + # Update in-memory cache with Redis values + if isinstance(redis_values, dict): # Check if redis_values is a dictionary + for key, value in redis_values.items(): + if value is not None: + self.router_cache.in_memory_cache.set_cache( + key=key, value=float(value) + ) + verbose_router_logger.debug( + f"Updated in-memory cache for {key}: {value}" + ) + + except Exception as e: + import traceback + + traceback.print_exc() + verbose_router_logger.error( + f"Error syncing in-memory cache with Redis: {str(e)}" + ) + def _get_budget_config_for_provider( self, provider: str ) -> Optional[ProviderBudgetInfo]: