forked from phoenix/litellm-mirror
add support for using in multi instance environments
This commit is contained in:
parent
94e2e292cd
commit
84395e7a19
3 changed files with 138 additions and 22 deletions
|
@ -16,9 +16,6 @@ model_list:
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
|
||||||
router_settings:
|
router_settings:
|
||||||
redis_host: <your-redis-host>
|
|
||||||
redis_password: <your-redis-password>
|
|
||||||
redis_port: <your-redis-port>
|
|
||||||
provider_budget_config:
|
provider_budget_config:
|
||||||
openai:
|
openai:
|
||||||
budget_limit: 0.000000000001 # float of $ value budget for time period
|
budget_limit: 0.000000000001 # float of $ value budget for time period
|
||||||
|
@ -36,6 +33,11 @@ router_settings:
|
||||||
budget_limit: 100
|
budget_limit: 100
|
||||||
time_period: 12d
|
time_period: 12d
|
||||||
|
|
||||||
|
# OPTIONAL: Set Redis Host, Port, and Password if using multiple instances of LiteLLM
|
||||||
|
redis_host: os.environ/REDIS_HOST
|
||||||
|
redis_port: os.environ/REDIS_PORT
|
||||||
|
redis_password: os.environ/REDIS_PASSWORD
|
||||||
|
|
||||||
general_settings:
|
general_settings:
|
||||||
master_key: sk-1234
|
master_key: sk-1234
|
||||||
```
|
```
|
||||||
|
@ -132,6 +134,31 @@ This metric indicates the remaining budget for a provider in dollars (USD)
|
||||||
litellm_provider_remaining_budget_metric{api_provider="openai"} 10
|
litellm_provider_remaining_budget_metric{api_provider="openai"} 10
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Multi-instance setup
|
||||||
|
|
||||||
|
If you are using a multi-instance setup, you will need to set the Redis host, port, and password in the `proxy_config.yaml` file. Redis is used to sync the spend across LiteLLM instances.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-3.5-turbo
|
||||||
|
litellm_params:
|
||||||
|
model: openai/gpt-3.5-turbo
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
|
||||||
|
router_settings:
|
||||||
|
provider_budget_config:
|
||||||
|
openai:
|
||||||
|
budget_limit: 0.000000000001 # float of $ value budget for time period
|
||||||
|
time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
|
||||||
|
|
||||||
|
# 👇 Add this: Set Redis Host, Port, and Password if using multiple instances of LiteLLM
|
||||||
|
redis_host: os.environ/REDIS_HOST
|
||||||
|
redis_port: os.environ/REDIS_PORT
|
||||||
|
redis_password: os.environ/REDIS_PASSWORD
|
||||||
|
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
```
|
||||||
|
|
||||||
## Spec for provider_budget_config
|
## Spec for provider_budget_config
|
||||||
|
|
||||||
|
|
|
@ -2,8 +2,25 @@ model_list:
|
||||||
- model_name: gpt-4o
|
- model_name: gpt-4o
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: openai/gpt-4o
|
model: openai/gpt-4o
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||||
|
- model_name: fake-anthropic-endpoint
|
||||||
|
litellm_params:
|
||||||
|
model: anthropic/fake
|
||||||
|
api_base: https://exampleanthropicendpoint-production.up.railway.app/
|
||||||
|
|
||||||
|
router_settings:
|
||||||
|
provider_budget_config:
|
||||||
|
openai:
|
||||||
|
budget_limit: 1 # float of $ value budget for time period
|
||||||
|
time_period: 1d # can be 1d, 2d, 30d
|
||||||
|
anthropic:
|
||||||
|
budget_limit: 5
|
||||||
|
time_period: 1d
|
||||||
|
redis_host: os.environ/REDIS_HOST
|
||||||
|
redis_port: os.environ/REDIS_PORT
|
||||||
|
redis_password: os.environ/REDIS_PASSWORD
|
||||||
|
|
||||||
|
litellm_settings:
|
||||||
|
callbacks: ["prometheus"]
|
||||||
|
|
||||||
|
|
||||||
default_vertex_config:
|
|
||||||
vertex_project: "adroit-crow-413218"
|
|
||||||
vertex_location: "us-central1"
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ anthropic:
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
|
@ -44,10 +45,13 @@ if TYPE_CHECKING:
|
||||||
else:
|
else:
|
||||||
Span = Any
|
Span = Any
|
||||||
|
|
||||||
|
DEFAULT_REDIS_SYNC_INTERVAL = 60
|
||||||
|
|
||||||
|
|
||||||
class ProviderBudgetLimiting(CustomLogger):
|
class ProviderBudgetLimiting(CustomLogger):
|
||||||
def __init__(self, router_cache: DualCache, provider_budget_config: dict):
|
def __init__(self, router_cache: DualCache, provider_budget_config: dict):
|
||||||
self.router_cache = router_cache
|
self.router_cache = router_cache
|
||||||
|
asyncio.create_task(self.periodic_sync_in_memory_spend_with_redis())
|
||||||
|
|
||||||
# cast elements of provider_budget_config to ProviderBudgetInfo
|
# cast elements of provider_budget_config to ProviderBudgetInfo
|
||||||
for provider, config in provider_budget_config.items():
|
for provider, config in provider_budget_config.items():
|
||||||
|
@ -222,6 +226,74 @@ class ProviderBudgetLimiting(CustomLogger):
|
||||||
f"Incremented spend for {spend_key} by {response_cost}, ttl: {ttl_seconds}"
|
f"Incremented spend for {spend_key} by {response_cost}, ttl: {ttl_seconds}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def periodic_sync_in_memory_spend_with_redis(self):
    """
    Handler that triggers `_sync_in_memory_spend_with_redis` every
    DEFAULT_REDIS_SYNC_INTERVAL seconds, forever.

    Required for multi-instance environment usage of provider budgets:
    each instance must periodically pull provider spend from Redis so
    its in-memory view stays consistent with the other instances.
    """
    while True:
        try:
            await self._sync_in_memory_spend_with_redis()
        except Exception as e:
            verbose_router_logger.error(f"Error in periodic sync task: {str(e)}")
        # Wait DEFAULT_REDIS_SYNC_INTERVAL seconds before the next sync,
        # whether the sync succeeded or failed. (Previous comments claimed
        # "5 seconds", which disagreed with the 60-second constant.)
        await asyncio.sleep(DEFAULT_REDIS_SYNC_INTERVAL)
|
||||||
|
|
||||||
|
async def _sync_in_memory_spend_with_redis(self):
    """
    Ensures the in-memory cache is updated with the latest Redis values
    for all provider spends.

    Why do we need this?
    - Redis is our source of truth for provider spend
    - The in-memory cache goes out of sync if it does not get updated
      with the values from Redis

    Why not just rely on DualCache?
    - DualCache does not handle synchronization between in-memory and Redis

    In a multi-instance environment, each instance needs to periodically
    get the provider spend from Redis to ensure it is consistent across
    all instances.
    """
    try:
        # No need to sync if the Redis cache is not initialized
        if self.router_cache.redis_cache is None:
            return

        # Build the spend cache key for every configured provider
        cache_keys = [
            f"provider_spend:{provider}:{config.time_period}"
            for provider, config in self.provider_budget_config.items()
            if config is not None
        ]

        # Batch fetch current spend values from Redis
        redis_values = await self.router_cache.redis_cache.async_batch_get_cache(
            key_list=cache_keys
        )

        # Update the in-memory cache with the Redis values
        if isinstance(redis_values, dict):  # defensive: expected shape is a dict
            for key, value in redis_values.items():
                if value is not None:
                    self.router_cache.in_memory_cache.set_cache(
                        key=key, value=float(value)
                    )
                    verbose_router_logger.debug(
                        f"Updated in-memory cache for {key}: {value}"
                    )

    except Exception as e:
        import traceback

        # Route the full traceback through the logger instead of
        # traceback.print_exc(), which bypassed the logging pipeline
        # by printing directly to stderr.
        verbose_router_logger.error(
            f"Error syncing in-memory cache with Redis: {str(e)}\n"
            f"{traceback.format_exc()}"
        )
|
||||||
|
|
||||||
def _get_budget_config_for_provider(
|
def _get_budget_config_for_provider(
|
||||||
self, provider: str
|
self, provider: str
|
||||||
) -> Optional[ProviderBudgetInfo]:
|
) -> Optional[ProviderBudgetInfo]:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue