add support for using in multi instance environments

2024-11-23 15:46:39 -08:00 · 2024-11-23 15:46:39 -08:00 · 84395e7a19
commit 84395e7a19
parent 94e2e292cd
3 changed files with 138 additions and 22 deletions
--- a/docs/my-website/docs/proxy/provider_budget_routing.md
+++ b/docs/my-website/docs/proxy/provider_budget_routing.md
@ -16,25 +16,27 @@ model_list:
        api_key: os.environ/OPENAI_API_KEY

 router_settings:
-  redis_host: <your-redis-host>
-  redis_password: <your-redis-password>
-  redis_port: <your-redis-port>
  provider_budget_config: 
-	openai: 
-		budget_limit: 0.000000000001 # float of $ value budget for time period
-		time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
-	azure:
-		budget_limit: 100
-		time_period: 1d
-	anthropic:
-		budget_limit: 100
-		time_period: 10d
-	vertex_ai:
-		budget_limit: 100
-		time_period: 12d
-	gemini:
-		budget_limit: 100
-		time_period: 12d
+    openai: 
+      budget_limit: 0.000000000001 # float of $ value budget for time period
+      time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
+    azure:
+      budget_limit: 100
+      time_period: 1d
+    anthropic:
+      budget_limit: 100
+      time_period: 10d
+    vertex_ai:
+      budget_limit: 100
+      time_period: 12d
+    gemini:
+      budget_limit: 100
+      time_period: 12d
+  
+  # OPTIONAL: Set Redis Host, Port, and Password if using multiple instances of LiteLLM
+  redis_host: os.environ/REDIS_HOST
+  redis_port: os.environ/REDIS_PORT
+  redis_password: os.environ/REDIS_PASSWORD

 general_settings:
  master_key: sk-1234
@ -132,6 +134,31 @@ This metric indicates the remaining budget for a provider in dollars (USD)
 litellm_provider_remaining_budget_metric{api_provider="openai"} 10
 ```

+## Multi-instance setup
+
+If you are using a multi-instance setup, you will need to set the Redis host, port, and password in the `proxy_config.yaml` file. Redis is used to sync the spend across LiteLLM instances.
+
+```yaml
+model_list:
+    - model_name: gpt-3.5-turbo
+      litellm_params:
+        model: openai/gpt-3.5-turbo
+        api_key: os.environ/OPENAI_API_KEY
+
+router_settings:
+  provider_budget_config: 
+    openai: 
+      budget_limit: 0.000000000001 # float of $ value budget for time period
+      time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
+  
+  # 👇 Add this: Set Redis Host, Port, and Password if using multiple instances of LiteLLM
+  redis_host: os.environ/REDIS_HOST
+  redis_port: os.environ/REDIS_PORT
+  redis_password: os.environ/REDIS_PASSWORD
+
+general_settings:
+  master_key: sk-1234
+```

 ## Spec for provider_budget_config

--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@ -2,8 +2,25 @@ model_list:
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
-      api_key: os.environ/OPENAI_API_KEY
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+  - model_name: fake-anthropic-endpoint
+    litellm_params:
+      model: anthropic/fake
+      api_base: https://exampleanthropicendpoint-production.up.railway.app/
+
+router_settings:
+  provider_budget_config: 
+    openai: 
+      budget_limit: 1 # float of $ value budget for time period
+      time_period: 1d # can be 1d, 2d, 30d 
+    anthropic:
+      budget_limit: 5
+      time_period: 1d
+  redis_host: os.environ/REDIS_HOST
+  redis_port: os.environ/REDIS_PORT
+  redis_password: os.environ/REDIS_PASSWORD
+
+litellm_settings:
+  callbacks: ["prometheus"]
+

-default_vertex_config:
-  vertex_project: "adroit-crow-413218"
-  vertex_location: "us-central1"
--- a/litellm/router_strategy/provider_budgets.py
+++ b/litellm/router_strategy/provider_budgets.py
@ -18,6 +18,7 @@ anthropic:
 ```
 """

+import asyncio
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union

 import litellm
@ -44,10 +45,13 @@ if TYPE_CHECKING:
 else:
    Span = Any

+DEFAULT_REDIS_SYNC_INTERVAL = 60
+

 class ProviderBudgetLimiting(CustomLogger):
    def __init__(self, router_cache: DualCache, provider_budget_config: dict):
        self.router_cache = router_cache
+        asyncio.create_task(self.periodic_sync_in_memory_spend_with_redis())

        # cast elements of provider_budget_config to ProviderBudgetInfo
        for provider, config in provider_budget_config.items():
@ -222,6 +226,74 @@ class ProviderBudgetLimiting(CustomLogger):
            f"Incremented spend for {spend_key} by {response_cost}, ttl: {ttl_seconds}"
        )

+    async def periodic_sync_in_memory_spend_with_redis(self):
+        """
+        Handler that triggers _sync_in_memory_spend_with_redis every DEFAULT_REDIS_SYNC_INTERVAL seconds
+
+        Required for multi-instance environment usage of provider budgets
+        """
+        while True:
+            try:
+                await self._sync_in_memory_spend_with_redis()
+                await asyncio.sleep(
+                    DEFAULT_REDIS_SYNC_INTERVAL
+                )  # Wait DEFAULT_REDIS_SYNC_INTERVAL seconds before next sync
+            except Exception as e:
+                verbose_router_logger.error(f"Error in periodic sync task: {str(e)}")
+                await asyncio.sleep(
+                    DEFAULT_REDIS_SYNC_INTERVAL
+                )  # Still wait DEFAULT_REDIS_SYNC_INTERVAL seconds on error before retrying
+
+    async def _sync_in_memory_spend_with_redis(self):
+        """
+        Ensures in-memory cache is updated with latest Redis values for all provider spends.
+
+        Why Do we need this?
+        - Redis is our source of truth for provider spend
+        - In-memory cache goes out of sync if it does not get updated with the values from Redis
+
+        Why not just rely on DualCache?
+        - DualCache does not handle synchronization between in-memory and Redis
+
+        In a multi-instance environment, each instance needs to periodically get the provider spend from Redis to ensure it is consistent across all instances.
+        """
+
+        try:
+            # No need to sync if Redis cache is not initialized
+            if self.router_cache.redis_cache is None:
+                return
+
+            # Get all providers and their budget configs
+            cache_keys = []
+            for provider, config in self.provider_budget_config.items():
+                if config is None:
+                    continue
+                cache_keys.append(f"provider_spend:{provider}:{config.time_period}")
+
+            # Batch fetch current spend values from Redis
+            redis_values = await self.router_cache.redis_cache.async_batch_get_cache(
+                key_list=cache_keys
+            )
+
+            # Update in-memory cache with Redis values
+            if isinstance(redis_values, dict):  # Check if redis_values is a dictionary
+                for key, value in redis_values.items():
+                    if value is not None:
+                        self.router_cache.in_memory_cache.set_cache(
+                            key=key, value=float(value)
+                        )
+                        verbose_router_logger.debug(
+                            f"Updated in-memory cache for {key}: {value}"
+                        )
+
+        except Exception as e:
+            import traceback
+
+            traceback.print_exc()
+            verbose_router_logger.error(
+                f"Error syncing in-memory cache with Redis: {str(e)}"
+            )
+
    def _get_budget_config_for_provider(
        self, provider: str
    ) -> Optional[ProviderBudgetInfo]: