(feat) - provider budget improvements - ensure provider budgets work with multiple proxy instances + improve latency to ~90ms (#6886)

* use 1 file for duration_in_seconds * add to readme.md * re use duration_in_seconds * fix importing _extract_from_regex, get_last_day_of_month * fix import * update provider budget routing * fix - remove dup test * add support for using in multi instance environments * test_in_memory_redis_sync_e2e * test_in_memory_redis_sync_e2e * fix test_in_memory_redis_sync_e2e * fix code quality check * fix test provider budgets * working provider budget tests * add fixture for provider budget routing * fix router testing for provider budgets * add comments on provider budget routing * use RedisPipelineIncrementOperation * add redis async_increment_pipeline * use redis async_increment_pipeline * use lower value for testing * use redis async_increment_pipeline * use consistent key name for increment op * add handling for budget windows * fix typing async_increment_pipeline * fix set attr * add clear doc strings * unit testing for provider budgets * test_redis_increment_pipeline
2024-11-24 16:36:19 -08:00 · 2024-11-24 16:36:19 -08:00 · c73ce95c01
commit c73ce95c01
parent 34bfebe470
7 changed files with 638 additions and 52 deletions
--- a/docs/my-website/docs/proxy/provider_budget_routing.md
+++ b/docs/my-website/docs/proxy/provider_budget_routing.md
@ -16,25 +16,27 @@ model_list:
        api_key: os.environ/OPENAI_API_KEY

 router_settings:
-  redis_host: <your-redis-host>
-  redis_password: <your-redis-password>
-  redis_port: <your-redis-port>
  provider_budget_config: 
-	openai: 
-		budget_limit: 0.000000000001 # float of $ value budget for time period
-		time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
-	azure:
-		budget_limit: 100
-		time_period: 1d
-	anthropic:
-		budget_limit: 100
-		time_period: 10d
-	vertex_ai:
-		budget_limit: 100
-		time_period: 12d
-	gemini:
-		budget_limit: 100
-		time_period: 12d
+    openai: 
+      budget_limit: 0.000000000001 # float of $ value budget for time period
+      time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
+    azure:
+      budget_limit: 100
+      time_period: 1d
+    anthropic:
+      budget_limit: 100
+      time_period: 10d
+    vertex_ai:
+      budget_limit: 100
+      time_period: 12d
+    gemini:
+      budget_limit: 100
+      time_period: 12d
+  
+  # OPTIONAL: Set Redis Host, Port, and Password if using multiple instances of LiteLLM
+  redis_host: os.environ/REDIS_HOST
+  redis_port: os.environ/REDIS_PORT
+  redis_password: os.environ/REDIS_PASSWORD

 general_settings:
  master_key: sk-1234
@ -132,6 +134,31 @@ This metric indicates the remaining budget for a provider in dollars (USD)
 litellm_provider_remaining_budget_metric{api_provider="openai"} 10
 ```

+## Multi-instance setup
+
+If you are using a multi-instance setup, you will need to set the Redis host, port, and password under `router_settings` in the `proxy_config.yaml` file. Redis is used to sync the spend across LiteLLM instances.
+
+```yaml
+model_list:
+    - model_name: gpt-3.5-turbo
+      litellm_params:
+        model: openai/gpt-3.5-turbo
+        api_key: os.environ/OPENAI_API_KEY
+
+router_settings:
+  provider_budget_config: 
+    openai: 
+      budget_limit: 0.000000000001 # float of $ value budget for time period
+      time_period: 1d # can be 1d, 2d, 30d, 1mo, 2mo
+  
+  # 👇 Add this: Set Redis Host, Port, and Password if using multiple instances of LiteLLM
+  redis_host: os.environ/REDIS_HOST
+  redis_port: os.environ/REDIS_PORT
+  redis_password: os.environ/REDIS_PASSWORD
+
+general_settings:
+  master_key: sk-1234
+```

 ## Spec for provider_budget_config