From df8842031098574ac486c2ac90dc86393ab8e607 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 21:24:21 -0800 Subject: [PATCH] doc provider budgets --- .../docs/proxy/provider_budget_routing.md | 100 ++++++++++++++++-- 1 file changed, 93 insertions(+), 7 deletions(-) diff --git a/docs/my-website/docs/proxy/provider_budget_routing.md b/docs/my-website/docs/proxy/provider_budget_routing.md index a945ef89a..fea3f483c 100644 --- a/docs/my-website/docs/proxy/provider_budget_routing.md +++ b/docs/my-website/docs/proxy/provider_budget_routing.md @@ -4,18 +4,16 @@ import TabItem from '@theme/TabItem'; # Provider Budget Routing Use this to set budgets for LLM Providers - example $100/day for OpenAI, $100/day for Azure. +## Quick Start + +Set provider budgets in your `proxy_config.yaml` file +### Proxy Config setup ```yaml model_list: - model_name: gpt-3.5-turbo litellm_params: model: openai/gpt-3.5-turbo api_key: os.environ/OPENAI_API_KEY - - model_name: gpt-3.5-turbo - litellm_params: - model: azure/chatgpt-functioncalling - api_key: os.environ/AZURE_API_KEY - api_version: os.environ/AZURE_API_VERSION - api_base: os.environ/AZURE_API_BASE router_settings: redis_host: @@ -42,8 +40,66 @@ general_settings: master_key: sk-1234 ``` +### Make a test request -#### How provider-budget-routing works +We expect the first request to succeed, and the second request to fail since we cross the budget for `openai` + + +**[Langchain, OpenAI SDK Usage Examples](../proxy/user_keys#request-format)** + + + + +```shell +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "hi my name is test request"} + ] + }' +``` + + + + +Expect this to fail since since `ishaan@berri.ai` in the request is PII + +```shell +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "hi my name is test request"} + ] + }' +``` + +Expected response on failure + +```json +{ + "error": { + "message": "No deployments available - crossed budget for provider: Exceeded budget for provider openai: 0.0007350000000000001 >= 1e-12", + "type": "None", + "param": "None", + "code": "429" + } +} +``` + + + + + + + + +## How provider budget routing works 1. **Budget Tracking**: - Uses Redis to track spend for each provider @@ -62,3 +118,33 @@ general_settings: 4. **Requirements**: - Redis required for tracking spend across instances - Provider names must be litellm provider names. See [Supported Providers](https://docs.litellm.ai/docs/providers) + +## Monitoring Provider Remaining Budget + +LiteLLM will emit the following metric on Prometheus to track the remaining budget for each provider + +This metric indicates the remaining budget for a provider in dollars (USD) + +``` +litellm_provider_remaining_budget_metric{api_provider="openai"} 10 +``` + + +## Spec for provider_budget_config + +The `provider_budget_config` is a dictionary where: +- **Key**: Provider name (string) - Must be a valid [LiteLLM provider name](https://docs.litellm.ai/docs/providers) +- **Value**: Budget configuration object with the following parameters: + - `budget_limit`: Float value representing the budget in USD + - `time_period`: String in the format "Xd" where X is the number of days (e.g., "1d", "30d") + +Example structure: +```yaml +provider_budget_config: + openai: + budget_limit: 100.0 # $100 USD + time_period: "1d" # 1 day period + azure: + budget_limit: 500.0 # $500 USD + time_period: "30d" # 30 day period +``` \ No newline at end of file