diff --git a/docs/my-website/docs/proxy/configs.md b/docs/my-website/docs/proxy/configs.md
index 980a781ac..3e2d1f860 100644
--- a/docs/my-website/docs/proxy/configs.md
+++ b/docs/my-website/docs/proxy/configs.md
@@ -94,18 +94,18 @@ Run with `--detailed_debug` if you need detailed debug logs
 ```shell
 $ litellm --config /path/to/config.yaml --detailed_debug
+```
+
 :::
-### Using Proxy - Curl Request, OpenAI Package, Langchain, Langchain JS
-Calling a model group
-
-
-
+#### Step 3: Test it
 Sends request to model where `model_name=gpt-3.5-turbo` on config.yaml. If multiple with `model_name=gpt-3.5-turbo` does [Load Balancing](https://docs.litellm.ai/docs/proxy/load_balancing)
+**[Langchain, OpenAI SDK Usage Examples](../proxy/user_keys#request-format)**
+
 ```shell
 curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
@@ -120,105 +120,10 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 }
 '
 ```
-
-
+## LLM configs `model_list`
-Sends this request to model where `model_name=bedrock-claude-v1` on config.yaml
-
-```shell
-curl --location 'http://0.0.0.0:4000/chat/completions' \
---header 'Content-Type: application/json' \
---data ' {
-      "model": "bedrock-claude-v1",
-      "messages": [
-        {
-          "role": "user",
-          "content": "what llm are you"
-        }
-      ],
-    }
-'
-```
-
-
-```python
-import openai
-client = openai.OpenAI(
-    api_key="anything",
-    base_url="http://0.0.0.0:4000"
-)
-
-# Sends request to model where `model_name=gpt-3.5-turbo` on config.yaml.
-response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
-    {
-        "role": "user",
-        "content": "this is a test request, write a short poem"
-    }
-])
-
-print(response)
-
-# Sends this request to model where `model_name=bedrock-claude-v1` on config.yaml
-response = client.chat.completions.create(model="bedrock-claude-v1", messages = [
-    {
-        "role": "user",
-        "content": "this is a test request, write a short poem"
-    }
-])
-
-print(response)
-
-```
-
-
-
-```python
-from langchain.chat_models import ChatOpenAI
-from langchain.prompts.chat import (
-    ChatPromptTemplate,
-    HumanMessagePromptTemplate,
-    SystemMessagePromptTemplate,
-)
-from langchain.schema import HumanMessage, SystemMessage
-
-messages = [
-    SystemMessage(
-        content="You are a helpful assistant that im using to make a test request to."
-    ),
-    HumanMessage(
-        content="test from litellm. tell me why it's amazing in 1 sentence"
-    ),
-]
-
-# Sends request to model where `model_name=gpt-3.5-turbo` on config.yaml.
-chat = ChatOpenAI(
-    openai_api_base="http://0.0.0.0:4000", # set openai base to the proxy
-    model = "gpt-3.5-turbo",
-    temperature=0.1
-)
-
-response = chat(messages)
-print(response)
-
-# Sends request to model where `model_name=bedrock-claude-v1` on config.yaml.
-claude_chat = ChatOpenAI(
-    openai_api_base="http://0.0.0.0:4000", # set openai base to the proxy
-    model = "bedrock-claude-v1",
-    temperature=0.1
-)
-
-response = claude_chat(messages)
-print(response)
-```
-
-
-
-
-
-## Save Model-specific params (API Base, Keys, Temperature, Max Tokens, Organization, Headers etc.)
+### Model-specific params (API Base, Keys, Temperature, Max Tokens, Organization, Headers etc.)
 You can use the config to save model-specific information like api_base, api_key, temperature, max_tokens, etc.
[**All input params**](https://docs.litellm.ai/docs/completion/input#input-params-1) @@ -259,32 +164,6 @@ model_list: $ litellm --config /path/to/config.yaml ``` -## Use CONFIG_FILE_PATH for proxy (Easier Azure container deployment) - -1. Setup config.yaml - -```yaml -model_list: - - model_name: gpt-3.5-turbo - litellm_params: - model: gpt-3.5-turbo - api_key: os.environ/OPENAI_API_KEY -``` - -2. Store filepath as env var - -```bash -CONFIG_FILE_PATH="/path/to/config.yaml" -``` - -3. Start Proxy - -```bash -$ litellm - -# RUNNING on http://0.0.0.0:4000 -``` - **Expected Logs:** Look for this line in your console logs to confirm the config.yaml was loaded in correctly. @@ -292,243 +171,10 @@ Look for this line in your console logs to confirm the config.yaml was loaded in LiteLLM: Proxy initialized with Config, Set models: ``` -## Multiple OpenAI Organizations - -Add all openai models across all OpenAI organizations with just 1 model definition - -```yaml - - model_name: * - litellm_params: - model: openai/* - api_key: os.environ/OPENAI_API_KEY - organization: - - org-1 - - org-2 - - org-3 -``` - -LiteLLM will automatically create separate deployments for each org. - -Confirm this via - -```bash -curl --location 'http://0.0.0.0:4000/v1/model/info' \ ---header 'Authorization: Bearer ${LITELLM_KEY}' \ ---data '' -``` - - -## Provider specific wildcard routing -**Proxy all models from a provider** - -Use this if you want to **proxy all models from a specific provider without defining them on the config.yaml** - -**Step 1** - define provider specific routing on config.yaml -```yaml -model_list: - # provider specific wildcard routing - - model_name: "anthropic/*" - litellm_params: - model: "anthropic/*" - api_key: os.environ/ANTHROPIC_API_KEY - - model_name: "groq/*" - litellm_params: - model: "groq/*" - api_key: os.environ/GROQ_API_KEY -``` - -Step 2 - Run litellm proxy - -```shell -$ litellm --config /path/to/config.yaml -``` - -Step 3 Test it - -Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*` -```shell -curl http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-1234" \ - -d '{ - "model": "anthropic/claude-3-sonnet-20240229", - "messages": [ - {"role": "user", "content": "Hello, Claude!"} - ] - }' -``` - -Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*` -```shell -curl http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-1234" \ - -d '{ - "model": "groq/llama3-8b-8192", - "messages": [ - {"role": "user", "content": "Hello, Claude!"} - ] - }' -``` - -## Load Balancing - -:::info -For more on this, go to [this page](https://docs.litellm.ai/docs/proxy/load_balancing) -::: - -Use this to call multiple instances of the same model and configure things like [routing strategy](https://docs.litellm.ai/docs/routing#advanced). - -For optimal performance: -- Set `tpm/rpm` per model deployment. Weighted picks are then based on the established tpm/rpm. -- Select your optimal routing strategy in `router_settings:routing_strategy`. - -LiteLLM supports -```python -["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"` -``` - -When `tpm/rpm` is set + `routing_strategy==simple-shuffle` litellm will use a weighted pick based on set tpm/rpm. 
**In our load tests setting tpm/rpm for all deployments + `routing_strategy==simple-shuffle` maximized throughput** -- When using multiple LiteLLM Servers / Kubernetes set redis settings `router_settings:redis_host` etc - -```yaml -model_list: - - model_name: zephyr-beta - litellm_params: - model: huggingface/HuggingFaceH4/zephyr-7b-beta - api_base: http://0.0.0.0:8001 - rpm: 60 # Optional[int]: When rpm/tpm set - litellm uses weighted pick for load balancing. rpm = Rate limit for this deployment: in requests per minute (rpm). - tpm: 1000 # Optional[int]: tpm = Tokens Per Minute - - model_name: zephyr-beta - litellm_params: - model: huggingface/HuggingFaceH4/zephyr-7b-beta - api_base: http://0.0.0.0:8002 - rpm: 600 - - model_name: zephyr-beta - litellm_params: - model: huggingface/HuggingFaceH4/zephyr-7b-beta - api_base: http://0.0.0.0:8003 - rpm: 60000 - - model_name: gpt-3.5-turbo - litellm_params: - model: gpt-3.5-turbo - api_key: - rpm: 200 - - model_name: gpt-3.5-turbo-16k - litellm_params: - model: gpt-3.5-turbo-16k - api_key: - rpm: 100 - -litellm_settings: - num_retries: 3 # retry call 3 times on each model_name (e.g. zephyr-beta) - request_timeout: 10 # raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout - fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo"]}] # fallback to gpt-3.5-turbo if call fails num_retries - context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error - allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. - -router_settings: # router_settings are optional - routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle" - model_group_alias: {"gpt-4": "gpt-3.5-turbo"} # all requests with `gpt-4` will be routed to models with `gpt-3.5-turbo` - num_retries: 2 - timeout: 30 # 30 seconds - redis_host: # set this when using multiple litellm proxy deployments, load balancing state stored in redis - redis_password: - redis_port: 1992 -``` - -You can view your cost once you set up [Virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys) or [custom_callbacks](https://docs.litellm.ai/docs/proxy/logging) - -## Load API Keys - -### Load API Keys / config values from Environment - -If you have secrets saved in your environment, and don't want to expose them in the config.yaml, here's how to load model-specific keys from the environment. **This works for ANY value on the config.yaml** - -```yaml -os.environ/ # runs os.getenv("YOUR-ENV-VAR") -``` - -```yaml -model_list: - - model_name: gpt-4-team1 - litellm_params: # params for litellm.completion() - https://docs.litellm.ai/docs/completion/input#input---request-body - model: azure/chatgpt-v-2 - api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ - api_version: "2023-05-15" - api_key: os.environ/AZURE_NORTH_AMERICA_API_KEY # 👈 KEY CHANGE -``` - -[**See Code**](https://github.com/BerriAI/litellm/blob/c12d6c3fe80e1b5e704d9846b246c059defadce7/litellm/utils.py#L2366) - -s/o to [@David Manouchehri](https://www.linkedin.com/in/davidmanouchehri/) for helping with this. - -### Load API Keys from Azure Vault - -1. Install Proxy dependencies -```bash -$ pip install 'litellm[proxy]' 'litellm[extra_proxy]' -``` - -2. 
Save Azure details in your environment -```bash -export["AZURE_CLIENT_ID"]="your-azure-app-client-id" -export["AZURE_CLIENT_SECRET"]="your-azure-app-client-secret" -export["AZURE_TENANT_ID"]="your-azure-tenant-id" -export["AZURE_KEY_VAULT_URI"]="your-azure-key-vault-uri" -``` - -3. Add to proxy config.yaml -```yaml -model_list: - - model_name: "my-azure-models" # model alias - litellm_params: - model: "azure/" - api_key: "os.environ/AZURE-API-KEY" # reads from key vault - get_secret("AZURE_API_KEY") - api_base: "os.environ/AZURE-API-BASE" # reads from key vault - get_secret("AZURE_API_BASE") - -general_settings: - use_azure_key_vault: True -``` - -You can now test this by starting your proxy: -```bash -litellm --config /path/to/config.yaml -``` - -### Set Custom Prompt Templates - -LiteLLM by default checks if a model has a [prompt template and applies it](../completion/prompt_formatting.md) (e.g. if a huggingface model has a saved chat template in it's tokenizer_config.json). However, you can also set a custom prompt template on your proxy in the `config.yaml`: - -**Step 1**: Save your prompt template in a `config.yaml` -```yaml -# Model-specific parameters -model_list: - - model_name: mistral-7b # model alias - litellm_params: # actual params for litellm.completion() - model: "huggingface/mistralai/Mistral-7B-Instruct-v0.1" - api_base: "" - api_key: "" # [OPTIONAL] for hf inference endpoints - initial_prompt_value: "\n" - roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}} - final_prompt_value: "\n" - bos_token: "" - eos_token: "" - max_tokens: 4096 -``` - -**Step 2**: Start server with config - -```shell -$ litellm --config /path/to/config.yaml -``` - -## Setting Embedding Models +### Embedding Models - Use Sagemaker, Bedrock, Azure, OpenAI, XInference See supported Embedding Providers & Models [here](https://docs.litellm.ai/docs/embedding/supported_embedding) -### Use Sagemaker, Bedrock, Azure, OpenAI, XInference -#### Create Config.yaml @@ -685,48 +331,248 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ }' ``` -## ✨ IP Address Filtering -:::info +### Multiple OpenAI Organizations -You need a LiteLLM License to unlock this feature. [Grab time](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat), to get one today! - -::: - -Restrict which IP's can call the proxy endpoints. +Add all openai models across all OpenAI organizations with just 1 model definition ```yaml -general_settings: - allowed_ips: ["192.168.1.1"] + - model_name: * + litellm_params: + model: openai/* + api_key: os.environ/OPENAI_API_KEY + organization: + - org-1 + - org-2 + - org-3 ``` -**Expected Response** (if IP not listed) +LiteLLM will automatically create separate deployments for each org. 
+ +Confirm this via ```bash -{ - "error": { - "message": "Access forbidden: IP address not allowed.", - "type": "auth_error", - "param": "None", - "code": 403 - } -} +curl --location 'http://0.0.0.0:4000/v1/model/info' \ +--header 'Authorization: Bearer ${LITELLM_KEY}' \ +--data '' +``` + + +### Provider specific wildcard routing +**Proxy all models from a provider** + +Use this if you want to **proxy all models from a specific provider without defining them on the config.yaml** + +**Step 1** - define provider specific routing on config.yaml +```yaml +model_list: + # provider specific wildcard routing + - model_name: "anthropic/*" + litellm_params: + model: "anthropic/*" + api_key: os.environ/ANTHROPIC_API_KEY + - model_name: "groq/*" + litellm_params: + model: "groq/*" + api_key: os.environ/GROQ_API_KEY +``` + +Step 2 - Run litellm proxy + +```shell +$ litellm --config /path/to/config.yaml +``` + +Step 3 Test it + +Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*` +```shell +curl http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "anthropic/claude-3-sonnet-20240229", + "messages": [ + {"role": "user", "content": "Hello, Claude!"} + ] + }' +``` + +Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*` +```shell +curl http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "groq/llama3-8b-8192", + "messages": [ + {"role": "user", "content": "Hello, Claude!"} + ] + }' +``` + +### Load Balancing + +:::info +For more on this, go to [this page](https://docs.litellm.ai/docs/proxy/load_balancing) +::: + +Use this to call multiple instances of the same model and configure things like [routing strategy](https://docs.litellm.ai/docs/routing#advanced). + +For optimal performance: +- Set `tpm/rpm` per model deployment. Weighted picks are then based on the established tpm/rpm. +- Select your optimal routing strategy in `router_settings:routing_strategy`. + +LiteLLM supports +```python +["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"` +``` + +When `tpm/rpm` is set + `routing_strategy==simple-shuffle` litellm will use a weighted pick based on set tpm/rpm. **In our load tests setting tpm/rpm for all deployments + `routing_strategy==simple-shuffle` maximized throughput** +- When using multiple LiteLLM Servers / Kubernetes set redis settings `router_settings:redis_host` etc + +```yaml +model_list: + - model_name: zephyr-beta + litellm_params: + model: huggingface/HuggingFaceH4/zephyr-7b-beta + api_base: http://0.0.0.0:8001 + rpm: 60 # Optional[int]: When rpm/tpm set - litellm uses weighted pick for load balancing. rpm = Rate limit for this deployment: in requests per minute (rpm). + tpm: 1000 # Optional[int]: tpm = Tokens Per Minute + - model_name: zephyr-beta + litellm_params: + model: huggingface/HuggingFaceH4/zephyr-7b-beta + api_base: http://0.0.0.0:8002 + rpm: 600 + - model_name: zephyr-beta + litellm_params: + model: huggingface/HuggingFaceH4/zephyr-7b-beta + api_base: http://0.0.0.0:8003 + rpm: 60000 + - model_name: gpt-3.5-turbo + litellm_params: + model: gpt-3.5-turbo + api_key: + rpm: 200 + - model_name: gpt-3.5-turbo-16k + litellm_params: + model: gpt-3.5-turbo-16k + api_key: + rpm: 100 + +litellm_settings: + num_retries: 3 # retry call 3 times on each model_name (e.g. 
zephyr-beta) + request_timeout: 10 # raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout + fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo"]}] # fallback to gpt-3.5-turbo if call fails num_retries + context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error + allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. + +router_settings: # router_settings are optional + routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle" + model_group_alias: {"gpt-4": "gpt-3.5-turbo"} # all requests with `gpt-4` will be routed to models with `gpt-3.5-turbo` + num_retries: 2 + timeout: 30 # 30 seconds + redis_host: # set this when using multiple litellm proxy deployments, load balancing state stored in redis + redis_password: + redis_port: 1992 +``` + +You can view your cost once you set up [Virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys) or [custom_callbacks](https://docs.litellm.ai/docs/proxy/logging) + + +### Load API Keys / config values from Environment + +If you have secrets saved in your environment, and don't want to expose them in the config.yaml, here's how to load model-specific keys from the environment. **This works for ANY value on the config.yaml** + +```yaml +os.environ/ # runs os.getenv("YOUR-ENV-VAR") +``` + +```yaml +model_list: + - model_name: gpt-4-team1 + litellm_params: # params for litellm.completion() - https://docs.litellm.ai/docs/completion/input#input---request-body + model: azure/chatgpt-v-2 + api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ + api_version: "2023-05-15" + api_key: os.environ/AZURE_NORTH_AMERICA_API_KEY # 👈 KEY CHANGE +``` + +[**See Code**](https://github.com/BerriAI/litellm/blob/c12d6c3fe80e1b5e704d9846b246c059defadce7/litellm/utils.py#L2366) + +s/o to [@David Manouchehri](https://www.linkedin.com/in/davidmanouchehri/) for helping with this. + +### Load API Keys from Secret Managers (Azure Vault, etc) + +[**Using Secret Managers with LiteLLM Proxy**](../secret) + + +### Set Supported Environments for a model - `production`, `staging`, `development` + +Use this if you want to control which model is exposed on a specific litellm environment + +Supported Environments: +- `production` +- `staging` +- `development` + +1. Set `LITELLM_ENVIRONMENT=""` in your environment. Can be one of `production`, `staging` or `development` + + +2. For each model set the list of supported environments in `model_info.supported_environments` +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: openai/gpt-3.5-turbo + api_key: os.environ/OPENAI_API_KEY + model_info: + supported_environments: ["development", "production", "staging"] + - model_name: gpt-4 + litellm_params: + model: openai/gpt-4 + api_key: os.environ/OPENAI_API_KEY + model_info: + supported_environments: ["production", "staging"] + - model_name: gpt-4o + litellm_params: + model: openai/gpt-4o + api_key: os.environ/OPENAI_API_KEY + model_info: + supported_environments: ["production"] ``` +### Set Custom Prompt Templates -## Disable Swagger UI +LiteLLM by default checks if a model has a [prompt template and applies it](../completion/prompt_formatting.md) (e.g. if a huggingface model has a saved chat template in it's tokenizer_config.json). 
However, you can also set a custom prompt template on your proxy in the `config.yaml`: -To disable the Swagger docs from the base url, set - -```env -NO_DOCS="True" +**Step 1**: Save your prompt template in a `config.yaml` +```yaml +# Model-specific parameters +model_list: + - model_name: mistral-7b # model alias + litellm_params: # actual params for litellm.completion() + model: "huggingface/mistralai/Mistral-7B-Instruct-v0.1" + api_base: "" + api_key: "" # [OPTIONAL] for hf inference endpoints + initial_prompt_value: "\n" + roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}} + final_prompt_value: "\n" + bos_token: "" + eos_token: "" + max_tokens: 4096 ``` -in your environment, and restart the proxy. +**Step 2**: Start server with config +```shell +$ litellm --config /path/to/config.yaml +``` -## Configure DB Pool Limits + Connection Timeouts +## General Settings `general_settings` (DB Connection, etc) + +### Configure DB Pool Limits + Connection Timeouts ```yaml general_settings: @@ -812,3 +658,43 @@ general_settings: } ``` +## Extras + + +### Disable Swagger UI + +To disable the Swagger docs from the base url, set + +```env +NO_DOCS="True" +``` + +in your environment, and restart the proxy. + +### Use CONFIG_FILE_PATH for proxy (Easier Azure container deployment) + +1. Setup config.yaml + +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: gpt-3.5-turbo + api_key: os.environ/OPENAI_API_KEY +``` + +2. Store filepath as env var + +```bash +CONFIG_FILE_PATH="/path/to/config.yaml" +``` + +3. Start Proxy + +```bash +$ litellm + +# RUNNING on http://0.0.0.0:4000 +``` + + diff --git a/docs/my-website/docs/proxy/ip_address.md b/docs/my-website/docs/proxy/ip_address.md new file mode 100644 index 000000000..31ffd98a4 --- /dev/null +++ b/docs/my-website/docs/proxy/ip_address.md @@ -0,0 +1,28 @@ + +# ✨ IP Address Filtering + +:::info + +You need a LiteLLM License to unlock this feature. [Grab time](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat), to get one today! + +::: + +Restrict which IP's can call the proxy endpoints. + +```yaml +general_settings: + allowed_ips: ["192.168.1.1"] +``` + +**Expected Response** (if IP not listed) + +```bash +{ + "error": { + "message": "Access forbidden: IP address not allowed.", + "type": "auth_error", + "param": "None", + "code": 403 + } +} +``` \ No newline at end of file diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md index 1a0422714..4cd9002e0 100644 --- a/docs/my-website/docs/proxy/prometheus.md +++ b/docs/my-website/docs/proxy/prometheus.md @@ -1,7 +1,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# 📈 [BETA] Prometheus metrics +# 📈 Prometheus metrics :::info diff --git a/docs/my-website/docs/proxy/token_auth.md b/docs/my-website/docs/proxy/token_auth.md index 87d1b5243..d8e28b2ba 100644 --- a/docs/my-website/docs/proxy/token_auth.md +++ b/docs/my-website/docs/proxy/token_auth.md @@ -1,7 +1,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# [BETA] JWT-based Auth +# JWT-based Auth Use JWT's to auth admins / projects into the proxy. 
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index f1549da0a..e0512f080 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -50,7 +50,7 @@ const sidebars = {
     {
       type: "category",
       label: "🔑 Authentication",
-      items: ["proxy/virtual_keys", "proxy/token_auth", "proxy/oauth2"],
+      items: ["proxy/virtual_keys", "proxy/token_auth", "proxy/oauth2", "proxy/ip_address"],
     },
     {
       type: "category",
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 829131146..4601d4980 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -2,22 +2,21 @@ model_list:
   - model_name: gpt-3.5-turbo
     litellm_params:
       model: openai/gpt-3.5-turbo
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-      tags: ["teamB"]
+      api_key: os.environ/OPENAI_API_KEY
     model_info:
-      id: "team-b-model"
-  - model_name: rerank-english-v3.0
+      supported_environments: ["development", "production", "staging"]
+  - model_name: gpt-4
     litellm_params:
-      model: cohere/rerank-english-v3.0
-      api_key: os.environ/COHERE_API_KEY
-  - model_name: llava-hf
-    litellm_params:
-      model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
-      api_base: http://localhost:8000
-      api_key: fake-key
+      model: openai/gpt-4
+      api_key: os.environ/OPENAI_API_KEY
     model_info:
-      supports_vision: True
+      supported_environments: ["production", "staging"]
+  - model_name: gpt-4o
+    litellm_params:
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      supported_environments: ["production"]
 
 litellm_settings:
diff --git a/litellm/router.py b/litellm/router.py
index b3a07ad4e..2a177a2a2 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -90,6 +90,7 @@ from litellm.types.llms.openai import (
 )
 from litellm.types.router import (
     SPECIAL_MODEL_INFO_PARAMS,
+    VALID_LITELLM_ENVIRONMENTS,
     AlertingConfig,
     AllowedFailsPolicy,
     AssistantsTypedDict,
@@ -3973,12 +3974,55 @@ class Router:
             }
         )
 
+        ## Check if this LLM deployment is allowed for the current environment
+        if deployment.model_info and "supported_environments" in deployment.model_info:
+            if (
+                self.deployment_is_active_for_environment(deployment=deployment)
+                is not True
+            ):
+                return
+
         deployment = self._add_deployment(deployment=deployment)
 
         model = deployment.to_json(exclude_none=True)
 
         self.model_list.append(model)
 
+    def deployment_is_active_for_environment(self, deployment: Deployment) -> bool:
+        """
+        Check if an LLM deployment is active for a given environment. Allows using the same config.yaml across multiple environments.
+
+        Requires `LITELLM_ENVIRONMENT` to be set in .env. Valid values for environment:
+        - development
+        - staging
+        - production
+
+        Raises:
+        - ValueError: If LITELLM_ENVIRONMENT is not set in .env or not one of the valid values
+        - ValueError: If a value in supported_environments is not one of the valid values
+        """
+        litellm_environment = litellm.get_secret_str(secret_name="LITELLM_ENVIRONMENT")
+        if litellm_environment is None:
+            raise ValueError(
+                "'supported_environments' is set for a model, but 'LITELLM_ENVIRONMENT' is not set in .env"
+            )
+
+        if litellm_environment not in VALID_LITELLM_ENVIRONMENTS:
+            raise ValueError(
+                f"LITELLM_ENVIRONMENT must be one of {VALID_LITELLM_ENVIRONMENTS}, but was set to: {litellm_environment}"
+            )
+
+        for _env in deployment.model_info["supported_environments"]:
+            if _env not in VALID_LITELLM_ENVIRONMENTS:
+                raise ValueError(
+                    f"supported_environments must be one of {VALID_LITELLM_ENVIRONMENTS}.
but set as: {_env} for deployment: {deployment}" + ) + + # validate litellm_environment is one of LiteLLMEnvironment + if litellm_environment in deployment.model_info["supported_environments"]: + return True + return False + def set_model_list(self, model_list: list): original_model_list = copy.deepcopy(model_list) self.model_list = [] diff --git a/litellm/tests/test_router_init.py b/litellm/tests/test_router_init.py index 13167c10f..3733af252 100644 --- a/litellm/tests/test_router_init.py +++ b/litellm/tests/test_router_init.py @@ -636,3 +636,63 @@ def test_init_clients_async_mode(): assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None except Exception as e: pytest.fail(f"Error occurred: {e}") + + +@pytest.mark.parametrize( + "environment,expected_models", + [ + ("development", ["gpt-3.5-turbo"]), + ("production", ["gpt-4", "gpt-3.5-turbo", "gpt-4o"]), + ], +) +def test_init_router_with_supported_environments(environment, expected_models): + """ + Tests that the correct models are setup on router when LITELLM_ENVIRONMENT is set + """ + os.environ["LITELLM_ENVIRONMENT"] = environment + model_list = [ + { + "model_name": "gpt-3.5-turbo", + "litellm_params": { + "model": "azure/chatgpt-v-2", + "api_key": os.getenv("AZURE_API_KEY"), + "api_version": os.getenv("AZURE_API_VERSION"), + "api_base": os.getenv("AZURE_API_BASE"), + "timeout": 0.01, + "stream_timeout": 0.000_001, + "max_retries": 7, + }, + "model_info": {"supported_environments": ["development", "production"]}, + }, + { + "model_name": "gpt-4", + "litellm_params": { + "model": "openai/gpt-4", + "api_key": os.getenv("OPENAI_API_KEY"), + "timeout": 0.01, + "stream_timeout": 0.000_001, + "max_retries": 7, + }, + "model_info": {"supported_environments": ["production"]}, + }, + { + "model_name": "gpt-4o", + "litellm_params": { + "model": "openai/gpt-4o", + "api_key": os.getenv("OPENAI_API_KEY"), + "timeout": 0.01, + "stream_timeout": 0.000_001, + "max_retries": 7, + }, + "model_info": {"supported_environments": ["production"]}, + }, + ] + router = Router(model_list=model_list, set_verbose=True) + _model_list = router.get_model_names() + + print("model_list: ", _model_list) + print("expected_models: ", expected_models) + + assert set(_model_list) == set(expected_models) + + os.environ.pop("LITELLM_ENVIRONMENT") diff --git a/litellm/types/router.py b/litellm/types/router.py index 306dfcba1..cfb90814b 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -596,3 +596,10 @@ class RouterRateLimitError(ValueError): class RouterModelGroupAliasItem(TypedDict): model: str hidden: bool # if 'True', don't return on `.get_model_list` + + +VALID_LITELLM_ENVIRONMENTS = [ + "development", + "staging", + "production", +]
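
Reviewer note: the environment gating added above reduces to one predicate — a deployment is kept only when `LITELLM_ENVIRONMENT` is set, valid, and listed in that deployment's `model_info.supported_environments`. Below is a minimal, standalone sketch of that logic; the free function and plain-dict `model_info` are illustrative only, not the actual `Router` API.

```python
import os
from typing import Dict, List

# Mirrors VALID_LITELLM_ENVIRONMENTS added to litellm/types/router.py in this PR.
VALID_LITELLM_ENVIRONMENTS: List[str] = ["development", "staging", "production"]


def is_active_for_environment(model_info: Dict) -> bool:
    """Return True if this deployment should be loaded for the current LITELLM_ENVIRONMENT."""
    litellm_environment = os.getenv("LITELLM_ENVIRONMENT")
    if litellm_environment is None:
        # Fail fast: supported_environments without LITELLM_ENVIRONMENT is a misconfiguration.
        raise ValueError(
            "'supported_environments' is set for a model, but 'LITELLM_ENVIRONMENT' is not set"
        )
    if litellm_environment not in VALID_LITELLM_ENVIRONMENTS:
        raise ValueError(
            f"LITELLM_ENVIRONMENT must be one of {VALID_LITELLM_ENVIRONMENTS}, got: {litellm_environment}"
        )
    supported = model_info.get("supported_environments", [])
    for env in supported:
        if env not in VALID_LITELLM_ENVIRONMENTS:
            raise ValueError(
                f"supported_environments entries must be one of {VALID_LITELLM_ENVIRONMENTS}, got: {env}"
            )
    return litellm_environment in supported


if __name__ == "__main__":
    os.environ["LITELLM_ENVIRONMENT"] = "development"
    # Loaded: "development" is listed for this deployment.
    print(is_active_for_environment({"supported_environments": ["development", "production"]}))  # True
    # Skipped: this deployment is production/staging only.
    print(is_active_for_environment({"supported_environments": ["production", "staging"]}))  # False
```

Raising on a missing or invalid `LITELLM_ENVIRONMENT` (as the PR does) surfaces misconfiguration at startup instead of silently dropping deployments.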