diff --git a/docs/my-website/docs/proxy/configs.md b/docs/my-website/docs/proxy/configs.md
index 980a781ac..3e2d1f860 100644
--- a/docs/my-website/docs/proxy/configs.md
+++ b/docs/my-website/docs/proxy/configs.md
@@ -94,18 +94,18 @@ Run with `--detailed_debug` if you need detailed debug logs
```shell
$ litellm --config /path/to/config.yaml --detailed_debug
+```
+
:::
-### Using Proxy - Curl Request, OpenAI Package, Langchain, Langchain JS
-Calling a model group
-
-
-
+#### Step 3: Test it
Sends request to model where `model_name=gpt-3.5-turbo` on config.yaml.
If multiple deployments have `model_name=gpt-3.5-turbo`, the proxy does [Load Balancing](https://docs.litellm.ai/docs/proxy/load_balancing) across them
+**[Langchain, OpenAI SDK Usage Examples](../proxy/user_keys#request-format)**
+
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
@@ -120,105 +120,10 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
}
'
```
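+
+If you'd rather test from Python, here's a minimal OpenAI SDK sketch (point the client at the proxy; the link above has the full set of Langchain / SDK examples):
+
+```python
+import openai
+
+client = openai.OpenAI(
+    api_key="anything",            # the proxy handles provider auth
+    base_url="http://0.0.0.0:4000"
+)
+
+# routed to the deployment(s) where `model_name=gpt-3.5-turbo` in config.yaml
+response = client.chat.completions.create(
+    model="gpt-3.5-turbo",
+    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
+)
+print(response)
+```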
-
-
+## LLM configs `model_list`
-Sends this request to model where `model_name=bedrock-claude-v1` on config.yaml
-
-```shell
-curl --location 'http://0.0.0.0:4000/chat/completions' \
---header 'Content-Type: application/json' \
---data ' {
- "model": "bedrock-claude-v1",
- "messages": [
- {
- "role": "user",
- "content": "what llm are you"
- }
- ],
- }
-'
-```
-
-
-
-```python
-import openai
-client = openai.OpenAI(
- api_key="anything",
- base_url="http://0.0.0.0:4000"
-)
-
-# Sends request to model where `model_name=gpt-3.5-turbo` on config.yaml.
-response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
- {
- "role": "user",
- "content": "this is a test request, write a short poem"
- }
-])
-
-print(response)
-
-# Sends this request to model where `model_name=bedrock-claude-v1` on config.yaml
-response = client.chat.completions.create(model="bedrock-claude-v1", messages = [
- {
- "role": "user",
- "content": "this is a test request, write a short poem"
- }
-])
-
-print(response)
-
-```
-
-
-
-
-```python
-from langchain.chat_models import ChatOpenAI
-from langchain.prompts.chat import (
- ChatPromptTemplate,
- HumanMessagePromptTemplate,
- SystemMessagePromptTemplate,
-)
-from langchain.schema import HumanMessage, SystemMessage
-
-messages = [
- SystemMessage(
- content="You are a helpful assistant that im using to make a test request to."
- ),
- HumanMessage(
- content="test from litellm. tell me why it's amazing in 1 sentence"
- ),
-]
-
-# Sends request to model where `model_name=gpt-3.5-turbo` on config.yaml.
-chat = ChatOpenAI(
- openai_api_base="http://0.0.0.0:4000", # set openai base to the proxy
- model = "gpt-3.5-turbo",
- temperature=0.1
-)
-
-response = chat(messages)
-print(response)
-
-# Sends request to model where `model_name=bedrock-claude-v1` on config.yaml.
-claude_chat = ChatOpenAI(
- openai_api_base="http://0.0.0.0:4000", # set openai base to the proxy
- model = "bedrock-claude-v1",
- temperature=0.1
-)
-
-response = claude_chat(messages)
-print(response)
-```
-
-
-
-
-
-## Save Model-specific params (API Base, Keys, Temperature, Max Tokens, Organization, Headers etc.)
+### Model-specific params (API Base, Keys, Temperature, Max Tokens, Organization, Headers etc.)
You can use the config to save model-specific information like api_base, api_key, temperature, max_tokens, etc.
[**All input params**](https://docs.litellm.ai/docs/completion/input#input-params-1)
@@ -259,32 +164,6 @@ model_list:
$ litellm --config /path/to/config.yaml
```
-## Use CONFIG_FILE_PATH for proxy (Easier Azure container deployment)
-
-1. Setup config.yaml
-
-```yaml
-model_list:
- - model_name: gpt-3.5-turbo
- litellm_params:
- model: gpt-3.5-turbo
- api_key: os.environ/OPENAI_API_KEY
-```
-
-2. Store filepath as env var
-
-```bash
-CONFIG_FILE_PATH="/path/to/config.yaml"
-```
-
-3. Start Proxy
-
-```bash
-$ litellm
-
-# RUNNING on http://0.0.0.0:4000
-```
-
**Expected Logs:**
Look for this line in your console logs to confirm the config.yaml was loaded in correctly.
@@ -292,243 +171,10 @@ Look for this line in your console logs to confirm the config.yaml was loaded in
LiteLLM: Proxy initialized with Config, Set models:
```
-## Multiple OpenAI Organizations
-
-Add all openai models across all OpenAI organizations with just 1 model definition
-
-```yaml
- - model_name: *
- litellm_params:
- model: openai/*
- api_key: os.environ/OPENAI_API_KEY
- organization:
- - org-1
- - org-2
- - org-3
-```
-
-LiteLLM will automatically create separate deployments for each org.
-
-Confirm this via
-
-```bash
-curl --location 'http://0.0.0.0:4000/v1/model/info' \
---header 'Authorization: Bearer ${LITELLM_KEY}' \
---data ''
-```
-
-
-## Provider specific wildcard routing
-**Proxy all models from a provider**
-
-Use this if you want to **proxy all models from a specific provider without defining them on the config.yaml**
-
-**Step 1** - define provider specific routing on config.yaml
-```yaml
-model_list:
- # provider specific wildcard routing
- - model_name: "anthropic/*"
- litellm_params:
- model: "anthropic/*"
- api_key: os.environ/ANTHROPIC_API_KEY
- - model_name: "groq/*"
- litellm_params:
- model: "groq/*"
- api_key: os.environ/GROQ_API_KEY
-```
-
-Step 2 - Run litellm proxy
-
-```shell
-$ litellm --config /path/to/config.yaml
-```
-
-Step 3 Test it
-
-Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*`
-```shell
-curl http://localhost:4000/v1/chat/completions \
- -H "Content-Type: application/json" \
- -H "Authorization: Bearer sk-1234" \
- -d '{
- "model": "anthropic/claude-3-sonnet-20240229",
- "messages": [
- {"role": "user", "content": "Hello, Claude!"}
- ]
- }'
-```
-
-Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*`
-```shell
-curl http://localhost:4000/v1/chat/completions \
- -H "Content-Type: application/json" \
- -H "Authorization: Bearer sk-1234" \
- -d '{
- "model": "groq/llama3-8b-8192",
- "messages": [
- {"role": "user", "content": "Hello, Claude!"}
- ]
- }'
-```
-
-## Load Balancing
-
-:::info
-For more on this, go to [this page](https://docs.litellm.ai/docs/proxy/load_balancing)
-:::
-
-Use this to call multiple instances of the same model and configure things like [routing strategy](https://docs.litellm.ai/docs/routing#advanced).
-
-For optimal performance:
-- Set `tpm/rpm` per model deployment. Weighted picks are then based on the established tpm/rpm.
-- Select your optimal routing strategy in `router_settings:routing_strategy`.
-
-LiteLLM supports
-```python
-["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"`
-```
-
-When `tpm/rpm` is set + `routing_strategy==simple-shuffle` litellm will use a weighted pick based on set tpm/rpm. **In our load tests setting tpm/rpm for all deployments + `routing_strategy==simple-shuffle` maximized throughput**
-- When using multiple LiteLLM Servers / Kubernetes set redis settings `router_settings:redis_host` etc
-
-```yaml
-model_list:
- - model_name: zephyr-beta
- litellm_params:
- model: huggingface/HuggingFaceH4/zephyr-7b-beta
- api_base: http://0.0.0.0:8001
- rpm: 60 # Optional[int]: When rpm/tpm set - litellm uses weighted pick for load balancing. rpm = Rate limit for this deployment: in requests per minute (rpm).
- tpm: 1000 # Optional[int]: tpm = Tokens Per Minute
- - model_name: zephyr-beta
- litellm_params:
- model: huggingface/HuggingFaceH4/zephyr-7b-beta
- api_base: http://0.0.0.0:8002
- rpm: 600
- - model_name: zephyr-beta
- litellm_params:
- model: huggingface/HuggingFaceH4/zephyr-7b-beta
- api_base: http://0.0.0.0:8003
- rpm: 60000
- - model_name: gpt-3.5-turbo
- litellm_params:
- model: gpt-3.5-turbo
- api_key:
- rpm: 200
- - model_name: gpt-3.5-turbo-16k
- litellm_params:
- model: gpt-3.5-turbo-16k
- api_key:
- rpm: 100
-
-litellm_settings:
- num_retries: 3 # retry call 3 times on each model_name (e.g. zephyr-beta)
- request_timeout: 10 # raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout
- fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo"]}] # fallback to gpt-3.5-turbo if call fails num_retries
- context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error
- allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.
-
-router_settings: # router_settings are optional
- routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
- model_group_alias: {"gpt-4": "gpt-3.5-turbo"} # all requests with `gpt-4` will be routed to models with `gpt-3.5-turbo`
- num_retries: 2
- timeout: 30 # 30 seconds
- redis_host: # set this when using multiple litellm proxy deployments, load balancing state stored in redis
- redis_password:
- redis_port: 1992
-```
-
-You can view your cost once you set up [Virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys) or [custom_callbacks](https://docs.litellm.ai/docs/proxy/logging)
-
-## Load API Keys
-
-### Load API Keys / config values from Environment
-
-If you have secrets saved in your environment, and don't want to expose them in the config.yaml, here's how to load model-specific keys from the environment. **This works for ANY value on the config.yaml**
-
-```yaml
-os.environ/ # runs os.getenv("YOUR-ENV-VAR")
-```
-
-```yaml
-model_list:
- - model_name: gpt-4-team1
- litellm_params: # params for litellm.completion() - https://docs.litellm.ai/docs/completion/input#input---request-body
- model: azure/chatgpt-v-2
- api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
- api_version: "2023-05-15"
- api_key: os.environ/AZURE_NORTH_AMERICA_API_KEY # 👈 KEY CHANGE
-```
-
-[**See Code**](https://github.com/BerriAI/litellm/blob/c12d6c3fe80e1b5e704d9846b246c059defadce7/litellm/utils.py#L2366)
-
-s/o to [@David Manouchehri](https://www.linkedin.com/in/davidmanouchehri/) for helping with this.
-
-### Load API Keys from Azure Vault
-
-1. Install Proxy dependencies
-```bash
-$ pip install 'litellm[proxy]' 'litellm[extra_proxy]'
-```
-
-2. Save Azure details in your environment
-```bash
-export["AZURE_CLIENT_ID"]="your-azure-app-client-id"
-export["AZURE_CLIENT_SECRET"]="your-azure-app-client-secret"
-export["AZURE_TENANT_ID"]="your-azure-tenant-id"
-export["AZURE_KEY_VAULT_URI"]="your-azure-key-vault-uri"
-```
-
-3. Add to proxy config.yaml
-```yaml
-model_list:
- - model_name: "my-azure-models" # model alias
- litellm_params:
- model: "azure/"
- api_key: "os.environ/AZURE-API-KEY" # reads from key vault - get_secret("AZURE_API_KEY")
- api_base: "os.environ/AZURE-API-BASE" # reads from key vault - get_secret("AZURE_API_BASE")
-
-general_settings:
- use_azure_key_vault: True
-```
-
-You can now test this by starting your proxy:
-```bash
-litellm --config /path/to/config.yaml
-```
-
-### Set Custom Prompt Templates
-
-LiteLLM by default checks if a model has a [prompt template and applies it](../completion/prompt_formatting.md) (e.g. if a huggingface model has a saved chat template in it's tokenizer_config.json). However, you can also set a custom prompt template on your proxy in the `config.yaml`:
-
-**Step 1**: Save your prompt template in a `config.yaml`
-```yaml
-# Model-specific parameters
-model_list:
- - model_name: mistral-7b # model alias
- litellm_params: # actual params for litellm.completion()
- model: "huggingface/mistralai/Mistral-7B-Instruct-v0.1"
- api_base: ""
- api_key: "" # [OPTIONAL] for hf inference endpoints
- initial_prompt_value: "\n"
- roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
- final_prompt_value: "\n"
- bos_token: ""
- eos_token: ""
- max_tokens: 4096
-```
-
-**Step 2**: Start server with config
-
-```shell
-$ litellm --config /path/to/config.yaml
-```
-
-## Setting Embedding Models
+### Embedding Models - Use Sagemaker, Bedrock, Azure, OpenAI, XInference
See supported Embedding Providers & Models [here](https://docs.litellm.ai/docs/embedding/supported_embedding)
-### Use Sagemaker, Bedrock, Azure, OpenAI, XInference
-#### Create Config.yaml
@@ -685,48 +331,248 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
}'
```
-## ✨ IP Address Filtering
-:::info
+### Multiple OpenAI Organizations
-You need a LiteLLM License to unlock this feature. [Grab time](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat), to get one today!
-
-:::
-
-Restrict which IP's can call the proxy endpoints.
+Add all OpenAI models across all of your OpenAI organizations with just one model definition
```yaml
-general_settings:
- allowed_ips: ["192.168.1.1"]
+ - model_name: "*"
+ litellm_params:
+ model: openai/*
+ api_key: os.environ/OPENAI_API_KEY
+ organization:
+ - org-1
+ - org-2
+ - org-3
```
-**Expected Response** (if IP not listed)
+LiteLLM will automatically create separate deployments for each org.
+
+Confirm this via
```bash
-{
- "error": {
- "message": "Access forbidden: IP address not allowed.",
- "type": "auth_error",
- "param": "None",
- "code": 403
- }
-}
+curl --location 'http://0.0.0.0:4000/v1/model/info' \
+--header 'Authorization: Bearer ${LITELLM_KEY}' \
+--data ''
+```
+
+
+### Provider specific wildcard routing
+**Proxy all models from a provider**
+
+Use this if you want to **proxy all models from a specific provider without defining them on the config.yaml**
+
+**Step 1** - define provider specific routing on config.yaml
+```yaml
+model_list:
+ # provider specific wildcard routing
+ - model_name: "anthropic/*"
+ litellm_params:
+ model: "anthropic/*"
+ api_key: os.environ/ANTHROPIC_API_KEY
+ - model_name: "groq/*"
+ litellm_params:
+ model: "groq/*"
+ api_key: os.environ/GROQ_API_KEY
+```
+
+**Step 2** - Run litellm proxy
+
+```shell
+$ litellm --config /path/to/config.yaml
+```
+
+**Step 3** - Test it
+
+Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*`
+```shell
+curl http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-1234" \
+ -d '{
+ "model": "anthropic/claude-3-sonnet-20240229",
+ "messages": [
+ {"role": "user", "content": "Hello, Claude!"}
+ ]
+ }'
+```
+
+Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*`
+```shell
+curl http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-1234" \
+ -d '{
+ "model": "groq/llama3-8b-8192",
+ "messages": [
+ {"role": "user", "content": "Hello, Claude!"}
+ ]
+ }'
+```
+
+### Load Balancing
+
+:::info
+For more on this, go to [this page](https://docs.litellm.ai/docs/proxy/load_balancing)
+:::
+
+Use this to call multiple instances of the same model and configure things like [routing strategy](https://docs.litellm.ai/docs/routing#advanced).
+
+For optimal performance:
+- Set `tpm/rpm` per model deployment. Weighted picks are then based on the established tpm/rpm.
+- Select your optimal routing strategy in `router_settings:routing_strategy`.
+
+LiteLLM supports
+```python
+["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"`
+```
+
+When `tpm/rpm` is set and `routing_strategy==simple-shuffle`, litellm uses a weighted pick based on the configured tpm/rpm. **In our load tests, setting tpm/rpm for all deployments + `routing_strategy==simple-shuffle` maximized throughput.**
+- When using multiple LiteLLM servers / Kubernetes, set the Redis settings (`router_settings:redis_host` etc.) so load-balancing state is shared across instances
+
+```yaml
+model_list:
+ - model_name: zephyr-beta
+ litellm_params:
+ model: huggingface/HuggingFaceH4/zephyr-7b-beta
+ api_base: http://0.0.0.0:8001
+ rpm: 60 # Optional[int]: When rpm/tpm set - litellm uses weighted pick for load balancing. rpm = Rate limit for this deployment: in requests per minute (rpm).
+ tpm: 1000 # Optional[int]: tpm = Tokens Per Minute
+ - model_name: zephyr-beta
+ litellm_params:
+ model: huggingface/HuggingFaceH4/zephyr-7b-beta
+ api_base: http://0.0.0.0:8002
+ rpm: 600
+ - model_name: zephyr-beta
+ litellm_params:
+ model: huggingface/HuggingFaceH4/zephyr-7b-beta
+ api_base: http://0.0.0.0:8003
+ rpm: 60000
+ - model_name: gpt-3.5-turbo
+ litellm_params:
+ model: gpt-3.5-turbo
+ api_key:
+ rpm: 200
+ - model_name: gpt-3.5-turbo-16k
+ litellm_params:
+ model: gpt-3.5-turbo-16k
+ api_key:
+ rpm: 100
+
+litellm_settings:
+ num_retries: 3 # retry call 3 times on each model_name (e.g. zephyr-beta)
+ request_timeout: 10 # raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout
+ fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo"]}] # fallback to gpt-3.5-turbo if call fails num_retries
+ context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error
+  allowed_fails: 3 # cooldown model if it fails > 3 calls in a minute.
+
+router_settings: # router_settings are optional
+ routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
+ model_group_alias: {"gpt-4": "gpt-3.5-turbo"} # all requests with `gpt-4` will be routed to models with `gpt-3.5-turbo`
+ num_retries: 2
+ timeout: 30 # 30 seconds
+ redis_host: # set this when using multiple litellm proxy deployments, load balancing state stored in redis
+ redis_password:
+ redis_port: 1992
+```
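+
+Under `simple-shuffle` with rpm set, deployments are picked roughly in proportion to their rpm. A rough sketch of such a weighted pick over the `zephyr-beta` deployments above (illustrative only - not LiteLLM's internal implementation):
+
+```python
+import random
+
+# rpm values from the zephyr-beta deployments in the config above
+deployments = {
+    "http://0.0.0.0:8001": 60,
+    "http://0.0.0.0:8002": 600,
+    "http://0.0.0.0:8003": 60000,
+}
+
+# weighted pick - a deployment with a higher rpm is chosen proportionally more often
+picked = random.choices(
+    population=list(deployments.keys()),
+    weights=list(deployments.values()),
+    k=1,
+)[0]
+print(picked)  # ~99% of picks land on the :8003 deployment
+```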
+
+You can view your cost once you set up [Virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys) or [custom_callbacks](https://docs.litellm.ai/docs/proxy/logging)
+
+
+### Load API Keys / config values from Environment
+
+If you have secrets saved in your environment, and don't want to expose them in the config.yaml, here's how to load model-specific keys from the environment. **This works for ANY value on the config.yaml**
+
+```yaml
+os.environ/YOUR-ENV-VAR # runs os.getenv("YOUR-ENV-VAR")
+```
+
+```yaml
+model_list:
+ - model_name: gpt-4-team1
+ litellm_params: # params for litellm.completion() - https://docs.litellm.ai/docs/completion/input#input---request-body
+ model: azure/chatgpt-v-2
+ api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
+ api_version: "2023-05-15"
+ api_key: os.environ/AZURE_NORTH_AMERICA_API_KEY # 👈 KEY CHANGE
+```
+
+[**See Code**](https://github.com/BerriAI/litellm/blob/c12d6c3fe80e1b5e704d9846b246c059defadce7/litellm/utils.py#L2366)
+
+s/o to [@David Manouchehri](https://www.linkedin.com/in/davidmanouchehri/) for helping with this.
+
+### Load API Keys from Secret Managers (Azure Vault, etc)
+
+[**Using Secret Managers with LiteLLM Proxy**](../secret)
+
+
+### Set Supported Environments for a model - `production`, `staging`, `development`
+
+Use this if you want to control which models are exposed in a specific litellm environment
+
+Supported Environments:
+- `production`
+- `staging`
+- `development`
+
+1. Set `LITELLM_ENVIRONMENT="<environment>"` in your environment. It can be one of `production`, `staging` or `development` (see the startup example after the config below)
+
+
+2. For each model set the list of supported environments in `model_info.supported_environments`
+```yaml
+model_list:
+ - model_name: gpt-3.5-turbo
+ litellm_params:
+ model: openai/gpt-3.5-turbo
+ api_key: os.environ/OPENAI_API_KEY
+ model_info:
+ supported_environments: ["development", "production", "staging"]
+ - model_name: gpt-4
+ litellm_params:
+ model: openai/gpt-4
+ api_key: os.environ/OPENAI_API_KEY
+ model_info:
+ supported_environments: ["production", "staging"]
+ - model_name: gpt-4o
+ litellm_params:
+ model: openai/gpt-4o
+ api_key: os.environ/OPENAI_API_KEY
+ model_info:
+ supported_environments: ["production"]
```
+### Set Custom Prompt Templates
-## Disable Swagger UI
+LiteLLM by default checks if a model has a [prompt template and applies it](../completion/prompt_formatting.md) (e.g. if a huggingface model has a saved chat template in its tokenizer_config.json). However, you can also set a custom prompt template on your proxy in the `config.yaml`:
-To disable the Swagger docs from the base url, set
-
-```env
-NO_DOCS="True"
+**Step 1**: Save your prompt template in a `config.yaml`
+```yaml
+# Model-specific parameters
+model_list:
+ - model_name: mistral-7b # model alias
+ litellm_params: # actual params for litellm.completion()
+ model: "huggingface/mistralai/Mistral-7B-Instruct-v0.1"
+ api_base: ""
+ api_key: "" # [OPTIONAL] for hf inference endpoints
+ initial_prompt_value: "\n"
+ roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
+ final_prompt_value: "\n"
+ bos_token: ""
+ eos_token: ""
+ max_tokens: 4096
```
-in your environment, and restart the proxy.
+**Step 2**: Start server with config
+```shell
+$ litellm --config /path/to/config.yaml
+```
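+
+**Step 3**: Test it
+
+A minimal check that requests to the `mistral-7b` alias go through the custom template (same curl format as the examples above):
+
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--data '{
+    "model": "mistral-7b",
+    "messages": [
+        {"role": "user", "content": "what llm are you"}
+    ]
+}'
+```
+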
-## Configure DB Pool Limits + Connection Timeouts
+## General Settings `general_settings` (DB Connection, etc)
+
+### Configure DB Pool Limits + Connection Timeouts
```yaml
general_settings:
@@ -812,3 +658,43 @@ general_settings:
}
```
+## Extras
+
+
+### Disable Swagger UI
+
+To disable the Swagger docs from the base url, set
+
+```env
+NO_DOCS="True"
+```
+
+in your environment, and restart the proxy.
+
+### Use CONFIG_FILE_PATH for proxy (Easier Azure container deployment)
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+ - model_name: gpt-3.5-turbo
+ litellm_params:
+ model: gpt-3.5-turbo
+ api_key: os.environ/OPENAI_API_KEY
+```
+
+2. Store filepath as env var
+
+```bash
+export CONFIG_FILE_PATH="/path/to/config.yaml"
+```
+
+3. Start Proxy
+
+```bash
+$ litellm
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+
diff --git a/docs/my-website/docs/proxy/ip_address.md b/docs/my-website/docs/proxy/ip_address.md
new file mode 100644
index 000000000..31ffd98a4
--- /dev/null
+++ b/docs/my-website/docs/proxy/ip_address.md
@@ -0,0 +1,28 @@
+
+# ✨ IP Address Filtering
+
+:::info
+
+You need a LiteLLM License to unlock this feature. [Grab time](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) to get one today!
+
+:::
+
+Restrict which IPs can call the proxy endpoints.
+
+```yaml
+general_settings:
+ allowed_ips: ["192.168.1.1"]
+```
+
+**Expected Response** (if IP not listed)
+
+```json
+{
+ "error": {
+ "message": "Access forbidden: IP address not allowed.",
+ "type": "auth_error",
+ "param": "None",
+ "code": 403
+ }
+}
+```
\ No newline at end of file
diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md
index 1a0422714..4cd9002e0 100644
--- a/docs/my-website/docs/proxy/prometheus.md
+++ b/docs/my-website/docs/proxy/prometheus.md
@@ -1,7 +1,7 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-# 📈 [BETA] Prometheus metrics
+# 📈 Prometheus metrics
:::info
diff --git a/docs/my-website/docs/proxy/token_auth.md b/docs/my-website/docs/proxy/token_auth.md
index 87d1b5243..d8e28b2ba 100644
--- a/docs/my-website/docs/proxy/token_auth.md
+++ b/docs/my-website/docs/proxy/token_auth.md
@@ -1,7 +1,7 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-# [BETA] JWT-based Auth
+# JWT-based Auth
Use JWT's to auth admins / projects into the proxy.
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index f1549da0a..e0512f080 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -50,7 +50,7 @@ const sidebars = {
{
type: "category",
label: "🔑 Authentication",
- items: ["proxy/virtual_keys", "proxy/token_auth", "proxy/oauth2"],
+ items: ["proxy/virtual_keys", "proxy/token_auth", "proxy/oauth2", "proxy/ip_address"],
},
{
type: "category",
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 829131146..4601d4980 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -2,22 +2,21 @@ model_list:
- model_name: gpt-3.5-turbo
litellm_params:
model: openai/gpt-3.5-turbo
- api_key: fake-key
- api_base: https://exampleopenaiendpoint-production.up.railway.app/
- tags: ["teamB"]
+ api_key: os.environ/OPENAI_API_KEY
model_info:
- id: "team-b-model"
- - model_name: rerank-english-v3.0
+ supported_environments: ["development", "production", "staging"]
+ - model_name: gpt-4
litellm_params:
- model: cohere/rerank-english-v3.0
- api_key: os.environ/COHERE_API_KEY
- - model_name: llava-hf
- litellm_params:
- model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
- api_base: http://localhost:8000
- api_key: fake-key
+ model: openai/gpt-4
+ api_key: os.environ/OPENAI_API_KEY
model_info:
- supports_vision: True
+ supported_environments: ["production", "staging"]
+ - model_name: gpt-4o
+ litellm_params:
+ model: openai/gpt-4o
+ api_key: os.environ/OPENAI_API_KEY
+ model_info:
+ supported_environments: ["production"]
litellm_settings:
diff --git a/litellm/router.py b/litellm/router.py
index b3a07ad4e..2a177a2a2 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -90,6 +90,7 @@ from litellm.types.llms.openai import (
)
from litellm.types.router import (
SPECIAL_MODEL_INFO_PARAMS,
+ VALID_LITELLM_ENVIRONMENTS,
AlertingConfig,
AllowedFailsPolicy,
AssistantsTypedDict,
@@ -3973,12 +3974,55 @@ class Router:
}
)
+        ## Check if this deployment is allowed for the current environment (LITELLM_ENVIRONMENT)
+ if deployment.model_info and "supported_environments" in deployment.model_info:
+ if (
+ self.deployment_is_active_for_environment(deployment=deployment)
+ is not True
+ ):
+ return
+
deployment = self._add_deployment(deployment=deployment)
model = deployment.to_json(exclude_none=True)
self.model_list.append(model)
+ def deployment_is_active_for_environment(self, deployment: Deployment) -> bool:
+ """
+        Function to check if an LLM deployment is active for a given environment. Allows using the same config.yaml across multiple environments
+
+ Requires `LITELLM_ENVIRONMENT` to be set in .env. Valid values for environment:
+ - development
+ - staging
+ - production
+
+ Raises:
+ - ValueError: If LITELLM_ENVIRONMENT is not set in .env or not one of the valid values
+        - ValueError: If a value in supported_environments is not one of the valid values
+ """
+ litellm_environment = litellm.get_secret_str(secret_name="LITELLM_ENVIRONMENT")
+ if litellm_environment is None:
+ raise ValueError(
+ f"Set 'supported_environments' for model but not 'LITELLM_ENVIRONMENT' set in .env"
+ )
+
+ if litellm_environment not in VALID_LITELLM_ENVIRONMENTS:
+ raise ValueError(
+ f"LITELLM_ENVIRONMENT must be one of {VALID_LITELLM_ENVIRONMENTS}. but set as: {litellm_environment}"
+ )
+
+ for _env in deployment.model_info["supported_environments"]:
+ if _env not in VALID_LITELLM_ENVIRONMENTS:
+ raise ValueError(
+ f"supported_environments must be one of {VALID_LITELLM_ENVIRONMENTS}. but set as: {_env} for deployment: {deployment}"
+ )
+
+        # check if the current environment is in the deployment's supported environments
+ if litellm_environment in deployment.model_info["supported_environments"]:
+ return True
+ return False
+
def set_model_list(self, model_list: list):
original_model_list = copy.deepcopy(model_list)
self.model_list = []
diff --git a/litellm/tests/test_router_init.py b/litellm/tests/test_router_init.py
index 13167c10f..3733af252 100644
--- a/litellm/tests/test_router_init.py
+++ b/litellm/tests/test_router_init.py
@@ -636,3 +636,63 @@ def test_init_clients_async_mode():
assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
except Exception as e:
pytest.fail(f"Error occurred: {e}")
+
+
+@pytest.mark.parametrize(
+ "environment,expected_models",
+ [
+ ("development", ["gpt-3.5-turbo"]),
+ ("production", ["gpt-4", "gpt-3.5-turbo", "gpt-4o"]),
+ ],
+)
+def test_init_router_with_supported_environments(environment, expected_models):
+ """
+    Tests that the correct models are set up on the router when LITELLM_ENVIRONMENT is set
+ """
+ os.environ["LITELLM_ENVIRONMENT"] = environment
+ model_list = [
+ {
+ "model_name": "gpt-3.5-turbo",
+ "litellm_params": {
+ "model": "azure/chatgpt-v-2",
+ "api_key": os.getenv("AZURE_API_KEY"),
+ "api_version": os.getenv("AZURE_API_VERSION"),
+ "api_base": os.getenv("AZURE_API_BASE"),
+ "timeout": 0.01,
+ "stream_timeout": 0.000_001,
+ "max_retries": 7,
+ },
+ "model_info": {"supported_environments": ["development", "production"]},
+ },
+ {
+ "model_name": "gpt-4",
+ "litellm_params": {
+ "model": "openai/gpt-4",
+ "api_key": os.getenv("OPENAI_API_KEY"),
+ "timeout": 0.01,
+ "stream_timeout": 0.000_001,
+ "max_retries": 7,
+ },
+ "model_info": {"supported_environments": ["production"]},
+ },
+ {
+ "model_name": "gpt-4o",
+ "litellm_params": {
+ "model": "openai/gpt-4o",
+ "api_key": os.getenv("OPENAI_API_KEY"),
+ "timeout": 0.01,
+ "stream_timeout": 0.000_001,
+ "max_retries": 7,
+ },
+ "model_info": {"supported_environments": ["production"]},
+ },
+ ]
+ router = Router(model_list=model_list, set_verbose=True)
+ _model_list = router.get_model_names()
+
+ print("model_list: ", _model_list)
+ print("expected_models: ", expected_models)
+
+ assert set(_model_list) == set(expected_models)
+
+ os.environ.pop("LITELLM_ENVIRONMENT")
diff --git a/litellm/types/router.py b/litellm/types/router.py
index 306dfcba1..cfb90814b 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -596,3 +596,10 @@ class RouterRateLimitError(ValueError):
class RouterModelGroupAliasItem(TypedDict):
model: str
hidden: bool # if 'True', don't return on `.get_model_list`
+
+
+VALID_LITELLM_ENVIRONMENTS = [
+ "development",
+ "staging",
+ "production",
+]