forked from phoenix/litellm-mirror
[Feat-Router] Allow setting which environment to use a model on (#5892)
* add check deployment_is_active_for_environment
* add test for test_init_router_with_supported_environments
* show good example config for environments
* docs clean up config.yaml
* docs cleanup
* docs configs
* docs specific env

Parent: 4911979c61
Commit: 4ec4d02474
9 changed files with 427 additions and 403 deletions
@@ -94,18 +94,18 @@ Run with `--detailed_debug` if you need detailed debug logs

```shell
$ litellm --config /path/to/config.yaml --detailed_debug
```

:::

-### Using Proxy - Curl Request, OpenAI Package, Langchain, Langchain JS
+#### Step 3: Test it

-Calling a model group

-<Tabs>
-<TabItem value="Curl" label="Curl Request">

Sends request to model where `model_name=gpt-3.5-turbo` on config.yaml.

If multiple deployments share `model_name=gpt-3.5-turbo`, LiteLLM does [Load Balancing](https://docs.litellm.ai/docs/proxy/load_balancing)

**[Langchain, OpenAI SDK Usage Examples](../proxy/user_keys#request-format)**
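For reference, a minimal sketch of calling the proxy through the OpenAI Python SDK (the same pattern as the SDK examples on the linked page). The `api_key` and `base_url` values assume a locally running proxy without virtual keys enforced:

```python
import openai

# Point the OpenAI SDK at the LiteLLM proxy. Any string works as the key
# when virtual keys are not enforced (assumption for this sketch).
client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

# Routed to the deployment(s) where `model_name=gpt-3.5-turbo` in config.yaml
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
)
print(response)
```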

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
@@ -120,105 +120,10 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
    }
'
```
-</TabItem>

-<TabItem value="Curl2" label="Curl Request: Bedrock">
+## LLM configs `model_list`

-Sends this request to model where `model_name=bedrock-claude-v1` on config.yaml
+### Model-specific params (API Base, Keys, Temperature, Max Tokens, Organization, Headers etc.)

-```shell
-curl --location 'http://0.0.0.0:4000/chat/completions' \
-    --header 'Content-Type: application/json' \
-    --data ' {
-      "model": "bedrock-claude-v1",
-      "messages": [
-        {
-          "role": "user",
-          "content": "what llm are you"
-        }
-      ],
-    }
-'
-```
-</TabItem>
-<TabItem value="openai" label="OpenAI v1.0.0+">

-```python
-import openai
-client = openai.OpenAI(
-    api_key="anything",
-    base_url="http://0.0.0.0:4000"
-)

-# Sends request to model where `model_name=gpt-3.5-turbo` on config.yaml.
-response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
-    {
-        "role": "user",
-        "content": "this is a test request, write a short poem"
-    }
-])

-print(response)

-# Sends this request to model where `model_name=bedrock-claude-v1` on config.yaml
-response = client.chat.completions.create(model="bedrock-claude-v1", messages = [
-    {
-        "role": "user",
-        "content": "this is a test request, write a short poem"
-    }
-])

-print(response)
-```

-</TabItem>
-<TabItem value="langchain" label="Langchain Python">

-```python
-from langchain.chat_models import ChatOpenAI
-from langchain.prompts.chat import (
-    ChatPromptTemplate,
-    HumanMessagePromptTemplate,
-    SystemMessagePromptTemplate,
-)
-from langchain.schema import HumanMessage, SystemMessage

-messages = [
-    SystemMessage(
-        content="You are a helpful assistant that im using to make a test request to."
-    ),
-    HumanMessage(
-        content="test from litellm. tell me why it's amazing in 1 sentence"
-    ),
-]

-# Sends request to model where `model_name=gpt-3.5-turbo` on config.yaml.
-chat = ChatOpenAI(
-    openai_api_base="http://0.0.0.0:4000", # set openai base to the proxy
-    model = "gpt-3.5-turbo",
-    temperature=0.1
-)

-response = chat(messages)
-print(response)

-# Sends request to model where `model_name=bedrock-claude-v1` on config.yaml.
-claude_chat = ChatOpenAI(
-    openai_api_base="http://0.0.0.0:4000", # set openai base to the proxy
-    model = "bedrock-claude-v1",
-    temperature=0.1
-)

-response = claude_chat(messages)
-print(response)
-```

-</TabItem>
-</Tabs>

-## Save Model-specific params (API Base, Keys, Temperature, Max Tokens, Organization, Headers etc.)
You can use the config to save model-specific information like api_base, api_key, temperature, max_tokens, etc.

[**All input params**](https://docs.litellm.ai/docs/completion/input#input-params-1)
@@ -259,32 +164,6 @@ model_list:

$ litellm --config /path/to/config.yaml
```

-## Use CONFIG_FILE_PATH for proxy (Easier Azure container deployment)
*(the steps of this removed section are unchanged and reappear under "## Extras" further down)*

**Expected Logs:**

Look for this line in your console logs to confirm the config.yaml was loaded in correctly.
@@ -292,243 +171,10 @@ Look for this line in your console logs to confirm the config.yaml was loaded in correctly.

LiteLLM: Proxy initialized with Config, Set models:
```

-## Multiple OpenAI Organizations
+### Embedding Models - Use Sagemaker, Bedrock, Azure, OpenAI, XInference

-## Provider specific wildcard routing
-## Load Balancing
-## Load API Keys
-### Load API Keys / config values from Environment
-### Load API Keys from Azure Vault
-### Set Custom Prompt Templates

*(the bodies of these removed sections are unchanged and reappear under "## LLM configs `model_list`" further down, except "Load API Keys from Azure Vault", which is replaced by a link to the Secret Managers docs and is reproduced here)*

-1. Install Proxy dependencies
-```bash
-$ pip install 'litellm[proxy]' 'litellm[extra_proxy]'
-```

-2. Save Azure details in your environment
-```bash
-export["AZURE_CLIENT_ID"]="your-azure-app-client-id"
-export["AZURE_CLIENT_SECRET"]="your-azure-app-client-secret"
-export["AZURE_TENANT_ID"]="your-azure-tenant-id"
-export["AZURE_KEY_VAULT_URI"]="your-azure-key-vault-uri"
-```

-3. Add to proxy config.yaml
-```yaml
-model_list:
-  - model_name: "my-azure-models" # model alias
-    litellm_params:
-      model: "azure/<your-deployment-name>"
-      api_key: "os.environ/AZURE-API-KEY" # reads from key vault - get_secret("AZURE_API_KEY")
-      api_base: "os.environ/AZURE-API-BASE" # reads from key vault - get_secret("AZURE_API_BASE")

-general_settings:
-  use_azure_key_vault: True
-```

-You can now test this by starting your proxy:
-```bash
-litellm --config /path/to/config.yaml
-```

-## Setting Embedding Models

See supported Embedding Providers & Models [here](https://docs.litellm.ai/docs/embedding/supported_embedding)

-### Use Sagemaker, Bedrock, Azure, OpenAI, XInference
-#### Create Config.yaml

<Tabs>
<TabItem value="bedrock" label="Bedrock Completion/Chat">
@@ -685,48 +331,248 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
}'
```

-## ✨ IP Address Filtering
*(section moved, unchanged, to the new file docs/my-website/docs/proxy/ip_address.md, shown at the end of this diff)*

### Multiple OpenAI Organizations

Add all openai models across all OpenAI organizations with just 1 model definition

```yaml
  - model_name: *
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
      organization:
       - org-1
       - org-2
       - org-3
```

LiteLLM will automatically create separate deployments for each org.

Confirm this via

```bash
curl --location 'http://0.0.0.0:4000/v1/model/info' \
--header 'Authorization: Bearer ${LITELLM_KEY}' \
--data ''
```
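Conceptually, the per-org expansion described above works like the following sketch. This is illustrative only, not the Router's actual implementation, and `expand_orgs` is a hypothetical helper:

```python
import copy

def expand_orgs(model_def: dict) -> list:
    """Illustrative: turn one model definition listing several OpenAI
    organizations into one deployment per organization."""
    orgs = model_def["litellm_params"].get("organization")
    if not isinstance(orgs, list):
        return [model_def]
    deployments = []
    for org in orgs:
        dep = copy.deepcopy(model_def)
        dep["litellm_params"]["organization"] = org  # one org per deployment
        deployments.append(dep)
    return deployments

base = {
    "model_name": "*",
    "litellm_params": {
        "model": "openai/*",
        "api_key": "os.environ/OPENAI_API_KEY",
        "organization": ["org-1", "org-2", "org-3"],
    },
}
print(len(expand_orgs(base)))  # 3 deployments, one per org
```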
### Provider specific wildcard routing

**Proxy all models from a provider**

Use this if you want to **proxy all models from a specific provider without defining them on the config.yaml**

**Step 1** - define provider specific routing on config.yaml

```yaml
model_list:
  # provider specific wildcard routing
  - model_name: "anthropic/*"
    litellm_params:
      model: "anthropic/*"
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: "groq/*"
    litellm_params:
      model: "groq/*"
      api_key: os.environ/GROQ_API_KEY
```

Step 2 - Run litellm proxy

```shell
$ litellm --config /path/to/config.yaml
```

Step 3 - Test it

Test with `anthropic/` - all models with the `anthropic/` prefix will get routed to `anthropic/*`

```shell
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "anthropic/claude-3-sonnet-20240229",
    "messages": [
      {"role": "user", "content": "Hello, Claude!"}
    ]
  }'
```

Test with `groq/` - all models with the `groq/` prefix will get routed to `groq/*`

```shell
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "groq/llama3-8b-8192",
    "messages": [
      {"role": "user", "content": "Hello, Claude!"}
    ]
  }'
```
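Wildcard routes also work through any OpenAI-compatible client. A minimal sketch, assuming the proxy from Step 2 is running on `localhost:4000` and accepts the `sk-1234` key used above:

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

# Any model with the `groq/` prefix is matched by the `groq/*` wildcard route
response = client.chat.completions.create(
    model="groq/llama3-8b-8192",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```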
### Load Balancing

:::info
For more on this, go to [this page](https://docs.litellm.ai/docs/proxy/load_balancing)
:::

Use this to call multiple instances of the same model and configure things like [routing strategy](https://docs.litellm.ai/docs/routing#advanced).

For optimal performance:
- Set `tpm/rpm` per model deployment. Weighted picks are then based on the established tpm/rpm.
- Select your optimal routing strategy in `router_settings:routing_strategy`.

LiteLLM supports
```python
["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing"], default="simple-shuffle"
```

When `tpm/rpm` is set + `routing_strategy==simple-shuffle`, litellm will use a weighted pick based on the set tpm/rpm. **In our load tests, setting tpm/rpm for all deployments + `routing_strategy==simple-shuffle` maximized throughput**
- When using multiple LiteLLM Servers / Kubernetes, set redis settings `router_settings:redis_host` etc

```yaml
model_list:
  - model_name: zephyr-beta
    litellm_params:
      model: huggingface/HuggingFaceH4/zephyr-7b-beta
      api_base: http://0.0.0.0:8001
      rpm: 60   # Optional[int]: When rpm/tpm set - litellm uses weighted pick for load balancing. rpm = Rate limit for this deployment: in requests per minute (rpm).
      tpm: 1000 # Optional[int]: tpm = Tokens Per Minute
  - model_name: zephyr-beta
    litellm_params:
      model: huggingface/HuggingFaceH4/zephyr-7b-beta
      api_base: http://0.0.0.0:8002
      rpm: 600
  - model_name: zephyr-beta
    litellm_params:
      model: huggingface/HuggingFaceH4/zephyr-7b-beta
      api_base: http://0.0.0.0:8003
      rpm: 60000
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
      api_key: <my-openai-key>
      rpm: 200
  - model_name: gpt-3.5-turbo-16k
    litellm_params:
      model: gpt-3.5-turbo-16k
      api_key: <my-openai-key>
      rpm: 100

litellm_settings:
  num_retries: 3 # retry call 3 times on each model_name (e.g. zephyr-beta)
  request_timeout: 10 # raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout
  fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo"]}] # fallback to gpt-3.5-turbo if call fails num_retries
  context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error
  allowed_fails: 3 # cooldown model if it fails > 1 call in a minute.

router_settings: # router_settings are optional
  routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
  model_group_alias: {"gpt-4": "gpt-3.5-turbo"} # all requests with `gpt-4` will be routed to models with `gpt-3.5-turbo`
  num_retries: 2
  timeout: 30 # 30 seconds
  redis_host: <your redis host> # set this when using multiple litellm proxy deployments, load balancing state stored in redis
  redis_password: <your redis password>
  redis_port: 1992
```

You can view your cost once you set up [Virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys) or [custom_callbacks](https://docs.litellm.ai/docs/proxy/logging)
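As a rough illustration of the weighted pick described above (a sketch of the idea only, not LiteLLM's routing code), deployments with a higher `rpm` are proportionally more likely to be chosen:

```python
import random

# rpm values taken from the example config above
deployments = [
    {"api_base": "http://0.0.0.0:8001", "rpm": 60},
    {"api_base": "http://0.0.0.0:8002", "rpm": 600},
    {"api_base": "http://0.0.0.0:8003", "rpm": 60000},
]

def weighted_pick(deployments: list) -> dict:
    """Pick a deployment with probability proportional to its rpm."""
    weights = [d["rpm"] for d in deployments]
    return random.choices(deployments, weights=weights, k=1)[0]

picks = [weighted_pick(deployments)["api_base"] for _ in range(10_000)]
# The rpm=60000 deployment receives ~99% of the traffic in this sketch
print(picks.count("http://0.0.0.0:8003") / len(picks))
```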
|
|
||||||
|
|
||||||
|
### Load API Keys / config values from Environment
|
||||||
|
|
||||||
|
If you have secrets saved in your environment, and don't want to expose them in the config.yaml, here's how to load model-specific keys from the environment. **This works for ANY value on the config.yaml**
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
os.environ/<YOUR-ENV-VAR> # runs os.getenv("YOUR-ENV-VAR")
|
||||||
|
```
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-4-team1
|
||||||
|
litellm_params: # params for litellm.completion() - https://docs.litellm.ai/docs/completion/input#input---request-body
|
||||||
|
model: azure/chatgpt-v-2
|
||||||
|
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
|
||||||
|
api_version: "2023-05-15"
|
||||||
|
api_key: os.environ/AZURE_NORTH_AMERICA_API_KEY # 👈 KEY CHANGE
|
||||||
|
```
|
||||||
|
|
||||||
|
[**See Code**](https://github.com/BerriAI/litellm/blob/c12d6c3fe80e1b5e704d9846b246c059defadce7/litellm/utils.py#L2366)
|
||||||
|
|
||||||
|
s/o to [@David Manouchehri](https://www.linkedin.com/in/davidmanouchehri/) for helping with this.
|
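A minimal sketch of the `os.environ/` convention described above. `resolve_config_value` is a hypothetical helper shown only to illustrate the lookup; LiteLLM performs the equivalent of `os.getenv` when it sees this prefix:

```python
import os

def resolve_config_value(value):
    """If a config value is written as `os.environ/NAME`, read NAME from the environment."""
    prefix = "os.environ/"
    if isinstance(value, str) and value.startswith(prefix):
        return os.getenv(value[len(prefix):])
    return value

os.environ["AZURE_NORTH_AMERICA_API_KEY"] = "my-azure-key"
print(resolve_config_value("os.environ/AZURE_NORTH_AMERICA_API_KEY"))  # my-azure-key
```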
### Load API Keys from Secret Managers (Azure Vault, etc)

[**Using Secret Managers with LiteLLM Proxy**](../secret)

### Set Supported Environments for a model - `production`, `staging`, `development`

Use this if you want to control which model is exposed on a specific litellm environment

Supported Environments:
- `production`
- `staging`
- `development`

1. Set `LITELLM_ENVIRONMENT="<environment>"` in your environment. Can be one of `production`, `staging` or `development`

2. For each model set the list of supported environments in `model_info.supported_environments`

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY
    model_info:
      supported_environments: ["development", "production", "staging"]
  - model_name: gpt-4
    litellm_params:
      model: openai/gpt-4
      api_key: os.environ/OPENAI_API_KEY
    model_info:
      supported_environments: ["production", "staging"]
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY
    model_info:
      supported_environments: ["production"]
```
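A minimal sketch of the end-to-end behavior, mirroring the router test added in this PR: the router reads `LITELLM_ENVIRONMENT` and only loads deployments whose `supported_environments` include it.

```python
import os
from litellm import Router

os.environ["LITELLM_ENVIRONMENT"] = "development"

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "openai/gpt-3.5-turbo", "api_key": os.getenv("OPENAI_API_KEY")},
            "model_info": {"supported_environments": ["development", "production", "staging"]},
        },
        {
            "model_name": "gpt-4o",
            "litellm_params": {"model": "openai/gpt-4o", "api_key": os.getenv("OPENAI_API_KEY")},
            "model_info": {"supported_environments": ["production"]},
        },
    ]
)

# Only the deployments active for "development" are loaded
print(router.get_model_names())  # ['gpt-3.5-turbo']
```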
-## Disable Swagger UI
*(section moved, unchanged, to "### Disable Swagger UI" under "## Extras" below)*

### Set Custom Prompt Templates

LiteLLM by default checks if a model has a [prompt template and applies it](../completion/prompt_formatting.md) (e.g. if a huggingface model has a saved chat template in its tokenizer_config.json). However, you can also set a custom prompt template on your proxy in the `config.yaml`:

**Step 1**: Save your prompt template in a `config.yaml`

```yaml
# Model-specific parameters
model_list:
  - model_name: mistral-7b # model alias
    litellm_params: # actual params for litellm.completion()
      model: "huggingface/mistralai/Mistral-7B-Instruct-v0.1"
      api_base: "<your-api-base>"
      api_key: "<your-api-key>" # [OPTIONAL] for hf inference endpoints
      initial_prompt_value: "\n"
      roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      final_prompt_value: "\n"
      bos_token: "<s>"
      eos_token: "</s>"
      max_tokens: 4096
```

**Step 2**: Start server with config

```shell
$ litellm --config /path/to/config.yaml
```

-## Configure DB Pool Limits + Connection Timeouts
+## General Settings `general_settings` (DB Connection, etc)

### Configure DB Pool Limits + Connection Timeouts

```yaml
general_settings:
@@ -812,3 +658,43 @@ general_settings:
}
```

## Extras

### Disable Swagger UI

To disable the Swagger docs from the base url, set

```env
NO_DOCS="True"
```

in your environment, and restart the proxy.

### Use CONFIG_FILE_PATH for proxy (Easier Azure container deployment)

1. Setup config.yaml

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY
```

2. Store filepath as env var

```bash
CONFIG_FILE_PATH="/path/to/config.yaml"
```

3. Start Proxy

```bash
$ litellm

# RUNNING on http://0.0.0.0:4000
```
docs/my-website/docs/proxy/ip_address.md (new file, 28 lines)

@@ -0,0 +1,28 @@

# ✨ IP Address Filtering

:::info

You need a LiteLLM License to unlock this feature. [Grab time](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat), to get one today!

:::

Restrict which IP's can call the proxy endpoints.

```yaml
general_settings:
  allowed_ips: ["192.168.1.1"]
```

**Expected Response** (if IP not listed)

```bash
{
  "error": {
    "message": "Access forbidden: IP address not allowed.",
    "type": "auth_error",
    "param": "None",
    "code": 403
  }
}
```
@@ -1,7 +1,7 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

-# 📈 [BETA] Prometheus metrics
+# 📈 Prometheus metrics

:::info
@@ -1,7 +1,7 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

-# [BETA] JWT-based Auth
+# JWT-based Auth

Use JWT's to auth admins / projects into the proxy.
@@ -50,7 +50,7 @@ const sidebars = {
    {
      type: "category",
      label: "🔑 Authentication",
-     items: ["proxy/virtual_keys", "proxy/token_auth", "proxy/oauth2"],
+     items: ["proxy/virtual_keys", "proxy/token_auth", "proxy/oauth2", "proxy/ip_address"],
    },
    {
      type: "category",
@@ -2,22 +2,21 @@ model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo
-     api_key: fake-key
+     api_key: os.environ/OPENAI_API_KEY
-     api_base: https://exampleopenaiendpoint-production.up.railway.app/
-     tags: ["teamB"]
    model_info:
-     id: "team-b-model"
+     supported_environments: ["development", "production", "staging"]
- - model_name: rerank-english-v3.0
+ - model_name: gpt-4
    litellm_params:
-     model: cohere/rerank-english-v3.0
+     model: openai/gpt-4
-     api_key: os.environ/COHERE_API_KEY
+     api_key: os.environ/OPENAI_API_KEY
- - model_name: llava-hf
-   litellm_params:
-     model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
-     api_base: http://localhost:8000
-     api_key: fake-key
    model_info:
-     supports_vision: True
+     supported_environments: ["production", "staging"]
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY
    model_info:
      supported_environments: ["production"]

litellm_settings:
@@ -90,6 +90,7 @@ from litellm.types.llms.openai import (
)
from litellm.types.router import (
    SPECIAL_MODEL_INFO_PARAMS,
    VALID_LITELLM_ENVIRONMENTS,
    AlertingConfig,
    AllowedFailsPolicy,
    AssistantsTypedDict,
@@ -3973,12 +3974,55 @@
            }
        )

        ## Check if LLM Deployment is allowed for this deployment
        if deployment.model_info and "supported_environments" in deployment.model_info:
            if (
                self.deployment_is_active_for_environment(deployment=deployment)
                is not True
            ):
                return

        deployment = self._add_deployment(deployment=deployment)

        model = deployment.to_json(exclude_none=True)

        self.model_list.append(model)

    def deployment_is_active_for_environment(self, deployment: Deployment) -> bool:
        """
        Function to check if an llm deployment is active for a given environment. Allows using the same config.yaml across multiple environments.

        Requires `LITELLM_ENVIRONMENT` to be set in .env. Valid values for environment:
        - development
        - staging
        - production

        Raises:
        - ValueError: If LITELLM_ENVIRONMENT is not set in .env or not one of the valid values
        - ValueError: If supported_environments is not set in model_info or not one of the valid values
        """
        litellm_environment = litellm.get_secret_str(secret_name="LITELLM_ENVIRONMENT")
        if litellm_environment is None:
            raise ValueError(
                "Set 'supported_environments' for model but 'LITELLM_ENVIRONMENT' is not set in .env"
            )

        if litellm_environment not in VALID_LITELLM_ENVIRONMENTS:
            raise ValueError(
                f"LITELLM_ENVIRONMENT must be one of {VALID_LITELLM_ENVIRONMENTS}, but is set as: {litellm_environment}"
            )

        for _env in deployment.model_info["supported_environments"]:
            if _env not in VALID_LITELLM_ENVIRONMENTS:
                raise ValueError(
                    f"supported_environments must be one of {VALID_LITELLM_ENVIRONMENTS}, but is set as: {_env} for deployment: {deployment}"
                )

        # validate litellm_environment is one of LiteLLMEnvironment
        if litellm_environment in deployment.model_info["supported_environments"]:
            return True
        return False

    def set_model_list(self, model_list: list):
        original_model_list = copy.deepcopy(model_list)
        self.model_list = []
@@ -636,3 +636,63 @@ def test_init_clients_async_mode():
        assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


@pytest.mark.parametrize(
    "environment,expected_models",
    [
        ("development", ["gpt-3.5-turbo"]),
        ("production", ["gpt-4", "gpt-3.5-turbo", "gpt-4o"]),
    ],
)
def test_init_router_with_supported_environments(environment, expected_models):
    """
    Tests that the correct models are setup on router when LITELLM_ENVIRONMENT is set
    """
    os.environ["LITELLM_ENVIRONMENT"] = environment
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "azure/chatgpt-v-2",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
                "timeout": 0.01,
                "stream_timeout": 0.000_001,
                "max_retries": 7,
            },
            "model_info": {"supported_environments": ["development", "production"]},
        },
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "openai/gpt-4",
                "api_key": os.getenv("OPENAI_API_KEY"),
                "timeout": 0.01,
                "stream_timeout": 0.000_001,
                "max_retries": 7,
            },
            "model_info": {"supported_environments": ["production"]},
        },
        {
            "model_name": "gpt-4o",
            "litellm_params": {
                "model": "openai/gpt-4o",
                "api_key": os.getenv("OPENAI_API_KEY"),
                "timeout": 0.01,
                "stream_timeout": 0.000_001,
                "max_retries": 7,
            },
            "model_info": {"supported_environments": ["production"]},
        },
    ]
    router = Router(model_list=model_list, set_verbose=True)
    _model_list = router.get_model_names()

    print("model_list: ", _model_list)
    print("expected_models: ", expected_models)

    assert set(_model_list) == set(expected_models)

    os.environ.pop("LITELLM_ENVIRONMENT")
@@ -596,3 +596,10 @@ class RouterRateLimitError(ValueError):
class RouterModelGroupAliasItem(TypedDict):
    model: str
    hidden: bool  # if 'True', don't return on `.get_model_list`


VALID_LITELLM_ENVIRONMENTS = [
    "development",
    "staging",
    "production",
]