Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 19:24:27 +00:00)
docs(routing.md): refactor docs to show how to use pre-call checks and fallback across model groups
This commit is contained in:
parent b2b8375987
commit 0072174ef9
5 changed files with 274 additions and 137 deletions

**proxy/load_balancing.md**

@@ -1,4 +1,4 @@

Title changed: `# Load Balancing - Config Setup` → `# Multiple Instances`

Load balance multiple instances of the same model

The proxy will handle routing requests (using LiteLLM's Router). **Set `rpm` in the config if you want to maximize throughput**
@@ -10,75 +10,6 @@ For more details on routing strategies / params, see [Routing](../routing.md)

(unchanged: the closing `:::` of an info admonition)
Removed: the "## Quick Start - Load Balancing" walkthrough (Step 1 - Set deployments on config, Step 2 - Start Proxy with config, Step 3 - Use proxy - Call a model group, and "### Usage - Call a specific model deployment"). The same section is added, verbatim, to proxy/reliability.md below.
## Load Balancing using multiple litellm instances (Kubernetes, Auto Scaling)

LiteLLM Proxy supports sharing rpm/tpm across multiple litellm instances, pass `redis_host`, `redis_password` and `redis_port` to enable this. (LiteLLM will use Redis to track rpm/tpm usage.)
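For illustration, a minimal sketch of that setup, assuming these keys live under `router_settings` in the proxy config:

```yaml
router_settings:
  routing_strategy: usage-based-routing   # assumption: a strategy that reads the shared rpm/tpm counters
  redis_host: <your-redis-host>           # same Redis instance for every litellm proxy replica
  redis_password: <your-redis-password>
  redis_port: 6379
```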

**proxy/reliability.md**

@@ -2,7 +2,9 @@ import Image from '@theme/IdealImage';

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

Title changed: `# Fallbacks, Retries, Timeouts, Cooldowns` → `# 🔥 Fallbacks, Retries, Timeouts, Load Balancing`
Added: Retry call with multiple instances of the same model.

If a call fails after num_retries, fall back to another model group.

@@ -10,6 +12,77 @@ If the error is a context window exceeded error, fall back to a larger model group

[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/router.py)
Added - the Quick Start section moved over from proxy/load_balancing.md:

## Quick Start - Load Balancing

### Step 1 - Set deployments on config

**Example config below**. Here requests with `model=gpt-3.5-turbo` will be routed across multiple instances of `azure/gpt-3.5-turbo`

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/<your-deployment-name>
      api_base: <your-azure-endpoint>
      api_key: <your-azure-api-key>
      rpm: 6      # Rate limit for this deployment: in requests per minute (rpm)
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/gpt-turbo-small-ca
      api_base: https://my-endpoint-canada-berri992.openai.azure.com/
      api_key: <your-azure-api-key>
      rpm: 6
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/gpt-turbo-large
      api_base: https://openai-france-1234.openai.azure.com/
      api_key: <your-azure-api-key>
      rpm: 1440
```

### Step 2: Start Proxy with config

```shell
$ litellm --config /path/to/config.yaml
```

### Step 3: Use proxy - Call a model group [Load Balancing]

Curl Command

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```

### Usage - Call a specific model deployment

If you want to call a specific model defined in the `config.yaml`, you can call the `litellm_params: model`

In this example it will call `azure/gpt-turbo-small-ca`, defined in the config in Step 1

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "azure/gpt-turbo-small-ca",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```

Added heading above the existing fallbacks config section:

## Fallbacks + Retries + Timeouts + Cooldowns

**Set via config**
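A rough sketch of the kind of `litellm_settings` this section configures; the key names and values here are illustrative assumptions, not lines copied from the diff:

```yaml
litellm_settings:
  num_retries: 3            # retry a failed call up to 3 times before falling back
  request_timeout: 10       # seconds before a call is treated as timed out
  fallbacks: [{"gpt-3.5-turbo": ["gpt-4"]}]                             # generic fallback across model groups
  context_window_fallbacks: [{"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}]  # fallback on context window errors
  allowed_fails: 3          # cooldown a deployment after this many failures in a minute
```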
(unchanged: the opening of the existing `model_list` yaml block)

@@ -63,7 +136,143 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \

(unchanged: the closing lines of the existing curl example)
Heading changed: `## Custom Timeouts, Stream Timeouts - Per Model` → `## Advanced - Custom Timeouts, Stream Timeouts - Per Model`, and a new section is added ahead of it:

## Advanced - Context Window Fallbacks

**Before call is made** check if a call is within model context window with **`enable_pre_call_checks: true`**.

[**See Code**](https://github.com/BerriAI/litellm/blob/c9e6b05cfb20dfb17272218e2555d6b496c47f6f/litellm/router.py#L2163)

**1. Setup config**

For azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json), all the azure models start with `azure/`.

<Tabs>
<TabItem value="same-group" label="Same Group">

Filter older instances of a model (e.g. gpt-3.5-turbo) with smaller context windows

```yaml
router_settings:
  enable_pre_call_checks: true # 1. Enable pre-call checks

model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"
    model_info:
      base_model: azure/gpt-4-1106-preview # 2. 👈 (azure-only) SET BASE MODEL

  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo-1106
      api_key: os.environ/OPENAI_API_KEY
```

**2. Start proxy**

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

**3. Test it!**

```python
import openai

client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

text = "What is the meaning of 42?" * 5000

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": text},
        {"role": "user", "content": "Who was Alexander?"},
    ],
)

print(response)
```

</TabItem>

<TabItem value="different-group" label="Context Window Fallbacks (Different Groups)">

Fallback to larger models if current model is too small.

```yaml
router_settings:
  enable_pre_call_checks: true # 1. Enable pre-call checks

model_list:
  - model_name: gpt-3.5-turbo-small
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"
    model_info:
      base_model: azure/gpt-4-1106-preview # 2. 👈 (azure-only) SET BASE MODEL

  - model_name: gpt-3.5-turbo-large
    litellm_params:
      model: gpt-3.5-turbo-1106
      api_key: os.environ/OPENAI_API_KEY

  - model_name: claude-opus
    litellm_params:
      model: claude-3-opus-20240229
      api_key: os.environ/ANTHROPIC_API_KEY

litellm_settings:
  context_window_fallbacks: [{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large", "claude-opus"]}]
```

**2. Start proxy**

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

**3. Test it!**

```python
import openai

client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

text = "What is the meaning of 42?" * 5000

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": text},
        {"role": "user", "content": "Who was Alexander?"},
    ],
)

print(response)
```

</TabItem>
</Tabs>

## Advanced - Custom Timeouts, Stream Timeouts - Per Model
For each model you can set `timeout` & `stream_timeout` under `litellm_params`
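For illustration, a sketch of such an entry; the deployment values are placeholders, only the `timeout` / `stream_timeout` placement follows the sentence above:

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/<your-deployment-name>
      api_base: <your-azure-endpoint>
      api_key: <your-azure-api-key>
      timeout: 0.1          # hard timeout for the whole request, in seconds
      stream_timeout: 0.01  # separate timeout used for streaming requests, in seconds
```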
(unchanged: the opening of the existing per-model `model_list` yaml block)

@@ -92,7 +301,7 @@ $ litellm --config /path/to/config.yaml

(unchanged: the closing fence of the `litellm --config` shell block)
Heading changed: `## Setting Dynamic Timeouts - Per Request` → `## Advanced - Setting Dynamic Timeouts - Per Request`

LiteLLM Proxy supports setting a `timeout` per request

**routing.md**

@@ -567,10 +567,14 @@ from litellm import Router

(unchanged: `router = Router(model_list=model_list, enable_pre_call_checks=True) # 👈 Set to True` and the closing code fence)
Changed: `**2. (Azure-only) Set base model**` → `**2. Set Model List**`

For azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json), all the azure models start with `azure/`.

Added: `<Tabs>` and `<TabItem value="same-group" label="Same Group">`, wrapping the existing example in a tab.

(unchanged: the opening of the existing `model_list = [` python block)
@@ -582,7 +586,7 @@ model_list = [

(unchanged: `"api_base": os.getenv("AZURE_API_BASE"),`, the closing `},`, and `"model_info": {`)

Changed: `"base_model": "azure/gpt-35-turbo", # 👈 SET BASE MODEL` → `"base_model": "azure/gpt-35-turbo", # 👈 (Azure-only) SET BASE MODEL`

(unchanged: the braces closing the first deployment and opening the second)
@@ -593,8 +597,51 @@ model_list = [

(unchanged: the braces closing the second deployment and the `]` that ends `model_list`)

Added - finish the Same Group example with `router = Router(model_list=model_list, enable_pre_call_checks=True)`, close its tab, and add a Different Groups tab:

</TabItem>

<TabItem value="different-group" label="Context Window Fallbacks (Different Groups)">

```python
model_list = [
    {
        "model_name": "gpt-3.5-turbo-small", # model group name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "model_info": {
            "base_model": "azure/gpt-35-turbo", # 👈 (Azure-only) SET BASE MODEL
        }
    },
    {
        "model_name": "gpt-3.5-turbo-large", # model group name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
    {
        "model_name": "claude-opus",
        "litellm_params": { # params for litellm completion/embedding call
            "model": "claude-3-opus-20240229",
            "api_key": os.getenv("ANTHROPIC_API_KEY"),
        },
    },
]

router = Router(model_list=model_list, enable_pre_call_checks=True, context_window_fallbacks=[{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large", "claude-opus"]}])
```

</TabItem>

</Tabs>

**3. Test it!**

(unchanged: the existing python test script)
@ -646,60 +693,9 @@ print(f"response: {response}")
|
||||||
</TabItem>
|
</TabItem>
|
||||||
<TabItem value="proxy" label="Proxy">
|
<TabItem value="proxy" label="Proxy">
|
||||||
|
|
||||||
**1. Setup config**
|
:::info
|
||||||
|
Go [here](./proxy/reliability.md#advanced---context-window-fallbacks) for how to do this on the proxy
|
||||||
For azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json), all the azure models start with azure/.
|
:::
|
||||||
|
|
||||||
```yaml
|
|
||||||
router_settings:
|
|
||||||
enable_pre_call_checks: true # 1. Enable pre-call checks
|
|
||||||
|
|
||||||
model_list:
|
|
||||||
- model_name: gpt-3.5-turbo
|
|
||||||
litellm_params:
|
|
||||||
model: azure/chatgpt-v-2
|
|
||||||
api_base: os.environ/AZURE_API_BASE
|
|
||||||
api_key: os.environ/AZURE_API_KEY
|
|
||||||
api_version: "2023-07-01-preview"
|
|
||||||
model_info:
|
|
||||||
base_model: azure/gpt-4-1106-preview # 2. 👈 (azure-only) SET BASE MODEL
|
|
||||||
|
|
||||||
- model_name: gpt-3.5-turbo
|
|
||||||
litellm_params:
|
|
||||||
model: gpt-3.5-turbo-1106
|
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
|
||||||
```
|
|
||||||
|
|
||||||
**2. Start proxy**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
litellm --config /path/to/config.yaml
|
|
||||||
|
|
||||||
# RUNNING on http://0.0.0.0:4000
|
|
||||||
```
|
|
||||||
|
|
||||||
**3. Test it!**
|
|
||||||
|
|
||||||
```python
|
|
||||||
import openai
|
|
||||||
client = openai.OpenAI(
|
|
||||||
api_key="anything",
|
|
||||||
base_url="http://0.0.0.0:4000"
|
|
||||||
)
|
|
||||||
|
|
||||||
text = "What is the meaning of 42?" * 5000
|
|
||||||
|
|
||||||
# request sent to model set on litellm proxy, `litellm --model`
|
|
||||||
response = client.chat.completions.create(
|
|
||||||
model="gpt-3.5-turbo",
|
|
||||||
messages = [
|
|
||||||
{"role": "system", "content": text},
|
|
||||||
{"role": "user", "content": "Who was Alexander?"},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
print(response)
|
|
||||||
```
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
|

**sidebars.js**

@@ -31,24 +31,25 @@ const sidebars = {

(unchanged: "proxy/quick_start", "proxy/deploy", "proxy/prod")

Removed from this position: `"proxy/configs",`

(unchanged: the opening of the `type: "link"` entry)

Changed: `label: "📖 All Endpoints",` → `label: "📖 All Endpoints (Swagger)",`

(unchanged: `href: "https://litellm-api.up.railway.app/",` and the closing `},`)

Changed: `"proxy/enterprise",` → `"proxy/configs",`
Changed: `"proxy/user_keys",` → `"proxy/reliability",`
Removed: `"proxy/virtual_keys",`

(unchanged: "proxy/users")

Added: `"proxy/user_keys",`, `"proxy/enterprise",`, `"proxy/virtual_keys",`

(unchanged: "proxy/team_based_routing", "proxy/ui", "proxy/cost_tracking", "proxy/token_auth", and the opening of the `type: "category"` entry)

Changed: `label: "🔥 Load Balancing",` → `label: "Extra Load Balancing",`
Changed: `items: ["proxy/load_balancing", "proxy/reliability"],` → `items: ["proxy/load_balancing"],`

(unchanged: the closing `},`, "proxy/model_management", "proxy/health")

Net effect: "proxy/configs" and "proxy/reliability" move up into the main proxy list, "proxy/user_keys" / "proxy/enterprise" / "proxy/virtual_keys" move below "proxy/users", and the reliability page leaves the renamed "Extra Load Balancing" category.

**litellm/router.py**

@@ -2170,7 +2170,7 @@ class Router:

(unchanged docstring context: "Filter out model in model group, if:" / "- model context window < message length")

Changed: `- function call and model doesn't support function calling` → `- [TODO] function call and model doesn't support function calling`

(unchanged: the closing `"""` and `verbose_router_logger.debug(f"Starting Pre-call checks for deployments in model={model}")`)