docs(routing.md): add pre-call checks to docs
commit e8e7964025 (parent b7321ae4ee)
3 changed files with 94 additions and 4 deletions
@@ -551,6 +551,94 @@ router = Router(model_list: Optional[list] = None,

cache_responses=True)
```

## Pre-Call Checks (Context Window)

Enable pre-call checks to filter out deployments whose context window is smaller than a call's messages: before routing a request, the router estimates the token count of the messages and skips any deployment that cannot fit them.

**1. Enable pre-call checks**

```python
from litellm import Router
# ...
router = Router(model_list=model_list, enable_pre_call_checks=True) # 👈 Set to True
```
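
To see what the check compares, you can estimate a request's token count yourself. A minimal sketch, assuming `litellm.token_counter` (this is not part of the router call itself):

```python
import litellm

# Rough estimate of the tokens a request needs; the pre-call check
# compares a count like this against each deployment's context window.
messages = [{"role": "user", "content": "Who was Alexander?"}]
tokens_needed = litellm.token_counter(model="gpt-3.5-turbo", messages=messages)
print(f"tokens needed: {tokens_needed}")
```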

**2. (Azure-only) Set base model**

For Azure deployments, set the base model. Pick it from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json); all the Azure models start with `azure/`.

```python
import os

model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "model_info": {
            "base_model": "azure/gpt-35-turbo",  # 👈 SET BASE MODEL
        },
    },
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]
```
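
The base model is what maps an Azure deployment to a known context window. A minimal sketch of that lookup, assuming `litellm.get_model_info` and the key names in model_prices_and_context_window.json:

```python
import litellm

# Metadata the base model resolves to; "max_tokens" is assumed here
# to be the context window the pre-call check reads.
info = litellm.get_model_info("azure/gpt-35-turbo")
print(info.get("max_tokens"))
```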

**3. Test it!**

```python
"""
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
- Send a 5k prompt
- Assert it works
"""
import os
from litellm import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "model_info": {
            "base_model": "azure/gpt-35-turbo",
        },
    },
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]

router = Router(model_list=model_list, enable_pre_call_checks=True)

text = "What is the meaning of 42?" * 5000

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": text},
        {"role": "user", "content": "Who was Alexander?"},
    ],
)

print(f"response: {response}")
```
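
If the check works as intended, the oversized prompt should be routed past the 4k Azure deployment and land on the 16k `gpt-3.5-turbo-1106` deployment.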

## Caching across model groups

If you want to cache across two different model groups (e.g. Azure deployments and OpenAI), use caching groups, as sketched below.
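
A minimal sketch, assuming the Router's `caching_groups` parameter takes tuples of model group names that should share cache hits (the group names below are hypothetical):

```python
from litellm import Router

router = Router(
    model_list=model_list,  # assumes a model_list defining both groups
    cache_responses=True,
    # hypothetical group names, for illustration only
    caching_groups=[("azure-gpt-3.5-turbo", "openai-gpt-3.5-turbo")],
)
```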

@@ -2181,9 +2181,11 @@ class Router:

```diff
 for idx, deployment in enumerate(_returned_deployments):
     # see if we have the info for this model
     try:
-        base_model = deployment.get("litellm_params", {}).get(
-            "base_model", None
-        )
+        base_model = deployment.get("model_info", {}).get("base_model", None)
+        if base_model is None:
+            base_model = deployment.get("litellm_params", {}).get(
+                "base_model", None
+            )
         model = base_model or deployment.get("litellm_params", {}).get(
             "model", None
         )
```
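
With this change, the pre-call check reads `base_model` from a deployment's `model_info` first and only falls back to `litellm_params`, matching where the docs above tell you to set it.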

@@ -301,7 +301,7 @@ def test_router_azure_acompletion():

```diff
 def test_router_context_window_check():
     """
     - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
-    - Send a 10k prompt
+    - Send a 5k prompt
     - Assert it works
     """
     from large_text import text
```