docs(routing.md): add pre-call checks to docs

Krrish Dholakia 2024-03-23 19:10:34 -07:00
parent b7321ae4ee
commit e8e7964025
3 changed files with 94 additions and 4 deletions

@@ -551,6 +551,94 @@ router = Router(model_list: Optional[list] = None,
cache_responses=True)
```
## Pre-Call Checks (Context Window)
Enable pre-call checks to filter out deployments whose context window limit is smaller than the token count of a call's messages.
**1. Enable pre-call checks**
```python
from litellm import Router
# ...
router = Router(model_list=model_list, enable_pre_call_checks=True) # 👈 Set to True
```
**2. (Azure-only) Set base model**
For Azure deployments, set the base model. Azure deployment names are custom, so the underlying model (and its context window) can't be inferred from them. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json); all the Azure models start with `azure/`.
```python
import os

model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "model_info": {
            "base_model": "azure/gpt-35-turbo",  # 👈 SET BASE MODEL
        },
    },
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]
```
**3. Test it!**
```python
"""
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
- Send a 5k prompt
- Assert it works
"""
from litellm import Router
import os
try:
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",  # model group name
            "litellm_params": {  # params for litellm completion/embedding call
                "model": "azure/chatgpt-v-2",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
            "model_info": {
                "base_model": "azure/gpt-35-turbo",
            },
        },
        {
            "model_name": "gpt-3.5-turbo",  # model group name
            "litellm_params": {  # params for litellm completion/embedding call
                "model": "gpt-3.5-turbo-1106",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
    ]

    router = Router(model_list=model_list, enable_pre_call_checks=True)

    text = "What is the meaning of 42?" * 5000

    response = router.completion(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ],
    )

    print(f"response: {response}")
except Exception as e:
    print(f"Got an exception: {str(e)}")
```
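
With pre-call checks enabled, the router filters out the 4k-context Azure deployment for this oversized prompt and routes the call to the 16k-context `gpt-3.5-turbo-1106` deployment.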
## Caching across model groups
If you want to cache across 2 different model groups (e.g. Azure deployments and OpenAI), use caching groups.
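
A minimal sketch, assuming the Router's `caching_groups` parameter (a list of tuples of model group names that should share cache hits):

```python
import os

from litellm import Router

model_list = [
    {
        "model_name": "openai-gpt-3.5-turbo",  # model group 1
        "litellm_params": {
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
    {
        "model_name": "azure-gpt-3.5-turbo",  # model group 2
        "litellm_params": {
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
    },
]

router = Router(
    model_list=model_list,
    cache_responses=True,
    # 👈 responses cached for one group can be served to the other
    caching_groups=[("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")],
)
```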

@@ -2181,6 +2181,8 @@ class Router:
for idx, deployment in enumerate(_returned_deployments):
    # see if we have the info for this model
    try:
        base_model = deployment.get("model_info", {}).get("base_model", None)
        if base_model is None:
            base_model = deployment.get("litellm_params", {}).get(
                "base_model", None
            )
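
A minimal sketch of the lookup order above, with a hypothetical deployment dict: `model_info.base_model` takes precedence, and `litellm_params.base_model` is the fallback.

```python
# hypothetical deployment dict, for illustration only
deployment = {
    "model_info": {},  # no base_model here...
    "litellm_params": {"base_model": "azure/gpt-35-turbo"},  # ...so this wins
}

base_model = deployment.get("model_info", {}).get("base_model", None)
if base_model is None:
    base_model = deployment.get("litellm_params", {}).get("base_model", None)

print(base_model)  # azure/gpt-35-turbo
```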

@@ -301,7 +301,7 @@ def test_router_azure_acompletion():
def test_router_context_window_check():
"""
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
-    - Send a 10k prompt
+    - Send a 5k prompt
- Assert it works
"""
from large_text import text