docs(routing.md): add pre-call checks to docs
parent 292cdd81e4
commit 8c6402b02d
3 changed files with 94 additions and 4 deletions
@@ -551,6 +551,94 @@ router = Router(model_list: Optional[list] = None,
                cache_responses=True)
```

## Pre-Call Checks (Context Window)

Enable pre-call checks to filter out deployments whose context window is too small for a given call's messages.

**1. Enable pre-call checks**

```python
from litellm import Router
# ...
router = Router(model_list=model_list, enable_pre_call_checks=True) # 👈 Set to True
```

**2. (Azure-only) Set base model**

For Azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json); all the Azure models start with `azure/`.

```python
model_list = [
    {
        "model_name": "gpt-3.5-turbo", # model group name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "model_info": {
            "base_model": "azure/gpt-35-turbo", # 👈 SET BASE MODEL
        }
    },
    {
        "model_name": "gpt-3.5-turbo", # model group name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]
```

**3. Test it!**

```python
"""
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
- Send a 5k prompt
- Assert it works
"""
import os
from litellm import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo", # model group name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "model_info": {
            "base_model": "azure/gpt-35-turbo",
        }
    },
    {
        "model_name": "gpt-3.5-turbo", # model group name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]

router = Router(model_list=model_list, enable_pre_call_checks=True)

text = "What is the meaning of 42?" * 5000

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": text},
        {"role": "user", "content": "Who was Alexander?"},
    ],
)

print(f"response: {response}")
```

## Caching across model groups

If you want to cache across two different model groups (e.g. Azure deployments and OpenAI), use caching groups, as in the sketch below.
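A minimal sketch of what a caching-groups setup could look like, assuming the Router's `caching_groups` parameter (a list of tuples of model group names that share a cache); the group names here are illustrative:

```python
import os
from litellm import Router

# Two model groups that serve the same underlying model; names are illustrative.
model_list = [
    {
        "model_name": "openai-gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
    {
        "model_name": "azure-gpt-3.5-turbo",
        "litellm_params": {
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
    },
]

# Each tuple names model groups that should share one cache bucket,
# so a cached response for either group can be served for the other.
router = Router(
    model_list=model_list,
    cache_responses=True,
    caching_groups=[("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")],
)
```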

@@ -2181,6 +2181,8 @@ class Router:
        for idx, deployment in enumerate(_returned_deployments):
            # see if we have the info for this model
            try:
                base_model = deployment.get("model_info", {}).get("base_model", None)
                if base_model is None:
                    base_model = deployment.get("litellm_params", {}).get(
                        "base_model", None
                    )
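In isolation, the lookup order this hunk implements (prefer `model_info.base_model`, fall back to `litellm_params.base_model`) behaves like the sketch below; the deployment dict is made up for illustration:

```python
# Illustrative deployment dict (values invented for the example).
deployment = {
    "litellm_params": {
        "model": "azure/chatgpt-v-2",
        "base_model": "azure/gpt-35-turbo",  # fallback location
    },
    "model_info": {},  # no base_model here, so the fallback is used
}

# Same two-step lookup as the hunk above.
base_model = deployment.get("model_info", {}).get("base_model", None)
if base_model is None:
    base_model = deployment.get("litellm_params", {}).get("base_model", None)

print(base_model)  # azure/gpt-35-turbo
```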
@@ -301,7 +301,7 @@ def test_router_azure_acompletion():
 def test_router_context_window_check():
     """
     - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
-    - Send a 10k prompt
+    - Send a 5k prompt
     - Assert it works
     """
     from large_text import text