docs(routing.md): add pre-call checks to docs
parent b7321ae4ee
commit e8e7964025

3 changed files with 94 additions and 4 deletions

@@ -551,6 +551,94 @@ router = Router(model_list: Optional[list] = None,

## Pre-Call Checks (Context Window)

Enable pre-call checks to filter out deployments whose context window limit is smaller than the messages for a given call.

**1. Enable pre-call checks**

```python
from litellm import Router
# ...
router = Router(model_list=model_list, enable_pre_call_checks=True) # 👈 Set to True
```

**2. (Azure-only) Set base model**

For Azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json); all the Azure base models start with `azure/`.

```python
import os

model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "model_info": {
            "base_model": "azure/gpt-35-turbo",  # 👈 SET BASE MODEL
        },
    },
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]
```

**3. Test it!**

```python
"""
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
- Send a 5k prompt
- Assert it works
"""
from litellm import Router
import os

try:
    model_list = [
        {
            "model_name": "gpt-3.5-turbo",  # model group name
            "litellm_params": {  # params for litellm completion/embedding call
                "model": "azure/chatgpt-v-2",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
            "model_info": {
                "base_model": "azure/gpt-35-turbo",
            },
        },
        {
            "model_name": "gpt-3.5-turbo",  # model group name
            "litellm_params": {  # params for litellm completion/embedding call
                "model": "gpt-3.5-turbo-1106",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
    ]

    router = Router(model_list=model_list, enable_pre_call_checks=True)

    text = "What is the meaning of 42?" * 5000

    response = router.completion(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ],
    )

    print(f"response: {response}")
except Exception as e:
    print(f"Got an unexpected error: {e}")
```

## Caching across model groups

If you want to cache across two different model groups (e.g. Azure deployments and OpenAI), use caching groups.
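
For illustration, a caching-group setup could look roughly like the sketch below. Assumptions: the group names `openai-gpt-3.5-turbo` / `azure-gpt-3.5-turbo` are invented for the example, and it presumes the `Router` accepts `cache_responses` together with a `caching_groups` list of model-group tuples; check the caching docs for the exact parameters before copying.

```python
import os
from litellm import Router

# Two separate model groups that should share one response cache.
model_list = [
    {
        "model_name": "openai-gpt-3.5-turbo",  # hypothetical group name
        "litellm_params": {
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
    {
        "model_name": "azure-gpt-3.5-turbo",  # hypothetical group name
        "litellm_params": {
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
    },
]

router = Router(
    model_list=model_list,
    cache_responses=True,
    # assumed parameter: groups whose calls read/write the same cache entries
    caching_groups=[("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")],
)
```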

@@ -2181,6 +2181,8 @@ class Router:

        for idx, deployment in enumerate(_returned_deployments):
            # see if we have the info for this model
            try:
                base_model = deployment.get("model_info", {}).get("base_model", None)
                if base_model is None:
                    base_model = deployment.get("litellm_params", {}).get(
                        "base_model", None
                    )
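
To make the intent of the `base_model` lookup concrete, here is a rough, self-contained sketch of the kind of context-window filter it feeds into. This is not the Router's actual implementation: `filter_by_context_window` is a hypothetical helper, and it assumes `litellm.model_cost` and `litellm.token_counter` provide the window size and token count.

```python
import litellm


def filter_by_context_window(deployments: list, messages: list) -> list:
    """Drop deployments whose known context window is smaller than the prompt."""
    valid = []
    for deployment in deployments:
        # Prefer the explicitly configured base_model (needed for Azure aliases).
        base_model = deployment.get("model_info", {}).get("base_model", None)
        if base_model is None:
            base_model = deployment.get("litellm_params", {}).get("model", None)

        model_info = litellm.model_cost.get(base_model, {})
        context_window = model_info.get("max_input_tokens") or model_info.get("max_tokens")
        if context_window is None:
            valid.append(deployment)  # unknown model -> don't filter it out
            continue

        input_tokens = litellm.token_counter(model=base_model, messages=messages)
        if input_tokens <= context_window:
            valid.append(deployment)
    return valid
```

With the deployments from the docs example above, a ~5k-token prompt would leave only the 16k `gpt-3.5-turbo-1106` deployment in the filtered list.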
@ -301,7 +301,7 @@ def test_router_azure_acompletion():
|
||||||
def test_router_context_window_check():
|
def test_router_context_window_check():
|
||||||
"""
|
"""
|
||||||
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
|
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
|
||||||
- Send a 10k prompt
|
- Send a 5k prompt
|
||||||
- Assert it works
|
- Assert it works
|
||||||
"""
|
"""
|
||||||
from large_text import text
|
from large_text import text
|
||||||
|
|