docs(routing.md): add pre-call checks to docs
parent 292cdd81e4
commit 8c6402b02d
3 changed files with 94 additions and 4 deletions
@@ -551,6 +551,94 @@ router = Router(model_list: Optional[list] = None,

## Pre-Call Checks (Context Window)

Enable pre-call checks to filter out deployments whose context window is smaller than the messages in a call.
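Concretely, the router compares the request's token count against each deployment's context window. A rough sketch of the number being compared, using litellm's public token counter (illustrative only, not the router's internal code):

```python
import litellm

messages = [
    {"role": "system", "content": "What is the meaning of 42?" * 5000},
    {"role": "user", "content": "Who was Alexander?"},
]

# estimate the prompt size the way litellm would count it
prompt_tokens = litellm.token_counter(model="gpt-3.5-turbo", messages=messages)
print(f"prompt tokens: {prompt_tokens}")  # deployments with a smaller context window get filtered out
```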
**1. Enable pre-call checks**

```python
from litellm import Router
# ...
router = Router(model_list=model_list, enable_pre_call_checks=True) # 👈 Set to True
```
**2. (Azure-only) Set base model**

For Azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json); all the Azure models start with `azure/`.

```python
import os

model_list = [
    {
        "model_name": "gpt-3.5-turbo", # model group name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "model_info": {
            "base_model": "azure/gpt-35-turbo", # 👈 SET BASE MODEL
        }
    },
    {
        "model_name": "gpt-3.5-turbo", # model group name
        "litellm_params": { # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]
```
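To check which context window litellm has on record for a candidate base model, you can look it up in `litellm.model_cost`, which is built from that same JSON file. A small sketch (the exact keys vary by entry, so both `max_input_tokens` and `max_tokens` are tried):

```python
import litellm

# litellm.model_cost is loaded from model_prices_and_context_window.json
info = litellm.model_cost["azure/gpt-35-turbo"]

# newer entries expose "max_input_tokens"; older ones may only have "max_tokens"
context_window = info.get("max_input_tokens") or info.get("max_tokens")
print(f"azure/gpt-35-turbo context window: {context_window} tokens")
```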
**3. Test it!**

```python
"""
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
- Send a 5k prompt
- Assert it works
"""
from litellm import Router
import os

try:
    model_list = [
        {
            "model_name": "gpt-3.5-turbo", # model group name
            "litellm_params": { # params for litellm completion/embedding call
                "model": "azure/chatgpt-v-2",
                "api_key": os.getenv("AZURE_API_KEY"),
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
            "model_info": {
                "base_model": "azure/gpt-35-turbo",
            }
        },
        {
            "model_name": "gpt-3.5-turbo", # model group name
            "litellm_params": { # params for litellm completion/embedding call
                "model": "gpt-3.5-turbo-1106",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        },
    ]

    router = Router(model_list=model_list, enable_pre_call_checks=True)

    text = "What is the meaning of 42?" * 5000

    response = router.completion(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ],
    )

    print(f"response: {response}")
except Exception as e:
    # surface any unexpected router error
    print(f"Got unexpected exception on router! - {str(e)}")
```
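One way to sanity-check that the larger-context deployment handled the oversized prompt is to look at the model reported on the response; a minimal sketch, assuming the OpenAI-style `model` field is populated on litellm's response object:

```python
# run right after the router.completion(...) call above
print(f"served by: {response.model}")  # expect the 16k gpt-3.5-turbo-1106 deployment, not the 4k Azure one
```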

## Caching across model groups

If you want to cache across 2 different model groups (e.g. azure deployments, and openai), use caching groups.
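For context, a minimal sketch of what a caching group might look like, assuming the Router accepts a `caching_groups` list of tuples naming the model groups that should share a cache (check the caching docs for the exact form):

```python
from litellm import Router

# hypothetical model group names; pair the groups whose responses should be cached together
router = Router(
    model_list=model_list,
    cache_responses=True,
    caching_groups=[("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")],  # assumed parameter form
)
```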
```diff
@@ -2181,9 +2181,11 @@ class Router:
         for idx, deployment in enumerate(_returned_deployments):
             # see if we have the info for this model
             try:
-                base_model = deployment.get("litellm_params", {}).get(
-                    "base_model", None
-                )
+                base_model = deployment.get("model_info", {}).get("base_model", None)
+                if base_model is None:
+                    base_model = deployment.get("litellm_params", {}).get(
+                        "base_model", None
+                    )
                 model = base_model or deployment.get("litellm_params", {}).get(
                     "model", None
                 )
```
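The change above makes the context-window check look for `base_model` under a deployment's `model_info` first, then fall back to `litellm_params["base_model"]`, and finally to the deployment's `model`. A minimal standalone sketch of that lookup order (the helper name is invented for illustration):

```python
from typing import Optional

def resolve_context_check_model(deployment: dict) -> Optional[str]:
    # hypothetical helper mirroring the lookup order in the diff above
    base_model = deployment.get("model_info", {}).get("base_model", None)
    if base_model is None:
        base_model = deployment.get("litellm_params", {}).get("base_model", None)
    return base_model or deployment.get("litellm_params", {}).get("model", None)

# for the Azure deployment defined earlier, this resolves to "azure/gpt-35-turbo"
```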
@ -301,7 +301,7 @@ def test_router_azure_acompletion():
|
||||||
def test_router_context_window_check():
|
def test_router_context_window_check():
|
||||||
"""
|
"""
|
||||||
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
|
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
|
||||||
- Send a 10k prompt
|
- Send a 5k prompt
|
||||||
- Assert it works
|
- Assert it works
|
||||||
"""
|
"""
|
||||||
from large_text import text
|
from large_text import text
|
||||||
|
|