forked from phoenix/litellm-mirror
update config docs
parent 9c97e1513c
commit a5b780586f
2 changed files with 5 additions and 35 deletions
@@ -4,41 +4,9 @@ Model-specific changes can make our code complicated, making it harder to debug
### usage
E.g. if we want to implement:

* Handling prompt logic
* Moderations check for Anthropic models (to avoid violating their safety policy)
* Model Fallbacks - specific + general
```python
from litellm import completion_with_config
import os

config = {
    # general fallbacks, tried in order if the requested model fails
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
    "model": {
        "claude-instant-1": {
            # run a moderation check before calling this Anthropic model
            "needs_moderation": True
        },
        "gpt-3.5-turbo": {
            # model-specific fallback: on a context window error, retry with the 16k model
            "error_handling": {
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    }
}

# set env vars
os.environ["OPENAI_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your openai key
os.environ["ANTHROPIC_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your anthropic key

sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```
[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)
### select model based on prompt size
You can also use model configs to automatically select a model based on prompt size: it counts the tokens in the prompt, compares that count against each model's max tokens, and picks a model whose max tokens exceed the prompt size.

If the prompt is too large for every available model, it'll automatically trim the prompt (from the middle, preserving any system message) to fit the largest available model.
@@ -61,6 +29,8 @@ messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```
[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)
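The hunk above only shows the tail of the prompt-size example. Below is a minimal sketch of what the full snippet might look like; the `available_models` and `adapt_to_prompt_size` config keys are assumptions for illustration, not taken from this diff.

```python
from litellm import completion_with_config
import os

os.environ["OPENAI_API_KEY"] = "sk-..."  # replace with your openai key

# Assumed config keys (not shown in this diff): "available_models" lists the
# candidate models, "adapt_to_prompt_size" turns on prompt-size-based selection.
config = {
    "available_models": ["gpt-3.5-turbo", "gpt-3.5-turbo-16k"],
    "adapt_to_prompt_size": True,
}

# A very long prompt, well beyond gpt-3.5-turbo's context window,
# so the 16k model should be selected instead.
sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```

Under these assumptions, the call routes to `gpt-3.5-turbo-16k` once the prompt exceeds `gpt-3.5-turbo`'s context window.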
### Complete Config Structure
```python
@@ -1 +1 @@
Subproject commit 7e3bc4eb0def8056ff514910c4e31d064a631ba1
Subproject commit bbe0f62e3a413c184607a188ec1b9ca931fef040