forked from phoenix/litellm-mirror
add docs on using completion with configs
parent 30724d9e51
commit e0c2bd7cf1
6 changed files with 92 additions and 19 deletions
docs/my-website/docs/completion/config.md (new file, 81 additions)
@@ -0,0 +1,81 @@
# Model Config

Model-specific changes can complicate our code and make errors harder to debug. Use model configs to simplify this.

### Usage

For example, say we want to implement:

* A moderation check for Anthropic models (to avoid violating their safety policy)
* Model fallbacks - both model-specific and general

```python
from litellm import completion_with_config
import os

config = {
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
    "model": {
        "claude-instant-1": {
            "needs_moderation": True
        },
        "gpt-3.5-turbo": {
            "error_handling": {
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    }
}

# set env vars
os.environ["OPENAI_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your openai key
os.environ["ANTHROPIC_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your anthropic key

sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```
[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)
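
Note that when `needs_moderation` is set, a flagged prompt makes the call raise an exception rather than return a response (see the config structure below). Here is a minimal, illustrative sketch of guarding for that; the broad `except Exception` and the helper name are assumptions, since the exact exception type isn't specified on this page:

```python
from litellm import completion_with_config

def guarded_completion(model, messages, config):
    # Hypothetical helper (not part of litellm): returns None instead of
    # propagating the error raised when moderation flags the input.
    try:
        return completion_with_config(model=model, messages=messages, config=config)
    except Exception as e:  # assumption: catch broadly, the raised type isn't documented here
        print(f"Call blocked or failed: {e}")
        return None
```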

### Select a model based on prompt size

You can also use model configs to automatically select a model based on prompt size. litellm counts the tokens in the prompt, compares that against each model's max tokens, and picks a model whose max tokens exceed the prompt's token count.

```python
from litellm import completion_with_config
import os

config = {
    "available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
    "adapt_to_prompt_size": True, # 👈 key change
}

# set env vars
os.environ["OPENAI_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your openai key
os.environ["ANTHROPIC_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your anthropic key

sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```
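
To make the selection logic concrete, here is a minimal sketch of the idea, not litellm's implementation: pick the first available model whose context window exceeds the prompt's token count. The context-window sizes and the rough 4-characters-per-token estimate are assumptions for illustration only:

```python
# Illustrative only - litellm does this internally with real token counts.
ASSUMED_MAX_TOKENS = {  # assumed context windows, not authoritative numbers
    "gpt-3.5-turbo": 4097,
    "claude-instant-1": 100000,
    "gpt-3.5-turbo-16k": 16385,
}

def pick_model_for_prompt(available_models, messages):
    prompt_chars = sum(len(m["content"]) for m in messages)
    est_prompt_tokens = prompt_chars // 4  # crude heuristic: ~4 characters per token
    for model in available_models:
        if ASSUMED_MAX_TOKENS.get(model, 0) > est_prompt_tokens:
            return model
    raise ValueError("no available model has a large enough context window")

# A ~12k-token prompt skips gpt-3.5-turbo and lands on claude-instant-1
long_prompt = [{"role": "user", "content": "x" * 48000}]
print(pick_model_for_prompt(["gpt-3.5-turbo", "claude-instant-1"], long_prompt))
```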

### Complete Config Structure

```python
config = {
    "function": "completion",
    "default_fallback_models": # [Optional] List of model names to try if a call fails
    "available_models": # [Optional] List of all possible models you could call
    "adapt_to_prompt_size": # [Optional] True/False - select a model based on prompt size (picks from available_models)
    "model": {
        "model-name": {
            "needs_moderation": # [Optional] True/False - call the OpenAI moderations endpoint before the completion call; raises an exception if the input is flagged
            "error_handling": {
                "error-type": { # one of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
                    "fallback_model": "" # str, the model to try instead when that error occurs
                }
            }
        }
    }
}
```
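
For reference, here is a filled-in version of the structure above, assembled from the examples on this page; the specific model names and fallback choices are just illustrative:

```python
config = {
    "function": "completion",
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1"],  # tried in order if a call fails
    "available_models": ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "claude-instant-1"],
    "adapt_to_prompt_size": True,  # pick from available_models based on the prompt's token count
    "model": {
        "claude-instant-1": {
            "needs_moderation": True  # run the moderation check first; raises an exception if flagged
        },
        "gpt-3.5-turbo": {
            "error_handling": {
                # on a context-window error, retry with the larger-context model
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    },
}
```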
@@ -35,6 +35,7 @@ const sidebars = {
       "completion/message_trimming",
       "completion/model_alias",
       "completion/reliable_completions",
+      "completion/config",
       "completion/batching",
       "completion/mock_requests",
     ],

Binary file not shown.

Binary file not shown.
@@ -13,7 +13,6 @@ import litellm
 from litellm import completion_with_config

 config = {
-    "function": "completion",
     "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
     "model": {
         "claude-instant-1": {
@@ -63,7 +62,6 @@ def test_config_context_default_fallback():


 config = {
-    "function": "completion",
     "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
     "available_models": ["gpt-3.5-turbo", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-4", "gpt-4-0314", "gpt-4-0613",
                          "j2-ultra", "command-nightly", "togethercomputer/llama-2-70b-chat", "chat-bison", "chat-bison@001", "claude-2"],
@@ -90,4 +88,4 @@ def test_config_context_adapt_to_prompt():
         print(f"Exception: {e}")
         pytest.fail(f"An exception occurred: {e}")

-test_config_context_adapt_to_prompt()
+# test_config_context_adapt_to_prompt()
@@ -2780,7 +2780,7 @@ def read_config_args(config_path) -> dict:

 ########## experimental completion variants ############################

-def completion_with_config(*, config: Union[dict, str], **kwargs):
+def completion_with_config(config: Union[dict, str], **kwargs):
     if config is not None:
         if isinstance(config, str):
             config = read_config_args(config)
@@ -2791,23 +2791,17 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
     else:
         raise Exception("Config path not passed in.")

-    ## load the completion config
-    completion_config = None
-
-    if config["function"] == "completion":
-        completion_config = config
-
-    if completion_config is None:
+    if config is None:
         raise Exception("No completion config in the config file")

-    models_with_config = completion_config["model"].keys()
+    models_with_config = config["model"].keys()
     model = kwargs["model"]
     messages = kwargs["messages"]

     ## completion config
-    fallback_models = completion_config.get("default_fallback_models", None)
-    available_models = completion_config.get("available_models", None)
-    adapt_to_prompt_size = completion_config.get("adapt_to_prompt_size", False)
+    fallback_models = config.get("default_fallback_models", None)
+    available_models = config.get("available_models", None)
+    adapt_to_prompt_size = config.get("adapt_to_prompt_size", False)
     start_time = time.time()
     if adapt_to_prompt_size:
         ## Pick model based on token window
@@ -2829,7 +2823,7 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
     try:
         if model in models_with_config:
             ## Moderation check
-            if completion_config["model"][model].get("needs_moderation"):
+            if config["model"][model].get("needs_moderation"):
                 input = " ".join(message["content"] for message in messages)
                 response = litellm.moderation(input=input)
                 flagged = response["results"][0]["flagged"]

@@ -2838,8 +2832,8 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):

             ## Model-specific Error Handling
             error_handling = None
-            if completion_config["model"][model].get("error_handling"):
-                error_handling = completion_config["model"][model]["error_handling"]
+            if config["model"][model].get("error_handling"):
+                error_handling = config["model"][model]["error_handling"]

             try:
                 response = litellm.completion(**kwargs)

@@ -2968,7 +2962,6 @@ def completion_with_fallbacks(**kwargs):
                 return response

             except Exception as e:
-                print(f"got exception {e} for model {model}")
                 rate_limited_models.add(model)
                 model_expiration_times[model] = (
                     time.time() + 60