From a5b780586f345dc30268f93b5a26200131db3441 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Thu, 21 Sep 2023 18:30:48 -0700
Subject: [PATCH] update config docs

---
 docs/my-website/docs/completion/config.md | 38 +++--------
 proxy-server                              |  2 +-
 2 files changed, 5 insertions(+), 35 deletions(-)

diff --git a/docs/my-website/docs/completion/config.md b/docs/my-website/docs/completion/config.md
index 2a4380c44..4ccc8f421 100644
--- a/docs/my-website/docs/completion/config.md
+++ b/docs/my-website/docs/completion/config.md
@@ -4,41 +4,9 @@ Model-specific changes can make our code complicated, making it harder to debug
 
 ### usage
 
-E.g. If we want to implement:
-* Moderations check for Anthropic models (to avoid violating their safety policy)
-* Model Fallbacks - specific + general
+Handling prompt logic.
 
-```python
-from litellm import completion_with_config
-import os
-
-config = {
-    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
-    "model": {
-        "claude-instant-1": {
-            "needs_moderation": True
-        },
-        "gpt-3.5-turbo": {
-            "error_handling": {
-                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
-            }
-        },
-    }
-}
-
-# set env var
-os.environ["OPENAI_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your openai key
-os.environ["ANTHROPIC_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your anthropic key
-
-
-sample_text = "how does a court case get to the Supreme Court?" * 1000
-messages = [{"content": sample_text, "role": "user"}]
-response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
-```
-[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)
-### select model based on prompt size
-
-You can also use model configs to automatically select a model based on the prompt size. It checks the number of tokens in the prompt and max tokens for each model. It selects the model with max tokens > prompt tokens.
+It checks the number of tokens in the prompt and max tokens for each model. It selects the model with max tokens > prompt tokens.
 
 If the prompt is larger than any available model, it'll automatically trim the prompt (from the middle + preserve any system message), and fit it to the largest model available.
 
@@ -61,6 +29,8 @@ messages = [{"content": sample_text, "role": "user"}]
 response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
 ```
 
+[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)
+
 ### Complete Config Structure
 
 ```python
diff --git a/proxy-server b/proxy-server
index 7e3bc4eb0..bbe0f62e3 160000
--- a/proxy-server
+++ b/proxy-server
@@ -1 +1 @@
-Subproject commit 7e3bc4eb0def8056ff514910c4e31d064a631ba1
+Subproject commit bbe0f62e3a413c184607a188ec1b9ca931fef040
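
For anyone reviewing this doc change who wants to try the `completion_with_config` behaviour described in the retained prose, here is a minimal sketch of a call. It is assembled only from the config keys that appear verbatim in the removed hunk (`default_fallback_models`, `needs_moderation`, `error_handling`, `ContextWindowExceededError`) and assumes those keys are still accepted by the library; the API key values are placeholders, not real credentials.

```python
# Minimal sketch, reusing the config keys shown in the removed hunk.
# Assumes completion_with_config still accepts default_fallback_models,
# per-model needs_moderation, and error_handling entries.
import os

from litellm import completion_with_config

config = {
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
    "model": {
        "claude-instant-1": {
            "needs_moderation": True  # run a moderation check before calling Anthropic
        },
        "gpt-3.5-turbo": {
            "error_handling": {
                # fall back to the 16k-context variant when the prompt is too long
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    },
}

# set env vars (placeholders - replace with your own keys)
os.environ["OPENAI_API_KEY"] = "sk-..."
os.environ["ANTHROPIC_API_KEY"] = "sk-ant-..."

# an intentionally oversized prompt, to exercise the context-window fallback
sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]

response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
print(response)
```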