diff --git a/docs/my-website/docs/completion/reliable_completions.md b/docs/my-website/docs/completion/reliable_completions.md
index e9a0a2f6c..a58975c1b 100644
--- a/docs/my-website/docs/completion/reliable_completions.md
+++ b/docs/my-website/docs/completion/reliable_completions.md
@@ -1,10 +1,36 @@
 # Reliability
+
+
+## Helper utils
 
 LiteLLM supports the following functions for reliability:
+* `litellm.longer_context_model_fallback_dict`: dictionary mapping models to their larger-context equivalents
 * `completion_with_retries`: use tenacity retries
 * `completion()` with fallback models: set `fallback_models=['gpt-3.5-turbo', 'command-nightly', 'llama2']`. If the primary model fails, the fallback models are tried.
 
-## Completion with Retries
+## Context Window Errors
+
+```python
+from litellm import completion, longer_context_model_fallback_dict, ContextWindowExceededError
+
+sample_text = "how does a court case get to the Supreme Court?" * 1000
+messages = [{"content": sample_text, "role": "user"}]
+model = "gpt-3.5-turbo"
+try:
+    # try the original model
+    response = completion(model=model, messages=messages)
+# catch the context window error
+except ContextWindowExceededError as e:
+    if model in longer_context_model_fallback_dict:
+        # switch to the equivalent larger model -> gpt-3.5-turbo-16k
+        new_model = longer_context_model_fallback_dict[model]
+        response = completion(model=new_model, messages=messages)
+
+print(response)
+```
+
+
+## Retry failed requests
 
 You can use this as a drop-in replacement for the `completion()` function to use tenacity retries - by default we retry the call 3 times.
 
@@ -29,7 +55,7 @@ def test_completion_custom_provider_model_name():
         print(f"Error occurred: {e}")
 ```
 
-## Completion with Fallbacks
+## Specify fallback models
 
 LLM APIs can be unstable; completion() with fallbacks ensures you'll always get a response from your calls.
 ## Usage
diff --git a/litellm/__init__.py b/litellm/__init__.py
index ce1cace84..df81cf7b3 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -261,7 +261,7 @@ models_by_provider: dict = {
 }
 
 # mapping for those models which have larger equivalents
-longer_context_model_fallback_dict = {
+longer_context_model_fallback_dict: dict = {
     # openai chat completion models
     "gpt-3.5-turbo": "gpt-3.5-turbo-16k",
     "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index 05a7c09e2..d18aa4f22 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/tests/test_longer_context_fallback.py b/litellm/tests/test_longer_context_fallback.py
new file mode 100644
index 000000000..8f8942897
--- /dev/null
+++ b/litellm/tests/test_longer_context_fallback.py
@@ -0,0 +1,13 @@
+#### What this tests ####
+# This tests the context fallback dict
+
+import sys, os
+import traceback
+import pytest
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import litellm
+from litellm import longer_context_model_fallback_dict
+
+print(longer_context_model_fallback_dict)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index d68537b92..6a70ee9c3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.749"
+version = "0.1.750"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
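The docs changed above treat retries and the context-window fallback as separate mechanisms, but they compose naturally: retry transient API failures first, and only swap to the larger-context model when the error is deterministic. Below is a minimal sketch of that combination — `reliable_completion` is a hypothetical helper name, and it assumes `completion_with_retries` accepts the same `model`/`messages` kwargs as `completion()` (per the "drop-in replacement" note in the docs).

```python
# Sketch: combine tenacity retries with the longer-context fallback dict.
# `reliable_completion` is a hypothetical helper, not part of litellm.
from litellm import (
    completion_with_retries,
    longer_context_model_fallback_dict,
    ContextWindowExceededError,
)

def reliable_completion(model: str, messages: list):
    try:
        # transient failures are retried (3 times by default, per the docs)
        return completion_with_retries(model=model, messages=messages)
    except ContextWindowExceededError:
        # the same prompt will exceed the window on every retry, so swap in
        # the larger-context equivalent if one is registered
        if model in longer_context_model_fallback_dict:
            new_model = longer_context_model_fallback_dict[model]
            return completion_with_retries(model=new_model, messages=messages)
        raise

response = reliable_completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "how does a court case get to the Supreme Court?" * 1000}],
)
print(response)
```

The re-raise branch matters: if a model has no larger equivalent in the dict, surfacing the original `ContextWindowExceededError` is more useful than returning an unbound or empty response.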