add docs on using completion with configs

This commit is contained in:
Krrish Dholakia 2023-09-21 17:01:56 -07:00
parent 30724d9e51
commit e0c2bd7cf1
6 changed files with 92 additions and 19 deletions


@@ -0,0 +1,81 @@
# Model Config
Model-specific logic can make our code complicated and errors harder to debug. Model configs let you keep that logic in one declarative place.
### Usage
E.g., if we want to implement:
* A moderation check for Anthropic models (to avoid violating their safety policy)
* Model fallbacks - both model-specific and general
```python
from litellm import completion_with_config
import os
config = {
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
    "model": {
        "claude-instant-1": {
            "needs_moderation": True
        },
        "gpt-3.5-turbo": {
            "error_handling": {
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    }
}
# set env vars
os.environ["OPENAI_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your openai key
os.environ["ANTHROPIC_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your anthropic key
sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```
[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)
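`completion_with_config` also accepts the config as a string path instead of a dict (its signature is `config: Union[dict, str]`; string configs are loaded via `read_config_args`). A minimal sketch, assuming the config above is saved as JSON in a hypothetical `config.json`:
```python
from litellm import completion_with_config

# "config.json" is a hypothetical file holding the same config dict as above,
# serialized as JSON; completion_with_config loads string configs from disk
response = completion_with_config(
    model="gpt-3.5-turbo",
    messages=[{"content": "Hey, how's it going?", "role": "user"}],
    config="config.json",
)
```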
### Select model based on prompt size
You can also use model configs to automatically select a model based on prompt size: litellm counts the tokens in the prompt, compares that count against the max token window of each model in `available_models`, and selects a model whose max tokens exceed the prompt's token count.
```python
from litellm import completion_with_config
import os
config = {
    "available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
    "adapt_to_prompt_size": True, # 👈 key change
}
# set env vars
os.environ["OPENAI_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your openai key
os.environ["ANTHROPIC_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your anthropic key
sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```
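Under the hood, the selection works roughly like the sketch below. This is illustrative rather than litellm's exact implementation; `MAX_TOKENS` and `count_tokens` are hypothetical stand-ins for litellm's internal model metadata and token counting.
```python
# Illustrative sketch of adapt_to_prompt_size - not litellm's exact code.
# MAX_TOKENS and count_tokens are hypothetical stand-ins.
MAX_TOKENS = {
    "gpt-3.5-turbo": 4097,       # approximate context windows
    "gpt-3.5-turbo-16k": 16385,
    "claude-instant-1": 100000,
}

def count_tokens(messages) -> int:
    # crude approximation: ~4 characters per token
    return sum(len(m["content"]) for m in messages) // 4

def pick_model(available_models, messages):
    prompt_tokens = count_tokens(messages)
    for model in available_models:
        # return the first model whose token window can fit the prompt
        if MAX_TOKENS.get(model, 0) > prompt_tokens:
            return model
    raise Exception("No model in available_models can fit this prompt")
```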
### Complete Config Structure
```python
config = {
    "function": "completion",
    "default_fallback_models": [], # [Optional] List of model names to try if a call fails
    "available_models": [], # [Optional] List of all possible models you could call
    "adapt_to_prompt_size": False, # [Optional] True/False - select a model based on prompt size (picks from available_models)
    "model": {
        "model-name": {
            "needs_moderation": False, # [Optional] True/False - call OpenAI's moderation endpoint before the completion call; raises an exception if the input is flagged
            "error_handling": {
                "error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
                    "fallback_model": "" # str - name of the model to try instead when that error occurs
                }
            }
        }
    }
}
```
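Putting it together, a filled-in version of the structure above (reusing the model names from the earlier examples) might look like:
```python
# a filled-in example of the structure above, using the models from the earlier examples
config = {
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1"],
    "available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
    "adapt_to_prompt_size": True,
    "model": {
        "claude-instant-1": {"needs_moderation": True},
        "gpt-3.5-turbo": {
            "error_handling": {
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    },
}
```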


@@ -35,6 +35,7 @@ const sidebars = {
         "completion/message_trimming",
         "completion/model_alias",
         "completion/reliable_completions",
+        "completion/config",
         "completion/batching",
         "completion/mock_requests",
       ],


@@ -13,7 +13,6 @@ import litellm
 from litellm import completion_with_config

 config = {
-    "function": "completion",
     "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
     "model": {
         "claude-instant-1": {
@@ -63,7 +62,6 @@ def test_config_context_default_fallback():
     config = {
-        "function": "completion",
         "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
         "available_models": ["gpt-3.5-turbo", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-4", "gpt-4-0314", "gpt-4-0613",
                              "j2-ultra", "command-nightly", "togethercomputer/llama-2-70b-chat", "chat-bison", "chat-bison@001", "claude-2"],
@@ -90,4 +88,4 @@ def test_config_context_adapt_to_prompt():
         print(f"Exception: {e}")
         pytest.fail(f"An exception occurred: {e}")

-test_config_context_adapt_to_prompt()
+# test_config_context_adapt_to_prompt()


@@ -2780,7 +2780,7 @@ def read_config_args(config_path) -> dict:
 ########## experimental completion variants ############################

-def completion_with_config(*, config: Union[dict, str], **kwargs):
+def completion_with_config(config: Union[dict, str], **kwargs):
     if config is not None:
         if isinstance(config, str):
             config = read_config_args(config)
@@ -2791,23 +2791,17 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
     else:
         raise Exception("Config path not passed in.")

-    ## load the completion config
-    completion_config = None
-    if config["function"] == "completion":
-        completion_config = config
-    if completion_config is None:
+    if config is None:
         raise Exception("No completion config in the config file")

-    models_with_config = completion_config["model"].keys()
+    models_with_config = config["model"].keys()
     model = kwargs["model"]
     messages = kwargs["messages"]

     ## completion config
-    fallback_models = completion_config.get("default_fallback_models", None)
-    available_models = completion_config.get("available_models", None)
-    adapt_to_prompt_size = completion_config.get("adapt_to_prompt_size", False)
+    fallback_models = config.get("default_fallback_models", None)
+    available_models = config.get("available_models", None)
+    adapt_to_prompt_size = config.get("adapt_to_prompt_size", False)
     start_time = time.time()
     if adapt_to_prompt_size:
         ## Pick model based on token window
@@ -2829,7 +2823,7 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
     try:
         if model in models_with_config:
             ## Moderation check
-            if completion_config["model"][model].get("needs_moderation"):
+            if config["model"][model].get("needs_moderation"):
                 input = " ".join(message["content"] for message in messages)
                 response = litellm.moderation(input=input)
                 flagged = response["results"][0]["flagged"]
@@ -2838,8 +2832,8 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
             ## Model-specific Error Handling
             error_handling = None
-            if completion_config["model"][model].get("error_handling"):
-                error_handling = completion_config["model"][model]["error_handling"]
+            if config["model"][model].get("error_handling"):
+                error_handling = config["model"][model]["error_handling"]

             try:
                 response = litellm.completion(**kwargs)
@@ -2968,7 +2962,6 @@ def completion_with_fallbacks(**kwargs):
                 return response
             except Exception as e:
-                print(f"got exception {e} for model {model}")
                 rate_limited_models.add(model)
                 model_expiration_times[model] = (
                     time.time() + 60