(chore) remove deprecated completion_with_config() tests

This commit is contained in:
ishaan-jaff 2024-01-09 17:13:06 +05:30
parent cc07ba1d6a
commit 248e5f3d92
4 changed files with 0 additions and 295 deletions

View file

@@ -1,49 +0,0 @@
# Model Config
Model-specific handling can make our code complicated and errors harder to debug. Model configs centralize that logic so completion calls stay simple.
### Usage
**Handling prompt logic.** Different models have different context windows. Set `adapt_to_prompt_size` to have `completion_with_config` pick a model from `available_models` whose context window fits the prompt (in case the current model is too small).
```python
from litellm import completion_with_config
import os
config = {
    "available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
    "adapt_to_prompt_size": True, # 👈 key change
}
# set env var
os.environ["OPENAI_API_KEY"] = "your-api-key"
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"content": sample_text, "role": "user"}]
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
```
[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)
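When the prompt above is too large for `gpt-3.5-turbo`, the call is routed to a model from `available_models` whose context window fits it (here `gpt-3.5-turbo-16k`). As a quick sanity check — assuming the OpenAI-style response shape litellm returns — the `model` field on the response shows which model actually served the request:

```python
# `response.model` reflects the model that was ultimately selected
print(response.model)  # e.g. "gpt-3.5-turbo-16k" if the long prompt forced an upgrade
```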
### Complete Config Structure
```python
config = {
    "default_fallback_models": # [Optional] List of model names to try if a call fails
    "available_models": # [Optional] List of all possible models you could call
    "adapt_to_prompt_size": # [Optional] True/False - if you want to select model based on prompt size (will pick from available_models)
    "model": {
        "model-name": {
            "needs_moderation": # [Optional] True/False - if you want to call openai moderations endpoint before making completion call. Will raise exception, if flagged.
            "error_handling": {
                "error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
                    "fallback_model": "" # str, name of the model it should try instead, when that error occurs
                }
            }
        }
    }
}
```
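For a filled-in instance of this structure, here is the config used by the (now deleted) tests below — `claude-instant-1` calls go through the moderation endpoint first, and a `ContextWindowExceededError` on `gpt-3.5-turbo` falls back to the 16k variant:

```python
config = {
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
    "model": {
        "claude-instant-1": {"needs_moderation": True},
        "gpt-3.5-turbo": {
            "error_handling": {
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    },
}
```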

View file

@@ -500,7 +500,6 @@ from .utils import (
    validate_environment,
    check_valid_key,
    get_llm_provider,
    completion_with_config,
    register_model,
    encode,
    decode,

View file

@@ -1,118 +0,0 @@
import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest
import litellm
from litellm import completion_with_config

config = {
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
    "model": {
        "claude-instant-1": {"needs_moderation": True},
        "gpt-3.5-turbo": {
            "error_handling": {
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    },
}


def test_config_context_window_exceeded():
    try:
        sample_text = "how does a court case get to the Supreme Court?" * 1000
        messages = [{"content": sample_text, "role": "user"}]
        response = completion_with_config(
            model="gpt-3.5-turbo", messages=messages, config=config
        )
        print(response)
    except Exception as e:
        print(f"Exception: {e}")
        pytest.fail(f"An exception occurred: {e}")


# test_config_context_window_exceeded()


def test_config_context_moderation():
    try:
        messages = [{"role": "user", "content": "I want to kill them."}]
        response = completion_with_config(
            model="claude-instant-1", messages=messages, config=config
        )
        print(response)
    except Exception as e:
        print(f"Exception: {e}")
        pytest.fail(f"An exception occurred: {e}")


# test_config_context_moderation()


def test_config_context_default_fallback():
    try:
        messages = [{"role": "user", "content": "Hey, how's it going?"}]
        response = completion_with_config(
            model="claude-instant-1",
            messages=messages,
            config=config,
            api_key="bad-key",
        )
        print(response)
    except Exception as e:
        print(f"Exception: {e}")
        pytest.fail(f"An exception occurred: {e}")


# test_config_context_default_fallback()


config = {
    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
    "available_models": [
        "gpt-3.5-turbo",
        "gpt-3.5-turbo-0301",
        "gpt-3.5-turbo-0613",
        "gpt-4",
        "gpt-4-0314",
        "gpt-4-0613",
        "j2-ultra",
        "command-nightly",
        "togethercomputer/llama-2-70b-chat",
        "chat-bison",
        "chat-bison@001",
        "claude-2",
    ],
    "adapt_to_prompt_size": True,  # type: ignore
    "model": {
        "claude-instant-1": {"needs_moderation": True},
        "gpt-3.5-turbo": {
            "error_handling": {
                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
            }
        },
    },
}


def test_config_context_adapt_to_prompt():
    try:
        sample_text = "how does a court case get to the Supreme Court?" * 1000
        messages = [{"content": sample_text, "role": "user"}]
        response = completion_with_config(
            model="gpt-3.5-turbo", messages=messages, config=config
        )
        print(response)
    except Exception as e:
        print(f"Exception: {e}")
        pytest.fail(f"An exception occurred: {e}")


test_config_context_adapt_to_prompt()

View file

@@ -7843,133 +7843,6 @@ def read_config_args(config_path) -> dict:
########## experimental completion variants ############################
def completion_with_config(config: Union[dict, str], **kwargs):
    """
    Generate a litellm.completion() using a config dict and all supported completion args

    Example config:
    config = {
        "default_fallback_models": # [Optional] List of model names to try if a call fails
        "available_models": # [Optional] List of all possible models you could call
        "adapt_to_prompt_size": # [Optional] True/False - if you want to select model based on prompt size (will pick from available_models)
        "model": {
            "model-name": {
                "needs_moderation": # [Optional] True/False - if you want to call openai moderations endpoint before making completion call. Will raise exception, if flagged.
                "error_handling": {
                    "error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
                        "fallback_model": "" # str, name of the model it should try instead, when that error occurs
                    }
                }
            }
        }
    }

    Parameters:
        config (Union[dict, str]): A configuration for litellm
        **kwargs: Additional keyword arguments for litellm.completion

    Returns:
        litellm.ModelResponse: A ModelResponse with the generated completion
    """
    if config is not None:
        if isinstance(config, str):
            config = read_config_args(config)
        elif isinstance(config, dict):
            config = config
        else:
            raise Exception("Config path must be a string or a dictionary.")
    else:
        raise Exception("Config path not passed in.")
    if config is None:
        raise Exception("No completion config in the config file")

    models_with_config = config["model"].keys()
    model = kwargs["model"]
    messages = kwargs["messages"]

    ## completion config
    fallback_models = config.get("default_fallback_models", None)
    available_models = config.get("available_models", None)
    adapt_to_prompt_size = config.get("adapt_to_prompt_size", False)
    trim_messages_flag = config.get("trim_messages", False)
    prompt_larger_than_model = False
    max_model = model
    try:
        max_tokens = litellm.get_max_tokens(model)["max_tokens"]
    except:
        max_tokens = 2048  # assume curr model's max window is 2048 tokens
    if adapt_to_prompt_size:
        ## Pick model based on token window
        prompt_tokens = litellm.token_counter(
            model="gpt-3.5-turbo",
            text="".join(message["content"] for message in messages),
        )
        try:
            curr_max_tokens = litellm.get_max_tokens(model)["max_tokens"]
        except:
            curr_max_tokens = 2048
        if curr_max_tokens < prompt_tokens:
            prompt_larger_than_model = True
            for available_model in available_models:
                try:
                    curr_max_tokens = litellm.get_max_tokens(available_model)[
                        "max_tokens"
                    ]
                    if curr_max_tokens > max_tokens:
                        max_tokens = curr_max_tokens
                        max_model = available_model
                    if curr_max_tokens > prompt_tokens:
                        model = available_model
                        prompt_larger_than_model = False
                except:
                    continue
        if prompt_larger_than_model:
            messages = trim_messages(messages=messages, model=max_model)
            kwargs["messages"] = messages

    kwargs["model"] = model
    try:
        if model in models_with_config:
            ## Moderation check
            if config["model"][model].get("needs_moderation"):
                input = " ".join(message["content"] for message in messages)
                response = litellm.moderation(input=input)
                flagged = response["results"][0]["flagged"]
                if flagged:
                    raise Exception("This response was flagged as inappropriate")

            ## Model-specific Error Handling
            error_handling = None
            if config["model"][model].get("error_handling"):
                error_handling = config["model"][model]["error_handling"]

            try:
                response = litellm.completion(**kwargs)
                return response
            except Exception as e:
                exception_name = type(e).__name__
                fallback_model = None
                if error_handling and exception_name in error_handling:
                    error_handler = error_handling[exception_name]
                    # either switch model or api key
                    fallback_model = error_handler.get("fallback_model", None)
                if fallback_model:
                    kwargs["model"] = fallback_model
                    return litellm.completion(**kwargs)
                raise e
        else:
            return litellm.completion(**kwargs)
    except Exception as e:
        if fallback_models:
            model = fallback_models.pop(0)
            return completion_with_fallbacks(
                model=model, messages=messages, fallbacks=fallback_models
            )
        raise e
def completion_with_fallbacks(**kwargs):
    nested_kwargs = kwargs.pop("kwargs", {})
    response = None