diff --git a/docs/my-website/docs/completion/config.md b/docs/my-website/docs/completion/config.md deleted file mode 100644 index 44ff6e8a70..0000000000 --- a/docs/my-website/docs/completion/config.md +++ /dev/null @@ -1,49 +0,0 @@ -# Model Config - -Model-specific changes can make our code complicated, making it harder to debug errors. Use model configs to simplify this. - -### usage - -Handling prompt logic. Different models have different context windows. Use `adapt_to_prompt_size` to select the right model for the prompt (in case the current model is too small). - - -```python -from litellm import completion_with_config -import os - -config = { - "available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"], - "adapt_to_prompt_size": True, # 👈 key change -} - -# set env var -os.environ["OPENAI_API_KEY"] = "your-api-key" -os.environ["ANTHROPIC_API_KEY"] = "your-api-key" - - -sample_text = "how does a court case get to the Supreme Court?" * 1000 -messages = [{"content": sample_text, "role": "user"}] -response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config) -``` - -[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783) - -### Complete Config Structure - -```python -config = { - "default_fallback_models": # [Optional] List of model names to try if a call fails - "available_models": # [Optional] List of all possible models you could call - "adapt_to_prompt_size": # [Optional] True/False - if you want to select model based on prompt size (will pick from available_models) - "model": { - "model-name": { - "needs_moderation": # [Optional] True/False - if you want to call openai moderations endpoint before making completion call. Will raise exception, if flagged. - "error_handling": { - "error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list - "fallback_model": "" # str, name of the model it should try instead, when that error occurs - } - } - } - } -} -``` \ No newline at end of file diff --git a/litellm/__init__.py b/litellm/__init__.py index f848dd3243..018b8bb148 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -500,7 +500,6 @@ from .utils import ( validate_environment, check_valid_key, get_llm_provider, - completion_with_config, register_model, encode, decode, diff --git a/litellm/tests/test_config.py b/litellm/tests/test_config.py deleted file mode 100644 index 69e37cf87c..0000000000 --- a/litellm/tests/test_config.py +++ /dev/null @@ -1,118 +0,0 @@ -import sys, os -import traceback -from dotenv import load_dotenv - -load_dotenv() -import os - -sys.path.insert( - 0, os.path.abspath("../..") -) # Adds the parent directory to the system path -import pytest -import litellm -from litellm import completion_with_config - -config = { - "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"], - "model": { - "claude-instant-1": {"needs_moderation": True}, - "gpt-3.5-turbo": { - "error_handling": { - "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"} - } - }, - }, -} - - -def test_config_context_window_exceeded(): - try: - sample_text = "how does a court case get to the Supreme Court?" 
* 1000 - messages = [{"content": sample_text, "role": "user"}] - response = completion_with_config( - model="gpt-3.5-turbo", messages=messages, config=config - ) - print(response) - except Exception as e: - print(f"Exception: {e}") - pytest.fail(f"An exception occurred: {e}") - - -# test_config_context_window_exceeded() - - -def test_config_context_moderation(): - try: - messages = [{"role": "user", "content": "I want to kill them."}] - response = completion_with_config( - model="claude-instant-1", messages=messages, config=config - ) - print(response) - except Exception as e: - print(f"Exception: {e}") - pytest.fail(f"An exception occurred: {e}") - - -# test_config_context_moderation() - - -def test_config_context_default_fallback(): - try: - messages = [{"role": "user", "content": "Hey, how's it going?"}] - response = completion_with_config( - model="claude-instant-1", - messages=messages, - config=config, - api_key="bad-key", - ) - print(response) - except Exception as e: - print(f"Exception: {e}") - pytest.fail(f"An exception occurred: {e}") - - -# test_config_context_default_fallback() - - -config = { - "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"], - "available_models": [ - "gpt-3.5-turbo", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "j2-ultra", - "command-nightly", - "togethercomputer/llama-2-70b-chat", - "chat-bison", - "chat-bison@001", - "claude-2", - ], - "adapt_to_prompt_size": True, # type: ignore - "model": { - "claude-instant-1": {"needs_moderation": True}, - "gpt-3.5-turbo": { - "error_handling": { - "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"} - } - }, - }, -} - - -def test_config_context_adapt_to_prompt(): - try: - sample_text = "how does a court case get to the Supreme Court?" * 1000 - messages = [{"content": sample_text, "role": "user"}] - response = completion_with_config( - model="gpt-3.5-turbo", messages=messages, config=config - ) - print(response) - except Exception as e: - print(f"Exception: {e}") - pytest.fail(f"An exception occurred: {e}") - - -test_config_context_adapt_to_prompt() diff --git a/litellm/utils.py b/litellm/utils.py index 4520bee621..47c5695bb8 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -7843,133 +7843,6 @@ def read_config_args(config_path) -> dict: ########## experimental completion variants ############################ -def completion_with_config(config: Union[dict, str], **kwargs): - """ - Generate a litellm.completion() using a config dict and all supported completion args - - Example config; - config = { - "default_fallback_models": # [Optional] List of model names to try if a call fails - "available_models": # [Optional] List of all possible models you could call - "adapt_to_prompt_size": # [Optional] True/False - if you want to select model based on prompt size (will pick from available_models) - "model": { - "model-name": { - "needs_moderation": # [Optional] True/False - if you want to call openai moderations endpoint before making completion call. Will raise exception, if flagged. 
- "error_handling": { - "error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list - "fallback_model": "" # str, name of the model it should try instead, when that error occurs - } - } - } - } - } - - Parameters: - config (Union[dict, str]): A configuration for litellm - **kwargs: Additional keyword arguments for litellm.completion - - Returns: - litellm.ModelResponse: A ModelResponse with the generated completion - - """ - if config is not None: - if isinstance(config, str): - config = read_config_args(config) - elif isinstance(config, dict): - config = config - else: - raise Exception("Config path must be a string or a dictionary.") - else: - raise Exception("Config path not passed in.") - - if config is None: - raise Exception("No completion config in the config file") - - models_with_config = config["model"].keys() - model = kwargs["model"] - messages = kwargs["messages"] - - ## completion config - fallback_models = config.get("default_fallback_models", None) - available_models = config.get("available_models", None) - adapt_to_prompt_size = config.get("adapt_to_prompt_size", False) - trim_messages_flag = config.get("trim_messages", False) - prompt_larger_than_model = False - max_model = model - try: - max_tokens = litellm.get_max_tokens(model)["max_tokens"] - except: - max_tokens = 2048 # assume curr model's max window is 2048 tokens - if adapt_to_prompt_size: - ## Pick model based on token window - prompt_tokens = litellm.token_counter( - model="gpt-3.5-turbo", - text="".join(message["content"] for message in messages), - ) - try: - curr_max_tokens = litellm.get_max_tokens(model)["max_tokens"] - except: - curr_max_tokens = 2048 - if curr_max_tokens < prompt_tokens: - prompt_larger_than_model = True - for available_model in available_models: - try: - curr_max_tokens = litellm.get_max_tokens(available_model)[ - "max_tokens" - ] - if curr_max_tokens > max_tokens: - max_tokens = curr_max_tokens - max_model = available_model - if curr_max_tokens > prompt_tokens: - model = available_model - prompt_larger_than_model = False - except: - continue - if prompt_larger_than_model: - messages = trim_messages(messages=messages, model=max_model) - kwargs["messages"] = messages - - kwargs["model"] = model - try: - if model in models_with_config: - ## Moderation check - if config["model"][model].get("needs_moderation"): - input = " ".join(message["content"] for message in messages) - response = litellm.moderation(input=input) - flagged = response["results"][0]["flagged"] - if flagged: - raise Exception("This response was flagged as inappropriate") - - ## Model-specific Error Handling - error_handling = None - if config["model"][model].get("error_handling"): - error_handling = config["model"][model]["error_handling"] - - try: - response = litellm.completion(**kwargs) - return response - except Exception as e: - exception_name = type(e).__name__ - fallback_model = None - if error_handling and exception_name in error_handling: - error_handler = error_handling[exception_name] - # either switch model or api key - fallback_model = error_handler.get("fallback_model", None) - if fallback_model: - kwargs["model"] = fallback_model - return litellm.completion(**kwargs) - raise e - else: - return litellm.completion(**kwargs) - except Exception as e: - if fallback_models: - model = fallback_models.pop(0) - return completion_with_fallbacks( - model=model, messages=messages, fallbacks=fallback_models - ) - raise e - - def completion_with_fallbacks(**kwargs): 
     nested_kwargs = kwargs.pop("kwargs", {})
     response = None
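Since this diff removes `completion_with_config`, callers who relied on `adapt_to_prompt_size` can reproduce the model selection themselves with the helpers the removed code already used (`litellm.token_counter`, `litellm.get_max_tokens`, `trim_messages`), all of which stay in `litellm/utils.py`. The sketch below is a rough, user-side approximation and not part of this PR: the helper name `pick_model_for_prompt` and the model list are illustrative, and it assumes `get_max_tokens(model)` still returns a dict with a `"max_tokens"` key, as in the removed implementation.

```python
import litellm
from litellm.utils import trim_messages


def pick_model_for_prompt(default_model, available_models, messages):
    """Approximate the removed adapt_to_prompt_size behaviour: pick a model
    whose context window fits the prompt, otherwise trim the prompt down to
    the largest window we know about."""
    prompt_tokens = litellm.token_counter(
        model="gpt-3.5-turbo",
        text="".join(m["content"] for m in messages),
    )
    best_model, best_window = default_model, 0
    for candidate in available_models:
        try:
            window = litellm.get_max_tokens(candidate)["max_tokens"]
        except Exception:
            continue  # no known context window for this model, skip it
        if window > best_window:
            best_model, best_window = candidate, window
        if window > prompt_tokens:
            return candidate, messages  # prompt fits, use as-is
    # nothing fits: fall back to trimming for the largest known window
    return best_model, trim_messages(messages=messages, model=best_model)


sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"role": "user", "content": sample_text}]
model, messages = pick_model_for_prompt(
    default_model="gpt-3.5-turbo",
    available_models=["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "claude-instant-1"],
    messages=messages,
)
response = litellm.completion(model=model, messages=messages)
```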
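The other behaviours the deleted docs described have direct stand-ins in code this diff keeps: the `default_fallback_models` error path simply delegated to `completion_with_fallbacks`, which remains in `litellm/utils.py`, and `needs_moderation` was a pre-flight call to `litellm.moderation`. A hedged equivalent, with an illustrative message and fallback list:

```python
import litellm
from litellm.utils import completion_with_fallbacks

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# needs_moderation: call the moderation endpoint first and refuse flagged input,
# as the removed completion_with_config did for models marked in the config.
moderation = litellm.moderation(input=" ".join(m["content"] for m in messages))
if moderation["results"][0]["flagged"]:
    raise Exception("This response was flagged as inappropriate")

# default_fallback_models: completion_with_fallbacks walks the list until one
# provider succeeds, which is what the removed error path delegated to anyway.
response = completion_with_fallbacks(
    model="claude-instant-1",
    messages=messages,
    fallbacks=["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
)
```

Per-error fallbacks (e.g. switching to `gpt-3.5-turbo-16k` on `ContextWindowExceededError`) can be reproduced with an ordinary `try/except` around `litellm.completion`.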