diff --git a/docs/my-website/docs/completion/config.md b/docs/my-website/docs/completion/config.md
new file mode 100644
index 0000000000..72ccf8e726
--- /dev/null
+++ b/docs/my-website/docs/completion/config.md
@@ -0,0 +1,81 @@
+# Model Config
+
+Model-specific changes can make our code complicated and harder to debug. Use model configs to simplify this.
+
+### Usage
+
+For example, if we want to implement:
+* A moderations check for Anthropic models (to avoid violating their safety policy)
+* Model fallbacks - specific + general
+
+```python
+from litellm import completion_with_config
+import os
+
+config = {
+    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
+    "model": {
+        "claude-instant-1": {
+            "needs_moderation": True
+        },
+        "gpt-3.5-turbo": {
+            "error_handling": {
+                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
+            }
+        },
+    }
+}
+
+# set env var
+os.environ["OPENAI_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your openai key
+os.environ["ANTHROPIC_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your anthropic key
+
+sample_text = "how does a court case get to the Supreme Court?" * 1000
+messages = [{"content": sample_text, "role": "user"}]
+response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
+```
+[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)
+
+### Select model based on prompt size
+
+You can also use model configs to automatically select a model based on prompt size: litellm counts the tokens in the prompt, compares that count against each model's max token window, and picks a model whose max tokens exceed the prompt tokens. A simplified sketch of this selection logic is shown after the example below.
+
+```python
+from litellm import completion_with_config
+import os
+
+config = {
+    "available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
+    "adapt_to_prompt_size": True, # 👈 key change
+}
+
+# set env var
+os.environ["OPENAI_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your openai key
+os.environ["ANTHROPIC_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your anthropic key
+
+sample_text = "how does a court case get to the Supreme Court?" * 1000
+messages = [{"content": sample_text, "role": "user"}]
+response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
+```
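+
+Roughly, the selection step behaves like the sketch below. This is a simplified illustration of the idea rather than litellm's exact implementation - the hard-coded `token_windows` map and the whitespace-based `count_tokens` helper are stand-ins for litellm's internal model metadata and tokenizer.
+
+```python
+# Simplified sketch: pick the first available model whose context window
+# is larger than the prompt; otherwise keep the originally requested model.
+token_windows = {  # illustrative numbers only
+    "gpt-3.5-turbo": 4097,
+    "claude-instant-1": 100000,
+    "gpt-3.5-turbo-16k": 16385,
+}
+
+def count_tokens(messages) -> int:
+    # stand-in for a real tokenizer - counts whitespace-separated words
+    return sum(len(m["content"].split()) for m in messages)
+
+def pick_model(requested_model, messages, available_models):
+    prompt_tokens = count_tokens(messages)
+    for candidate in available_models:
+        if token_windows.get(candidate, 0) > prompt_tokens:
+            return candidate
+    return requested_model  # nothing fits - normal error handling applies
+
+print(pick_model("gpt-3.5-turbo", messages, config["available_models"]))
+```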
+
+### Complete Config Structure
+
+```python
+config = {
+    "default_fallback_models": [], # [Optional] List of model names to try if a call fails
+    "available_models": [], # [Optional] List of all possible models you could call
+    "adapt_to_prompt_size": False, # [Optional] True/False - select a model based on prompt size (will pick from available_models)
+    "model": {
+        "model-name": {
+            "needs_moderation": False, # [Optional] True/False - call the OpenAI moderations endpoint before making the completion call. Raises an exception if the input is flagged.
+            "error_handling": {
+                "error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
+                    "fallback_model": "" # str, name of the model to try instead when that error occurs
+                }
+            }
+        }
+    }
+}
+```
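+
+Since a flagged moderation check (or an exhausted fallback list) surfaces as a raised exception, it can help to wrap the call in a try/except. This is just an illustrative pattern reusing the config from the usage example above, not a required one:
+
+```python
+from litellm import completion_with_config
+
+messages = [{"content": "Hey, how's it going?", "role": "user"}]
+
+try:
+    response = completion_with_config(model="claude-instant-1", messages=messages, config=config)
+    print(response["choices"][0]["message"]["content"])
+except Exception as e:
+    # e.g. the prompt was flagged by the moderation check, or the call + fallbacks failed
+    print(f"completion_with_config raised: {e}")
+```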
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 11062fc670..51e1b8e8c2 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -35,6 +35,7 @@ const sidebars = {
         "completion/message_trimming",
         "completion/model_alias",
         "completion/reliable_completions",
+        "completion/config",
         "completion/batching",
         "completion/mock_requests",
       ],
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 420ed270f7..f5b05bfc91 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index f601789679..dc14ce03a0 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_config.py b/litellm/tests/test_config.py
index 86b796ed95..b2b48cfb3b 100644
--- a/litellm/tests/test_config.py
+++ b/litellm/tests/test_config.py
@@ -13,7 +13,6 @@ import litellm
 from litellm import completion_with_config
 
 config = {
-    "function": "completion",
     "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
     "model": {
         "claude-instant-1": {
@@ -63,7 +62,6 @@ def test_config_context_default_fallback():
 
 
 config = {
-    "function": "completion",
     "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
     "available_models": ["gpt-3.5-turbo", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-4", "gpt-4-0314", "gpt-4-0613", "j2-ultra", "command-nightly", "togethercomputer/llama-2-70b-chat", "chat-bison", "chat-bison@001", "claude-2"],
@@ -90,4 +88,4 @@ def test_config_context_adapt_to_prompt():
         print(f"Exception: {e}")
         pytest.fail(f"An exception occurred: {e}")
 
-test_config_context_adapt_to_prompt()
\ No newline at end of file
+# test_config_context_adapt_to_prompt()
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 325fea5e34..4eff739738 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2780,7 +2780,7 @@ def read_config_args(config_path) -> dict:
 
 ########## experimental completion variants ############################
 
-def completion_with_config(*, config: Union[dict, str], **kwargs):
+def completion_with_config(config: Union[dict, str], **kwargs):
     if config is not None:
         if isinstance(config, str):
             config = read_config_args(config)
@@ -2791,23 +2791,17 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
         else:
             raise Exception("Config path not passed in.")
 
-    ## load the completion config
-    completion_config = None
-
-    if config["function"] == "completion":
-        completion_config = config
-
-    if completion_config is None:
+    if config is None:
         raise Exception("No completion config in the config file")
 
-    models_with_config = completion_config["model"].keys()
+    models_with_config = config["model"].keys()
     model = kwargs["model"]
     messages = kwargs["messages"]
 
     ## completion config
-    fallback_models = completion_config.get("default_fallback_models", None)
-    available_models = completion_config.get("available_models", None)
-    adapt_to_prompt_size = completion_config.get("adapt_to_prompt_size", False)
+    fallback_models = config.get("default_fallback_models", None)
+    available_models = config.get("available_models", None)
+    adapt_to_prompt_size = config.get("adapt_to_prompt_size", False)
     start_time = time.time()
     if adapt_to_prompt_size:
         ## Pick model based on token window
@@ -2829,7 +2823,7 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
     try:
         if model in models_with_config:
             ## Moderation check
-            if completion_config["model"][model].get("needs_moderation"):
+            if config["model"][model].get("needs_moderation"):
                 input = " ".join(message["content"] for message in messages)
                 response = litellm.moderation(input=input)
                 flagged = response["results"][0]["flagged"]
@@ -2838,8 +2832,8 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
 
             ## Model-specific Error Handling
             error_handling = None
-            if completion_config["model"][model].get("error_handling"):
-                error_handling = completion_config["model"][model]["error_handling"]
+            if config["model"][model].get("error_handling"):
+                error_handling = config["model"][model]["error_handling"]
 
             try:
                 response = litellm.completion(**kwargs)
@@ -2968,7 +2962,6 @@ def completion_with_fallbacks(**kwargs):
             return response
 
         except Exception as e:
-            print(f"got exception {e} for model {model}")
             rate_limited_models.add(model)
             model_expiration_times[model] = (
                 time.time() + 60