diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc index 0bee58842..5f5a1a503 100644 Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc index 9ad726ac5..420ed270f 100644 Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc index 7c768446d..5286ad6f3 100644 Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ diff --git a/litellm/tests/test_config.py b/litellm/tests/test_config.py index 0df5928f9..6a2cbc238 100644 --- a/litellm/tests/test_config.py +++ b/litellm/tests/test_config.py @@ -14,6 +14,7 @@ from litellm import completion_with_config config = { "function": "completion", + "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"], "model": { "claude-instant-1": { "needs_moderation": True @@ -26,12 +27,20 @@ config = { } } -def test_config(): +def test_config_context_window_exceeded(): try: sample_text = "how does a court case get to the Supreme Court?" 
* 1000 messages = [{"content": sample_text, "role": "user"}] response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config) print(response) + except Exception as e: + print(f"Exception: {e}") + pytest.fail(f"An exception occurred: {e}") + +# test_config_context_window_exceeded() + +def test_config_context_moderation(): + try: messages=[{"role": "user", "content": "I want to kill them."}] response = completion_with_config(model="claude-instant-1", messages=messages, config=config) print(response) @@ -39,4 +48,15 @@ def test_config(): print(f"Exception: {e}") pytest.fail(f"An exception occurred: {e}") -# test_config() \ No newline at end of file +# test_config_context_moderation() + +def test_config_context_default_fallback(): + try: + messages=[{"role": "user", "content": "Hey, how's it going?"}] + response = completion_with_config(model="claude-instant-1", messages=messages, config=config, api_key="bad-key") + print(response) + except Exception as e: + print(f"Exception: {e}") + pytest.fail(f"An exception occurred: {e}") + +# test_config_context_default_fallback() \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index c754c26ea..51e0ebdb0 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2795,36 +2795,45 @@ def completion_with_config(*args, config: Union[dict, str], **kwargs): models_with_config = completion_config["model"].keys() model = args[0] if len(args) > 0 else kwargs["model"] messages = args[1] if len(args) > 1 else kwargs["messages"] - if model in models_with_config: - ## Moderation check - if completion_config["model"][model].get("needs_moderation"): - input = " ".join(message["content"] for message in messages) - response = litellm.moderation(input=input) - flagged = response["results"][0]["flagged"] - if flagged: - raise Exception("This response was flagged as inappropriate") - - ## Load Error Handling Logic - error_handling = None - if completion_config["model"][model].get("error_handling"): 
- error_handling = completion_config["model"][model]["error_handling"] - try: - response = litellm.completion(*args, **kwargs) - return response - except Exception as e: - exception_name = type(e).__name__ - fallback_model = None - if error_handling and exception_name in error_handling: - error_handler = error_handling[exception_name] - # either switch model or api key - fallback_model = error_handler.get("fallback_model", None) - if fallback_model: - kwargs["model"] = fallback_model - return litellm.completion(*args, **kwargs) - raise e - else: - return litellm.completion(*args, **kwargs) + ## Default fallback models + fallback_models = completion_config.get("default_fallback_models") + try: + if model in models_with_config: + ## Moderation check + if completion_config["model"][model].get("needs_moderation"): + input = " ".join(message["content"] for message in messages) + response = litellm.moderation(input=input) + flagged = response["results"][0]["flagged"] + if flagged: + raise Exception("This response was flagged as inappropriate") + + ## Model-specific Error Handling + error_handling = None + if completion_config["model"][model].get("error_handling"): + error_handling = completion_config["model"][model]["error_handling"] + + try: + response = litellm.completion(*args, **kwargs) + return response + except Exception as e: + exception_name = type(e).__name__ + fallback_model = None + if error_handling and exception_name in error_handling: + error_handler = error_handling[exception_name] + # either switch model or api key + fallback_model = error_handler.get("fallback_model", None) + if fallback_model: + kwargs["model"] = fallback_model + return litellm.completion(*args, **kwargs) + raise e + else: + return litellm.completion(*args, **kwargs) + except Exception as e: + if fallback_models: + model, *remaining_fallbacks = fallback_models # unpack instead of pop(0) so the configured list is not mutated across calls + return completion_with_fallbacks(model=model, 
messages=messages, fallbacks=remaining_fallbacks) + raise e @@ -2924,8 +2933,7 @@ def 
completion_with_fallbacks(**kwargs): # delete model from kwargs if it exists if kwargs.get("model"): del kwargs["model"] - - print("making completion call", model) + response = litellm.completion(**kwargs, model=model) if response != None: