diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index d18aa4f225..871c98cbb7 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 396f89a3bd..4ae00fb4ec 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index e0f896c0d1..368e334d69 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 7007e923a9..61f5ef3751 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -15,7 +15,6 @@ from litellm import embedding, completion, text_completion, completion_cost
 
 user_message = "Write a short poem about the sky"
 messages = [{"content": user_message, "role": "user"}]
-print(f"os path: {os.path.isfile('litellm_uuid.txt')}")
 
 def logger_fn(user_model_dict):
     print(f"user_model_dict: {user_model_dict}")
@@ -774,6 +773,20 @@ def test_completion_with_fallbacks():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
+
+def test_completion_with_fallbacks_multiple_keys():
+    backup_keys = [{"api_key": os.getenv("BACKUP_OPENAI_API_KEY_1")}, {"api_key": os.getenv("BACKUP_OPENAI_API_KEY_2")}]
+    try:
+        api_key = "bad-key"
+        response = completion(
+            model="gpt-3.5-turbo", messages=messages, force_timeout=120, fallbacks=backup_keys, api_key=api_key
+        )
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+# test_completion_with_fallbacks_multiple_keys()
 # def test_petals():
 #     try:
 #         response = completion(model="petals-team/StableBeluga2", messages=messages)
diff --git a/litellm/utils.py b/litellm/utils.py
index 7f21e94504..5016eb4db0 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3288,6 +3288,7 @@ def completion_with_fallbacks(**kwargs):
     rate_limited_models = set()
     model_expiration_times = {}
     start_time = time.time()
+    original_model = kwargs["model"]
     fallbacks = [kwargs["model"]] + kwargs["fallbacks"]
     del kwargs["fallbacks"]  # remove fallbacks so it's not recursive
 
@@ -3295,7 +3296,13 @@
 
     for model in fallbacks:  # loop thru all models
         try:
-            if (
+            # check if the fallback entry is a dict of backup keys or a model string
+            if isinstance(model, dict):  # completion(model="gpt-4", fallbacks=[{"api_key": "", "api_base": ""}, {"api_key": "", "api_base": ""}])
+                kwargs["api_key"] = model.get("api_key", None)
+                kwargs["api_base"] = model.get("api_base", None)
+                model = original_model
+                print("switched api keys")
+            elif (
                 model in rate_limited_models
             ):  # check if model is currently cooling down
                 if (
@@ -3318,6 +3325,7 @@
             return response
 
         except Exception as e:
+            print(e)
             rate_limited_models.add(model)
             model_expiration_times[model] = (
                 time.time() + 60
diff --git a/pyproject.toml b/pyproject.toml
index 7de1bddf9b..b10ad4bf5e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.754"
+version = "0.1.755"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
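For context, the new test shows the intended call pattern: `fallbacks` can now carry dicts of backup credentials instead of only alternate model names. Below is a minimal usage sketch distilled from `test_completion_with_fallbacks_multiple_keys`; it assumes the two `BACKUP_OPENAI_API_KEY_*` environment variables are set, exactly as the test does.

```python
import os
from litellm import completion

# Each fallback entry is a dict of alternate credentials (api_key and/or
# api_base) to retry the *same* model with, rather than a different model name.
backup_keys = [
    {"api_key": os.getenv("BACKUP_OPENAI_API_KEY_1")},
    {"api_key": os.getenv("BACKUP_OPENAI_API_KEY_2")},
]

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Write a short poem about the sky"}],
    api_key="bad-key",      # primary key fails...
    fallbacks=backup_keys,  # ...so each backup key is tried in order
    force_timeout=120,
)
print(response)
```

The `original_model` variable saved at the top of `completion_with_fallbacks` is what makes this work: when the loop reaches a dict entry, it copies the backup credentials into `kwargs` and restores the original model name before retrying.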
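The surrounding retry loop (only partially visible in the hunks above) also benches models that raise. Here is a hedged sketch of that bookkeeping, reconstructed from the `rate_limited_models` / `model_expiration_times` lines in the diff; the helper names are illustrative, not litellm's actual API.

```python
import time

# Reconstruction of the cooldown bookkeeping in completion_with_fallbacks:
# a model that raises is benched for 60 seconds before it may be retried.
rate_limited_models: set[str] = set()
model_expiration_times: dict[str, float] = {}

def mark_rate_limited(model: str) -> None:
    # mirrors the except-branch in the diff: bench the model for 60s
    rate_limited_models.add(model)
    model_expiration_times[model] = time.time() + 60

def is_cooling_down(model: str) -> bool:
    # mirrors the elif-branch: skip a benched model until its timer expires
    if model in rate_limited_models:
        if time.time() < model_expiration_times.get(model, 0):
            return True  # still cooling down; try the next fallback
        rate_limited_models.discard(model)  # cooldown elapsed, allow retries
    return False
```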