allow backup api keys and api bases

This commit is contained in:
Krrish Dholakia 2023-09-26 06:56:26 -07:00
parent b246629a5b
commit fd953b7ab4
6 changed files with 24 additions and 3 deletions

View file

@ -15,7 +15,6 @@ from litellm import embedding, completion, text_completion, completion_cost
user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]
print(f"os path: {os.path.isfile('litellm_uuid.txt')}")
def logger_fn(user_model_dict):
print(f"user_model_dict: {user_model_dict}")
@ -774,6 +773,20 @@ def test_completion_with_fallbacks():
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_with_fallbacks_multiple_keys():
    """Exercise completion() with a bad primary key and dict-style fallbacks.

    The primary request is sent with the deliberately invalid key
    ``"bad-key"``. Each fallback entry is a dict carrying an ``api_key``
    read from the ``BACKUP_OPENAI_API_KEY_1`` / ``BACKUP_OPENAI_API_KEY_2``
    environment variables. Any exception raised by the call fails the test;
    otherwise the response is printed.
    """
    # One fallback dict per backup environment variable, in order.
    fallback_credentials = [
        {"api_key": os.getenv(env_name)}
        for env_name in ("BACKUP_OPENAI_API_KEY_1", "BACKUP_OPENAI_API_KEY_2")
    ]
    request_kwargs = {
        "model": "gpt-3.5-turbo",
        "messages": messages,
        "force_timeout": 120,
        "fallbacks": fallback_credentials,
        "api_key": "bad-key",  # intentionally invalid primary key
    }
    try:
        response = completion(**request_kwargs)
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_with_fallbacks_multiple_keys()
# def test_petals():
# try:
# response = completion(model="petals-team/StableBeluga2", messages=messages)

View file

@ -3288,6 +3288,7 @@ def completion_with_fallbacks(**kwargs):
rate_limited_models = set()
model_expiration_times = {}
start_time = time.time()
original_model = kwargs["model"]
fallbacks = [kwargs["model"]] + kwargs["fallbacks"]
del kwargs["fallbacks"] # remove fallbacks so it's not recursive
@ -3295,7 +3296,13 @@ def completion_with_fallbacks(**kwargs):
for model in fallbacks:
# loop thru all models
try:
if (
# check if it's dict or new model string
if isinstance(model, dict): # completion(model="gpt-4", fallbacks=[{"api_key": "", "api_base": ""}, {"api_key": "", "api_base": ""}])
kwargs["api_key"] = model.get("api_key", None)
kwargs["api_base"] = model.get("api_base", None)
model = original_model
print(f"switched api keys")
elif (
model in rate_limited_models
): # check if model is currently cooling down
if (
@ -3318,6 +3325,7 @@ def completion_with_fallbacks(**kwargs):
return response
except Exception as e:
print(e)
rate_limited_models.add(model)
model_expiration_times[model] = (
time.time() + 60

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "0.1.754"
version = "0.1.755"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"