fix(utils.py): fix bedrock + cohere calls

This commit is contained in:
Krrish Dholakia 2023-11-25 14:45:42 -08:00
parent 2eb7386095
commit 95579fda7d
4 changed files with 11 additions and 9 deletions

View file

@ -371,7 +371,7 @@ class Router:
return response
except Exception as e:
original_exception = e
- self.print_verbose(f"An exception occurs{original_exception}")
+ self.print_verbose(f"An exception occurs {original_exception}")
try:
self.print_verbose(f"Trying to fallback b/w models. Initial model group: {model_group}")
if isinstance(e, litellm.ContextWindowExceededError):
@ -433,6 +433,7 @@ class Router:
response = original_function(*args, **kwargs)
return response
except Exception as e:
original_exception = e
self.print_verbose(f"num retries in function with retries: {num_retries}")
for current_attempt in range(num_retries):
self.print_verbose(f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}")
@ -457,7 +458,7 @@ class Router:
pass
else:
raise e
- raise e
+ raise original_exception
### HELPER FUNCTIONS
@ -514,6 +515,7 @@ class Router:
# cooldown deployment
current_fails = self.failed_calls.get_cache(key=deployment) or 0
updated_fails = current_fails + 1
self.print_verbose(f"updated_fails: {updated_fails}; self.allowed_fails: {self.allowed_fails}")
if updated_fails > self.allowed_fails:
# get the current cooldown list for that minute
cooldown_key = f"{current_minute}:cooldown_models" # group cooldown models by minute to reduce number of redis calls

View file

@ -50,7 +50,6 @@ def test_multiple_deployments_sync():
set_verbose=True,
num_retries=1) # type: ignore
try:
router.reset()
for _ in range(3):
response = router.completion(**kwargs)
results.append(response)
@ -60,7 +59,7 @@ def test_multiple_deployments_sync():
print(f"FAILED TEST!")
pytest.fail(f"An error occurred - {traceback.format_exc()}")
- # test_multiple_deployments_sync()
+ test_multiple_deployments_sync()
def test_multiple_deployments_parallel():

View file

@ -582,7 +582,7 @@ def test_completion_azure_key_completion_arg():
except Exception as e:
os.environ["AZURE_API_KEY"] = old_key
pytest.fail(f"Error occurred: {e}")
- test_completion_azure_key_completion_arg()
+ # test_completion_azure_key_completion_arg()
def test_completion_azure():
try:
@ -960,6 +960,7 @@ def test_completion_bedrock_claude():
def test_completion_bedrock_cohere():
print("calling bedrock cohere")
litellm.set_verbose = True
try:
response = completion(
model="bedrock/cohere.command-text-v14",
@ -976,7 +977,7 @@ def test_completion_bedrock_cohere():
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
- # test_completion_bedrock_cohere()
+ test_completion_bedrock_cohere()
def test_completion_bedrock_claude_completion_auth():

View file

@ -1058,7 +1058,7 @@ class Logging:
)
if capture_exception: # log this error to sentry for debugging
capture_exception(e)
- except:
+ except Exception as e:
print_verbose(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}"
)
@ -1970,7 +1970,7 @@ def get_optional_params( # use the openai defaults
optional_params["max_tokens"] = max_tokens
if n is not None:
optional_params["num_generations"] = n
- if logit_bias != {}:
+ if logit_bias is not None:
optional_params["logit_bias"] = logit_bias
if top_p is not None:
optional_params["p"] = top_p
@ -2219,7 +2219,7 @@ def get_optional_params( # use the openai defaults
optional_params["max_tokens"] = max_tokens
if n is not None:
optional_params["num_generations"] = n
- if logit_bias != {}:
+ if logit_bias is not None:
optional_params["logit_bias"] = logit_bias
if top_p is not None:
optional_params["p"] = top_p