mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-24 10:14:26 +00:00
fix(utils.py): fix bedrock + cohere calls
This commit is contained in:
parent
2eb7386095
commit
95579fda7d
4 changed files with 11 additions and 9 deletions
|
@ -371,7 +371,7 @@ class Router:
|
|||
return response
|
||||
except Exception as e:
|
||||
original_exception = e
|
||||
self.print_verbose(f"An exception occurs{original_exception}")
|
||||
self.print_verbose(f"An exception occurs {original_exception}")
|
||||
try:
|
||||
self.print_verbose(f"Trying to fallback b/w models. Initial model group: {model_group}")
|
||||
if isinstance(e, litellm.ContextWindowExceededError):
|
||||
|
@ -433,6 +433,7 @@ class Router:
|
|||
response = original_function(*args, **kwargs)
|
||||
return response
|
||||
except Exception as e:
|
||||
original_exception = e
|
||||
self.print_verbose(f"num retries in function with retries: {num_retries}")
|
||||
for current_attempt in range(num_retries):
|
||||
self.print_verbose(f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}")
|
||||
|
@ -457,7 +458,7 @@ class Router:
|
|||
pass
|
||||
else:
|
||||
raise e
|
||||
raise e
|
||||
raise original_exception
|
||||
|
||||
### HELPER FUNCTIONS
|
||||
|
||||
|
@ -514,6 +515,7 @@ class Router:
|
|||
# cooldown deployment
|
||||
current_fails = self.failed_calls.get_cache(key=deployment) or 0
|
||||
updated_fails = current_fails + 1
|
||||
self.print_verbose(f"updated_fails: {updated_fails}; self.allowed_fails: {self.allowed_fails}")
|
||||
if updated_fails > self.allowed_fails:
|
||||
# get the current cooldown list for that minute
|
||||
cooldown_key = f"{current_minute}:cooldown_models" # group cooldown models by minute to reduce number of redis calls
|
||||
|
|
|
@ -50,7 +50,6 @@ def test_multiple_deployments_sync():
|
|||
set_verbose=True,
|
||||
num_retries=1) # type: ignore
|
||||
try:
|
||||
router.reset()
|
||||
for _ in range(3):
|
||||
response = router.completion(**kwargs)
|
||||
results.append(response)
|
||||
|
@ -60,7 +59,7 @@ def test_multiple_deployments_sync():
|
|||
print(f"FAILED TEST!")
|
||||
pytest.fail(f"An error occurred - {traceback.format_exc()}")
|
||||
|
||||
# test_multiple_deployments_sync()
|
||||
test_multiple_deployments_sync()
|
||||
|
||||
|
||||
def test_multiple_deployments_parallel():
|
||||
|
|
|
@ -582,7 +582,7 @@ def test_completion_azure_key_completion_arg():
|
|||
except Exception as e:
|
||||
os.environ["AZURE_API_KEY"] = old_key
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
test_completion_azure_key_completion_arg()
|
||||
# test_completion_azure_key_completion_arg()
|
||||
|
||||
def test_completion_azure():
|
||||
try:
|
||||
|
@ -960,6 +960,7 @@ def test_completion_bedrock_claude():
|
|||
|
||||
def test_completion_bedrock_cohere():
|
||||
print("calling bedrock cohere")
|
||||
litellm.set_verbose = True
|
||||
try:
|
||||
response = completion(
|
||||
model="bedrock/cohere.command-text-v14",
|
||||
|
@ -976,7 +977,7 @@ def test_completion_bedrock_cohere():
|
|||
pass
|
||||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
# test_completion_bedrock_cohere()
|
||||
test_completion_bedrock_cohere()
|
||||
|
||||
|
||||
def test_completion_bedrock_claude_completion_auth():
|
||||
|
|
|
@ -1058,7 +1058,7 @@ class Logging:
|
|||
)
|
||||
if capture_exception: # log this error to sentry for debugging
|
||||
capture_exception(e)
|
||||
except:
|
||||
except Exception as e:
|
||||
print_verbose(
|
||||
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}"
|
||||
)
|
||||
|
@ -1970,7 +1970,7 @@ def get_optional_params( # use the openai defaults
|
|||
optional_params["max_tokens"] = max_tokens
|
||||
if n is not None:
|
||||
optional_params["num_generations"] = n
|
||||
if logit_bias != {}:
|
||||
if logit_bias is not None:
|
||||
optional_params["logit_bias"] = logit_bias
|
||||
if top_p is not None:
|
||||
optional_params["p"] = top_p
|
||||
|
@ -2219,7 +2219,7 @@ def get_optional_params( # use the openai defaults
|
|||
optional_params["max_tokens"] = max_tokens
|
||||
if n is not None:
|
||||
optional_params["num_generations"] = n
|
||||
if logit_bias != {}:
|
||||
if logit_bias is not None:
|
||||
optional_params["logit_bias"] = logit_bias
|
||||
if top_p is not None:
|
||||
optional_params["p"] = top_p
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue