forked from phoenix/litellm-mirror
fix(utils.py): fix bedrock + cohere calls
This commit is contained in:
parent
2eb7386095
commit
95579fda7d
4 changed files with 11 additions and 9 deletions
|
@ -433,6 +433,7 @@ class Router:
|
||||||
response = original_function(*args, **kwargs)
|
response = original_function(*args, **kwargs)
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
original_exception = e
|
||||||
self.print_verbose(f"num retries in function with retries: {num_retries}")
|
self.print_verbose(f"num retries in function with retries: {num_retries}")
|
||||||
for current_attempt in range(num_retries):
|
for current_attempt in range(num_retries):
|
||||||
self.print_verbose(f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}")
|
self.print_verbose(f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}")
|
||||||
|
@ -457,7 +458,7 @@ class Router:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
raise e
|
raise e
|
||||||
raise e
|
raise original_exception
|
||||||
|
|
||||||
### HELPER FUNCTIONS
|
### HELPER FUNCTIONS
|
||||||
|
|
||||||
|
@ -514,6 +515,7 @@ class Router:
|
||||||
# cooldown deployment
|
# cooldown deployment
|
||||||
current_fails = self.failed_calls.get_cache(key=deployment) or 0
|
current_fails = self.failed_calls.get_cache(key=deployment) or 0
|
||||||
updated_fails = current_fails + 1
|
updated_fails = current_fails + 1
|
||||||
|
self.print_verbose(f"updated_fails: {updated_fails}; self.allowed_fails: {self.allowed_fails}")
|
||||||
if updated_fails > self.allowed_fails:
|
if updated_fails > self.allowed_fails:
|
||||||
# get the current cooldown list for that minute
|
# get the current cooldown list for that minute
|
||||||
cooldown_key = f"{current_minute}:cooldown_models" # group cooldown models by minute to reduce number of redis calls
|
cooldown_key = f"{current_minute}:cooldown_models" # group cooldown models by minute to reduce number of redis calls
|
||||||
|
|
|
@ -50,7 +50,6 @@ def test_multiple_deployments_sync():
|
||||||
set_verbose=True,
|
set_verbose=True,
|
||||||
num_retries=1) # type: ignore
|
num_retries=1) # type: ignore
|
||||||
try:
|
try:
|
||||||
router.reset()
|
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
response = router.completion(**kwargs)
|
response = router.completion(**kwargs)
|
||||||
results.append(response)
|
results.append(response)
|
||||||
|
@ -60,7 +59,7 @@ def test_multiple_deployments_sync():
|
||||||
print(f"FAILED TEST!")
|
print(f"FAILED TEST!")
|
||||||
pytest.fail(f"An error occurred - {traceback.format_exc()}")
|
pytest.fail(f"An error occurred - {traceback.format_exc()}")
|
||||||
|
|
||||||
# test_multiple_deployments_sync()
|
test_multiple_deployments_sync()
|
||||||
|
|
||||||
|
|
||||||
def test_multiple_deployments_parallel():
|
def test_multiple_deployments_parallel():
|
||||||
|
|
|
@ -582,7 +582,7 @@ def test_completion_azure_key_completion_arg():
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
os.environ["AZURE_API_KEY"] = old_key
|
os.environ["AZURE_API_KEY"] = old_key
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
test_completion_azure_key_completion_arg()
|
# test_completion_azure_key_completion_arg()
|
||||||
|
|
||||||
def test_completion_azure():
|
def test_completion_azure():
|
||||||
try:
|
try:
|
||||||
|
@ -960,6 +960,7 @@ def test_completion_bedrock_claude():
|
||||||
|
|
||||||
def test_completion_bedrock_cohere():
|
def test_completion_bedrock_cohere():
|
||||||
print("calling bedrock cohere")
|
print("calling bedrock cohere")
|
||||||
|
litellm.set_verbose = True
|
||||||
try:
|
try:
|
||||||
response = completion(
|
response = completion(
|
||||||
model="bedrock/cohere.command-text-v14",
|
model="bedrock/cohere.command-text-v14",
|
||||||
|
@ -976,7 +977,7 @@ def test_completion_bedrock_cohere():
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
# test_completion_bedrock_cohere()
|
test_completion_bedrock_cohere()
|
||||||
|
|
||||||
|
|
||||||
def test_completion_bedrock_claude_completion_auth():
|
def test_completion_bedrock_claude_completion_auth():
|
||||||
|
|
|
@ -1058,7 +1058,7 @@ class Logging:
|
||||||
)
|
)
|
||||||
if capture_exception: # log this error to sentry for debugging
|
if capture_exception: # log this error to sentry for debugging
|
||||||
capture_exception(e)
|
capture_exception(e)
|
||||||
except:
|
except Exception as e:
|
||||||
print_verbose(
|
print_verbose(
|
||||||
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}"
|
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}"
|
||||||
)
|
)
|
||||||
|
@ -1970,7 +1970,7 @@ def get_optional_params( # use the openai defaults
|
||||||
optional_params["max_tokens"] = max_tokens
|
optional_params["max_tokens"] = max_tokens
|
||||||
if n is not None:
|
if n is not None:
|
||||||
optional_params["num_generations"] = n
|
optional_params["num_generations"] = n
|
||||||
if logit_bias != {}:
|
if logit_bias is not None:
|
||||||
optional_params["logit_bias"] = logit_bias
|
optional_params["logit_bias"] = logit_bias
|
||||||
if top_p is not None:
|
if top_p is not None:
|
||||||
optional_params["p"] = top_p
|
optional_params["p"] = top_p
|
||||||
|
@ -2219,7 +2219,7 @@ def get_optional_params( # use the openai defaults
|
||||||
optional_params["max_tokens"] = max_tokens
|
optional_params["max_tokens"] = max_tokens
|
||||||
if n is not None:
|
if n is not None:
|
||||||
optional_params["num_generations"] = n
|
optional_params["num_generations"] = n
|
||||||
if logit_bias != {}:
|
if logit_bias is not None:
|
||||||
optional_params["logit_bias"] = logit_bias
|
optional_params["logit_bias"] = logit_bias
|
||||||
if top_p is not None:
|
if top_p is not None:
|
||||||
optional_params["p"] = top_p
|
optional_params["p"] = top_p
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue