From 95579fda7d7db5ce18ad0bbfd9eb4045c7c66fbc Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 25 Nov 2023 14:45:42 -0800 Subject: [PATCH] fix(utils.py): fix bedrock + cohere calls --- litellm/router.py | 6 ++++-- litellm/tests/test_acooldowns_router.py | 3 +-- litellm/tests/test_completion.py | 5 +++-- litellm/utils.py | 6 +++--- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 6f277cb5f5..4afe1d9d69 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -371,7 +371,7 @@ class Router: return response except Exception as e: original_exception = e - self.print_verbose(f"An exception occurs{original_exception}") + self.print_verbose(f"An exception occurs {original_exception}") try: self.print_verbose(f"Trying to fallback b/w models. Initial model group: {model_group}") if isinstance(e, litellm.ContextWindowExceededError): @@ -433,6 +433,7 @@ class Router: response = original_function(*args, **kwargs) return response except Exception as e: + original_exception = e self.print_verbose(f"num retries in function with retries: {num_retries}") for current_attempt in range(num_retries): self.print_verbose(f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}") @@ -457,7 +458,7 @@ class Router: pass else: raise e - raise e + raise original_exception ### HELPER FUNCTIONS @@ -514,6 +515,7 @@ class Router: # cooldown deployment current_fails = self.failed_calls.get_cache(key=deployment) or 0 updated_fails = current_fails + 1 + self.print_verbose(f"updated_fails: {updated_fails}; self.allowed_fails: {self.allowed_fails}") if updated_fails > self.allowed_fails: # get the current cooldown list for that minute cooldown_key = f"{current_minute}:cooldown_models" # group cooldown models by minute to reduce number of redis calls diff --git a/litellm/tests/test_acooldowns_router.py b/litellm/tests/test_acooldowns_router.py index 9ea7c3f6cf..3542fa9869 100644 --- a/litellm/tests/test_acooldowns_router.py +++ b/litellm/tests/test_acooldowns_router.py @@ -50,7 +50,6 @@ def test_multiple_deployments_sync(): set_verbose=True, num_retries=1) # type: ignore try: - router.reset() for _ in range(3): response = router.completion(**kwargs) results.append(response) @@ -60,7 +59,7 @@ def test_multiple_deployments_sync(): print(f"FAILED TEST!") pytest.fail(f"An error occurred - {traceback.format_exc()}") -# test_multiple_deployments_sync() +test_multiple_deployments_sync() def test_multiple_deployments_parallel(): diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 5951b572c3..4320e31e27 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -582,7 +582,7 @@ def test_completion_azure_key_completion_arg(): except Exception as e: os.environ["AZURE_API_KEY"] = old_key pytest.fail(f"Error occurred: {e}") -test_completion_azure_key_completion_arg() +# test_completion_azure_key_completion_arg() def test_completion_azure(): try: @@ -960,6 +960,7 @@ def test_completion_bedrock_claude(): def test_completion_bedrock_cohere(): print("calling bedrock cohere") + litellm.set_verbose = True try: response = completion( model="bedrock/cohere.command-text-v14", @@ -976,7 +977,7 @@ def test_completion_bedrock_cohere(): pass except Exception as e: pytest.fail(f"Error occurred: {e}") -# test_completion_bedrock_cohere() +test_completion_bedrock_cohere() def test_completion_bedrock_claude_completion_auth(): diff --git a/litellm/utils.py b/litellm/utils.py index 689f317266..b00e5ed860 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1058,7 +1058,7 @@ class Logging: ) if capture_exception: # log this error to sentry for debugging capture_exception(e) - except: + except Exception as e: print_verbose( f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}" ) @@ -1970,7 +1970,7 @@ def get_optional_params( # use the openai defaults optional_params["max_tokens"] = max_tokens if n is not None: optional_params["num_generations"] = n - if logit_bias != {}: + if logit_bias is not None: optional_params["logit_bias"] = logit_bias if top_p is not None: optional_params["p"] = top_p @@ -2219,7 +2219,7 @@ def get_optional_params( # use the openai defaults optional_params["max_tokens"] = max_tokens if n is not None: optional_params["num_generations"] = n - if logit_bias != {}: + if logit_bias is not None: optional_params["logit_bias"] = logit_bias if top_p is not None: optional_params["p"] = top_p