diff --git a/docs/my-website/docs/providers/azure_ai.md b/docs/my-website/docs/providers/azure_ai.md index 71776e0fb7..4b7a9a783e 100644 --- a/docs/my-website/docs/providers/azure_ai.md +++ b/docs/my-website/docs/providers/azure_ai.md @@ -8,7 +8,7 @@ Set `MISTRAL_AZURE_API_KEY` and `MISTRAL_AZURE_API_BASE` in your env ```shell MISTRAL_AZURE_API_KEY = "zE************"" -MISTRAL_AZURE_API_BASE = "https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com" +MISTRAL_AZURE_API_BASE = "https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com/v1" ``` ```python diff --git a/litellm/_logging.py b/litellm/_logging.py index 26693c15ec..4f7e464468 100644 --- a/litellm/_logging.py +++ b/litellm/_logging.py @@ -8,7 +8,7 @@ handler.setLevel(logging.DEBUG) # Create a formatter and set it for the handler formatter = logging.Formatter( - "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(message)s", + "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s", datefmt="%H:%M:%S", ) diff --git a/litellm/main.py b/litellm/main.py index 8326e03f69..8ccde52e6a 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -3682,11 +3682,12 @@ async def ahealth_check( response = {} # args like remaining ratelimit etc. return response except Exception as e: + traceback.print_exc() if model not in litellm.model_cost and mode is None: raise Exception( "Missing `mode`. Set the `mode` for the model - https://docs.litellm.ai/docs/proxy/health#embedding-models" ) - return {"error": str(e)} + return {"error": f"{str(e)}"} ####### HELPER FUNCTIONS ################ diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py index 8982e4e2bf..af59869c29 100644 --- a/litellm/proxy/hooks/parallel_request_limiter.py +++ b/litellm/proxy/hooks/parallel_request_limiter.py @@ -324,7 +324,10 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): try: self.print_verbose(f"Inside Max Parallel Request Failure Hook") - user_api_key = kwargs["litellm_params"]["metadata"]["user_api_key"] + user_api_key = ( + kwargs["litellm_params"].get("metadata", {}).get("user_api_key", None) + ) + self.print_verbose(f"user_api_key: {user_api_key}") if user_api_key is None: return @@ -355,7 +358,6 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): # ------------ # Update usage # ------------ - current = self.user_api_key_cache.get_cache( key=request_count_api_key ) or { @@ -375,4 +377,6 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): request_count_api_key, new_val, ttl=60 ) # save in cache for up to 1 min. except Exception as e: - print(f"An exception occurred - {str(e)}") # noqa + verbose_proxy_logger.info( + f"Inside Parallel Request Limiter: An exception occurred - {str(e)}." + )