fix(parallel_request_limiter.py): handle metadata being none

This commit is contained in:
Krrish Dholakia 2024-03-14 10:02:26 -07:00
parent 704573c3f6
commit 7876aa2d75
4 changed files with 11 additions and 6 deletions

View file

@@ -8,7 +8,7 @@ Set `MISTRAL_AZURE_API_KEY` and `MISTRAL_AZURE_API_BASE` in your env
```shell ```shell
MISTRAL_AZURE_API_KEY = "zE************"" MISTRAL_AZURE_API_KEY = "zE************""
MISTRAL_AZURE_API_BASE = "https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com" MISTRAL_AZURE_API_BASE = "https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com/v1"
``` ```
```python ```python

View file

@@ -8,7 +8,7 @@ handler.setLevel(logging.DEBUG)
# Create a formatter and set it for the handler # Create a formatter and set it for the handler
formatter = logging.Formatter( formatter = logging.Formatter(
"\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(message)s", "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s",
datefmt="%H:%M:%S", datefmt="%H:%M:%S",
) )

View file

@@ -3682,11 +3682,12 @@ async def ahealth_check(
response = {} # args like remaining ratelimit etc. response = {} # args like remaining ratelimit etc.
return response return response
except Exception as e: except Exception as e:
traceback.print_exc()
if model not in litellm.model_cost and mode is None: if model not in litellm.model_cost and mode is None:
raise Exception( raise Exception(
"Missing `mode`. Set the `mode` for the model - https://docs.litellm.ai/docs/proxy/health#embedding-models" "Missing `mode`. Set the `mode` for the model - https://docs.litellm.ai/docs/proxy/health#embedding-models"
) )
return {"error": str(e)} return {"error": f"{str(e)}"}
####### HELPER FUNCTIONS ################ ####### HELPER FUNCTIONS ################

View file

@@ -324,7 +324,10 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
try: try:
self.print_verbose(f"Inside Max Parallel Request Failure Hook") self.print_verbose(f"Inside Max Parallel Request Failure Hook")
user_api_key = kwargs["litellm_params"]["metadata"]["user_api_key"] user_api_key = (
kwargs["litellm_params"].get("metadata", {}).get("user_api_key", None)
)
self.print_verbose(f"user_api_key: {user_api_key}")
if user_api_key is None: if user_api_key is None:
return return
@@ -355,7 +358,6 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
# ------------ # ------------
# Update usage # Update usage
# ------------ # ------------
current = self.user_api_key_cache.get_cache( current = self.user_api_key_cache.get_cache(
key=request_count_api_key key=request_count_api_key
) or { ) or {
@@ -375,4 +377,6 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
request_count_api_key, new_val, ttl=60 request_count_api_key, new_val, ttl=60
) # save in cache for up to 1 min. ) # save in cache for up to 1 min.
except Exception as e: except Exception as e:
print(f"An exception occurred - {str(e)}") # noqa verbose_proxy_logger.info(
f"Inside Parallel Request Limiter: An exception occurred - {str(e)}."
)