fix(parallel_request_limiter.py): handle metadata being none

This commit is contained in:
Krrish Dholakia 2024-03-14 10:02:26 -07:00
parent 704573c3f6
commit 7876aa2d75
4 changed files with 11 additions and 6 deletions

View file

@@ -8,7 +8,7 @@ Set `MISTRAL_AZURE_API_KEY` and `MISTRAL_AZURE_API_BASE` in your env
```shell ```shell
MISTRAL_AZURE_API_KEY = "zE************"" MISTRAL_AZURE_API_KEY = "zE************""
MISTRAL_AZURE_API_BASE = "https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com" MISTRAL_AZURE_API_BASE = "https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com/v1"
``` ```
```python ```python

View file

@@ -8,7 +8,7 @@ handler.setLevel(logging.DEBUG)
# Create a formatter and set it for the handler # Create a formatter and set it for the handler
formatter = logging.Formatter( formatter = logging.Formatter(
"\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(message)s", "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s",
datefmt="%H:%M:%S", datefmt="%H:%M:%S",
) )

View file

@@ -3682,11 +3682,12 @@ async def ahealth_check(
response = {} # args like remaining ratelimit etc. response = {} # args like remaining ratelimit etc.
return response return response
except Exception as e: except Exception as e:
traceback.print_exc()
if model not in litellm.model_cost and mode is None: if model not in litellm.model_cost and mode is None:
raise Exception( raise Exception(
"Missing `mode`. Set the `mode` for the model - https://docs.litellm.ai/docs/proxy/health#embedding-models" "Missing `mode`. Set the `mode` for the model - https://docs.litellm.ai/docs/proxy/health#embedding-models"
) )
return {"error": str(e)} return {"error": f"{str(e)}"}
####### HELPER FUNCTIONS ################ ####### HELPER FUNCTIONS ################

View file

@@ -324,7 +324,10 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
try: try:
self.print_verbose(f"Inside Max Parallel Request Failure Hook") self.print_verbose(f"Inside Max Parallel Request Failure Hook")
user_api_key = kwargs["litellm_params"]["metadata"]["user_api_key"] user_api_key = (
kwargs["litellm_params"].get("metadata", {}).get("user_api_key", None)
)
self.print_verbose(f"user_api_key: {user_api_key}")
if user_api_key is None: if user_api_key is None:
return return
@@ -355,7 +358,6 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
# ------------ # ------------
# Update usage # Update usage
# ------------ # ------------
current = self.user_api_key_cache.get_cache( current = self.user_api_key_cache.get_cache(
key=request_count_api_key key=request_count_api_key
) or { ) or {
@@ -375,4 +377,6 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
request_count_api_key, new_val, ttl=60 request_count_api_key, new_val, ttl=60
) # save in cache for up to 1 min. ) # save in cache for up to 1 min.
except Exception as e: except Exception as e:
print(f"An exception occurred - {str(e)}") # noqa verbose_proxy_logger.info(
f"Inside Parallel Request Limiter: An exception occurred - {str(e)}."
)