From dad1ad20773618fe6a5cc031f66965292e1c8dcb Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Sat, 14 Sep 2024 10:32:39 -0700
Subject: [PATCH] LiteLLM Minor Fixes and Improvements (09/14/2024) (#5697)

* fix(health_check.py): hide sensitive keys from health check debug information

k

* fix(route_llm_request.py): fix proxy model not found error message to indicate how to resolve issue

* fix(vertex_llm_base.py): fix exception message to not log credentials

---
 .../vertex_llm_base.py                      | 12 +++++++++---
 litellm/proxy/README.md                     | 12 ++++++++++++
 litellm/proxy/health_check.py               | 10 +++++++++-
 .../proxy/hooks/parallel_request_limiter.py |  4 +---
 litellm/proxy/route_llm_request.py          | 19 ++++++++++++-------
 5 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_llm_base.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_llm_base.py
index 195b29436..dbd19b8c3 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_llm_base.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_llm_base.py
@@ -255,9 +255,15 @@ class VertexBase(BaseLLM):
                 return self.access_token, self.project_id
 
         if not self._credentials:
-            self._credentials, cred_project_id = await asyncify(self.load_auth)(
-                credentials=credentials, project_id=project_id
-            )
+            try:
+                self._credentials, cred_project_id = await asyncify(self.load_auth)(
+                    credentials=credentials, project_id=project_id
+                )
+            except Exception:
+                verbose_logger.exception(
+                    "Failed to load vertex credentials. Check to see if credentials contain partial/invalid information."
+                )
+                raise
             if not self.project_id:
                 self.project_id = project_id or cred_project_id
         else:
diff --git a/litellm/proxy/README.md b/litellm/proxy/README.md
index 92df6026c..552c9777b 100644
--- a/litellm/proxy/README.md
+++ b/litellm/proxy/README.md
@@ -29,3 +29,15 @@ print(response)
 ```
 
 [**See how to call Huggingface,Bedrock,TogetherAI,Anthropic, etc.**](https://docs.litellm.ai/docs/simple_proxy)
+
+
+---
+
+### Folder Structure
+
+**Routes**
+- `proxy_server.py` - all openai-compatible routes - `/v1/chat/completion`, `/v1/embedding` + model info routes - `/v1/models`, `/v1/model/info`, `/v1/model_group_info` routes.
+- `health_endpoints/` - `/health`, `/health/liveliness`, `/health/readiness`
+- `management_endpoints/key_management_endpoints.py` - all `/key/*` routes
+- `management_endpoints/team_endpoints.py` - all `/team/*` routes
+- `management_endpoints/internal_user_endpoints.py` - all `/user/*` routes
\ No newline at end of file
diff --git a/litellm/proxy/health_check.py b/litellm/proxy/health_check.py
index 215d2d8d6..596648638 100644
--- a/litellm/proxy/health_check.py
+++ b/litellm/proxy/health_check.py
@@ -11,7 +11,15 @@ from litellm._logging import print_verbose
 
 logger = logging.getLogger(__name__)
 
-ILLEGAL_DISPLAY_PARAMS = ["messages", "api_key", "prompt", "input"]
+ILLEGAL_DISPLAY_PARAMS = [
+    "messages",
+    "api_key",
+    "prompt",
+    "input",
+    "vertex_credentials",
+    "aws_access_key_id",
+    "aws_secret_access_key",
+]
 
 MINIMAL_DISPLAY_PARAMS = ["model", "mode_error"]
 
diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py
index 08baf78d4..fd18fbac9 100644
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm/proxy/hooks/parallel_request_limiter.py
@@ -608,9 +608,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
             global_max_parallel_requests = _metadata.get(
                 "global_max_parallel_requests", None
             )
-            user_api_key = (
-                kwargs["litellm_params"].get("metadata", {}).get("user_api_key", None)
-            )
+            user_api_key = _metadata.get("user_api_key", None)
             self.print_verbose(f"user_api_key: {user_api_key}")
             if user_api_key is None:
                 return
diff --git a/litellm/proxy/route_llm_request.py b/litellm/proxy/route_llm_request.py
index 361c5be0c..41da68b51 100644
--- a/litellm/proxy/route_llm_request.py
+++ b/litellm/proxy/route_llm_request.py
@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Any, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
 
 from fastapi import (
     Depends,
@@ -37,6 +37,14 @@ ROUTE_ENDPOINT_MAPPING = {
 }
 
 
+class ProxyModelNotFoundError(HTTPException):
+    def __init__(self, route: str, model_name: str):
+        detail = {
+            "error": f"{route}: Invalid model name passed in model={model_name}. Call `/v1/models` to view available models for your key."
+        }
+        super().__init__(status_code=status.HTTP_400_BAD_REQUEST, detail=detail)
+
+
 async def route_request(
     data: dict,
     llm_router: Optional[LitellmRouter],
@@ -110,10 +118,7 @@ async def route_request(
 
     # if no route found then it's a bad request
     route_name = ROUTE_ENDPOINT_MAPPING.get(route_type, route_type)
-    raise HTTPException(
-        status_code=status.HTTP_400_BAD_REQUEST,
-        detail={
-            "error": f"{route_name}: Invalid model name passed in model="
-            + data.get("model", "")
-        },
+    raise ProxyModelNotFoundError(
+        route=route_name,
+        model_name=data.get("model", ""),
     )
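
A few illustrative sketches of the behavior these changes produce follow; none of them are part of the patch itself.

The new `ProxyModelNotFoundError` in `route_llm_request.py` subclasses FastAPI's `HTTPException`, so the remediation hint ("Call `/v1/models`...") is defined in one place and reused wherever the proxy cannot match a requested model. A minimal sketch of the payload it carries, assuming FastAPI's default `HTTPException` handling; the route and model values below are illustrative:

```python
# Minimal sketch of the error introduced in route_llm_request.py.
# FastAPI serializes `detail` into the JSON body of the 400 response.
from fastapi import HTTPException, status


class ProxyModelNotFoundError(HTTPException):
    def __init__(self, route: str, model_name: str):
        detail = {
            "error": f"{route}: Invalid model name passed in model={model_name}. Call `/v1/models` to view available models for your key."
        }
        super().__init__(status_code=status.HTTP_400_BAD_REQUEST, detail=detail)


# Illustrative values only.
err = ProxyModelNotFoundError(route="/chat/completions", model_name="gpt-does-not-exist")
print(err.status_code)      # 400
print(err.detail["error"])  # includes the `/v1/models` hint
```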
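The expanded `ILLEGAL_DISPLAY_PARAMS` list in `health_check.py` is what keeps `vertex_credentials` and AWS keys out of health check debug output. A rough sketch of the kind of filtering this list enables; `scrub_endpoint_params` is a hypothetical helper named for illustration, not the proxy's actual function:

```python
# Rough sketch: dropping sensitive keys before health-check details are
# returned or logged. `scrub_endpoint_params` is a hypothetical name;
# the real filtering lives in litellm/proxy/health_check.py.
from typing import Any, Dict

ILLEGAL_DISPLAY_PARAMS = [
    "messages",
    "api_key",
    "prompt",
    "input",
    "vertex_credentials",
    "aws_access_key_id",
    "aws_secret_access_key",
]


def scrub_endpoint_params(endpoint_data: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of the endpoint config with sensitive keys removed."""
    return {k: v for k, v in endpoint_data.items() if k not in ILLEGAL_DISPLAY_PARAMS}


raw = {
    "model": "vertex_ai/gemini-1.5-pro",
    "vertex_credentials": "<service-account json>",
    "aws_secret_access_key": "<secret>",
}
print(scrub_endpoint_params(raw))  # {'model': 'vertex_ai/gemini-1.5-pro'}
```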
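The `vertex_llm_base.py` change follows a "log a fixed message, then re-raise" pattern so a failed credential load never echoes the credential contents into logs. A minimal sketch of the same pattern in isolation, assuming a stand-in `load_auth` that rejects malformed input:

```python
# Minimal sketch of the log-and-re-raise pattern from vertex_llm_base.py.
# `load_auth` and `ensure_credentials` are stand-ins for illustration.
import logging
from typing import Optional

verbose_logger = logging.getLogger(__name__)


def load_auth(credentials: Optional[str], project_id: Optional[str]):
    # Stand-in: the real loader parses a service-account JSON blob.
    raise ValueError("malformed credentials")


def ensure_credentials(credentials: Optional[str], project_id: Optional[str]):
    try:
        return load_auth(credentials=credentials, project_id=project_id)
    except Exception:
        # Log a fixed message only; never interpolate the credential string.
        verbose_logger.exception(
            "Failed to load vertex credentials. Check to see if credentials contain partial/invalid information."
        )
        raise
```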