From dad1ad20773618fe6a5cc031f66965292e1c8dcb Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Sat, 14 Sep 2024 10:32:39 -0700
Subject: [PATCH] LiteLLM Minor Fixes and Improvements (09/14/2024) (#5697)

* fix(health_check.py): hide sensitive keys from health check debug information

k

* fix(route_llm_request.py): fix proxy model not found error message to indicate how to resolve issue

* fix(vertex_llm_base.py): fix exception message to not log credentials

---
 .../vertex_llm_base.py                      | 12 +++++++++---
 litellm/proxy/README.md                     | 12 ++++++++++++
 litellm/proxy/health_check.py               | 10 +++++++++-
 .../proxy/hooks/parallel_request_limiter.py |  4 +---
 litellm/proxy/route_llm_request.py          | 19 ++++++++++++-------
 5 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_llm_base.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_llm_base.py
index 195b29436..dbd19b8c3 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_llm_base.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_llm_base.py
@@ -255,9 +255,15 @@ class VertexBase(BaseLLM):
                 return self.access_token, self.project_id
 
         if not self._credentials:
-            self._credentials, cred_project_id = await asyncify(self.load_auth)(
-                credentials=credentials, project_id=project_id
-            )
+            try:
+                self._credentials, cred_project_id = await asyncify(self.load_auth)(
+                    credentials=credentials, project_id=project_id
+                )
+            except Exception:
+                verbose_logger.exception(
+                    "Failed to load vertex credentials. Check to see if credentials contain partial/invalid information."
+                )
+                raise
             if not self.project_id:
                 self.project_id = project_id or cred_project_id
         else:
diff --git a/litellm/proxy/README.md b/litellm/proxy/README.md
index 92df6026c..552c9777b 100644
--- a/litellm/proxy/README.md
+++ b/litellm/proxy/README.md
@@ -29,3 +29,15 @@ print(response)
 ```
 
 [**See how to call Huggingface,Bedrock,TogetherAI,Anthropic, etc.**](https://docs.litellm.ai/docs/simple_proxy)
+
+
+---
+
+### Folder Structure
+
+**Routes**
+- `proxy_server.py` - all openai-compatible routes - `/v1/chat/completion`, `/v1/embedding` + model info routes - `/v1/models`, `/v1/model/info`, `/v1/model_group_info` routes.
+- `health_endpoints/` - `/health`, `/health/liveliness`, `/health/readiness`
+- `management_endpoints/key_management_endpoints.py` - all `/key/*` routes
+- `management_endpoints/team_endpoints.py` - all `/team/*` routes
+- `management_endpoints/internal_user_endpoints.py` - all `/user/*` routes
\ No newline at end of file
diff --git a/litellm/proxy/health_check.py b/litellm/proxy/health_check.py
index 215d2d8d6..596648638 100644
--- a/litellm/proxy/health_check.py
+++ b/litellm/proxy/health_check.py
@@ -11,7 +11,15 @@ from litellm._logging import print_verbose
 
 logger = logging.getLogger(__name__)
 
-ILLEGAL_DISPLAY_PARAMS = ["messages", "api_key", "prompt", "input"]
+ILLEGAL_DISPLAY_PARAMS = [
+    "messages",
+    "api_key",
+    "prompt",
+    "input",
+    "vertex_credentials",
+    "aws_access_key_id",
+    "aws_secret_access_key",
+]
 
 MINIMAL_DISPLAY_PARAMS = ["model", "mode_error"]
 
diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py
index 08baf78d4..fd18fbac9 100644
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm/proxy/hooks/parallel_request_limiter.py
@@ -608,9 +608,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
             global_max_parallel_requests = _metadata.get(
                 "global_max_parallel_requests", None
             )
-            user_api_key = (
-                kwargs["litellm_params"].get("metadata", {}).get("user_api_key", None)
-            )
+            user_api_key = _metadata.get("user_api_key", None)
             self.print_verbose(f"user_api_key: {user_api_key}")
             if user_api_key is None:
                 return
diff --git a/litellm/proxy/route_llm_request.py b/litellm/proxy/route_llm_request.py
index 361c5be0c..41da68b51 100644
--- a/litellm/proxy/route_llm_request.py
+++ b/litellm/proxy/route_llm_request.py
@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Any, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
 
 from fastapi import (
     Depends,
@@ -37,6 +37,14 @@ ROUTE_ENDPOINT_MAPPING = {
 }
 
 
+class ProxyModelNotFoundError(HTTPException):
+    def __init__(self, route: str, model_name: str):
+        detail = {
+            "error": f"{route}: Invalid model name passed in model={model_name}. Call `/v1/models` to view available models for your key."
+        }
+        super().__init__(status_code=status.HTTP_400_BAD_REQUEST, detail=detail)
+
+
 async def route_request(
     data: dict,
     llm_router: Optional[LitellmRouter],
@@ -110,10 +118,7 @@ async def route_request(
 
     # if no route found then it's a bad request
     route_name = ROUTE_ENDPOINT_MAPPING.get(route_type, route_type)
-    raise HTTPException(
-        status_code=status.HTTP_400_BAD_REQUEST,
-        detail={
-            "error": f"{route_name}: Invalid model name passed in model="
-            + data.get("model", "")
-        },
+    raise ProxyModelNotFoundError(
+        route=route_name,
+        model_name=data.get("model", ""),
     )
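
A few illustrative sketches of the behavior these changes produce follow; none of them are part of the patch itself.

The new `ProxyModelNotFoundError` in `route_llm_request.py` subclasses FastAPI's `HTTPException`, so the remediation hint ("Call `/v1/models`...") is defined in one place and reused wherever the proxy cannot match a requested model. A minimal sketch of the payload it carries, assuming FastAPI's default `HTTPException` handling; the route and model values below are illustrative:

```python
# Minimal sketch of the error introduced in route_llm_request.py.
# FastAPI serializes `detail` into the JSON body of the 400 response.
from fastapi import HTTPException, status


class ProxyModelNotFoundError(HTTPException):
    def __init__(self, route: str, model_name: str):
        detail = {
            "error": f"{route}: Invalid model name passed in model={model_name}. Call `/v1/models` to view available models for your key."
        }
        super().__init__(status_code=status.HTTP_400_BAD_REQUEST, detail=detail)


# Illustrative values only.
err = ProxyModelNotFoundError(route="/chat/completions", model_name="gpt-does-not-exist")
print(err.status_code)      # 400
print(err.detail["error"])  # includes the `/v1/models` hint
```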
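The expanded `ILLEGAL_DISPLAY_PARAMS` list in `health_check.py` is what keeps `vertex_credentials` and AWS keys out of health check debug output. A rough sketch of the kind of filtering this list enables; `scrub_endpoint_params` is a hypothetical helper named for illustration, not the proxy's actual function:

```python
# Rough sketch: dropping sensitive keys before health-check details are
# returned or logged. `scrub_endpoint_params` is a hypothetical name;
# the real filtering lives in litellm/proxy/health_check.py.
from typing import Any, Dict

ILLEGAL_DISPLAY_PARAMS = [
    "messages",
    "api_key",
    "prompt",
    "input",
    "vertex_credentials",
    "aws_access_key_id",
    "aws_secret_access_key",
]


def scrub_endpoint_params(endpoint_data: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of the endpoint config with sensitive keys removed."""
    return {k: v for k, v in endpoint_data.items() if k not in ILLEGAL_DISPLAY_PARAMS}


raw = {
    "model": "vertex_ai/gemini-1.5-pro",
    "vertex_credentials": "<service-account json>",
    "aws_secret_access_key": "<secret>",
}
print(scrub_endpoint_params(raw))  # {'model': 'vertex_ai/gemini-1.5-pro'}
```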
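The `vertex_llm_base.py` change follows a "log a fixed message, then re-raise" pattern so a failed credential load never echoes the credential contents into logs. A minimal sketch of the same pattern in isolation, assuming a stand-in `load_auth` that rejects malformed input:

```python
# Minimal sketch of the log-and-re-raise pattern from vertex_llm_base.py.
# `load_auth` and `ensure_credentials` are stand-ins for illustration.
import logging
from typing import Optional

verbose_logger = logging.getLogger(__name__)


def load_auth(credentials: Optional[str], project_id: Optional[str]):
    # Stand-in: the real loader parses a service-account JSON blob.
    raise ValueError("malformed credentials")


def ensure_credentials(credentials: Optional[str], project_id: Optional[str]):
    try:
        return load_auth(credentials=credentials, project_id=project_id)
    except Exception:
        # Log a fixed message only; never interpolate the credential string.
        verbose_logger.exception(
            "Failed to load vertex credentials. Check to see if credentials contain partial/invalid information."
        )
        raise
```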