Litellm dev 02 07 2025 p2 (#8377)

* fix(caching_routes.py): mask redis password on `/cache/ping` route * fix(caching_routes.py): fix linting erro * fix(caching_routes.py): fix linting error on caching routes * fix: fix test - ignore mask_dict - has a breakpoint * fix(azure.py): add timeout param + elapsed time in azure timeout error * fix(http_handler.py): add elapsed time to http timeout request makes it easier to debug how long request took before failing
2025-04-26 11:14:04 +00:00 · 2025-02-07 17:30:38 -08:00 · 2025-02-07 17:30:38 -08:00 · 7363b072c1
commit 7363b072c1
parent ff2529a994
7 changed files with 126 additions and 25 deletions
--- a/litellm/litellm_core_utils/sensitive_data_masker.py
+++ b/litellm/litellm_core_utils/sensitive_data_masker.py
@ -0,0 +1,81 @@
+from typing import Any, Dict, Optional, Set
+
+
+class SensitiveDataMasker:
+    def __init__(
+        self,
+        sensitive_patterns: Optional[Set[str]] = None,
+        visible_prefix: int = 4,
+        visible_suffix: int = 4,
+        mask_char: str = "*",
+    ):
+        self.sensitive_patterns = sensitive_patterns or {
+            "password",
+            "secret",
+            "key",
+            "token",
+            "auth",
+            "credential",
+            "access",
+            "private",
+            "certificate",
+        }
+
+        self.visible_prefix = visible_prefix
+        self.visible_suffix = visible_suffix
+        self.mask_char = mask_char
+
+    def _mask_value(self, value: str) -> str:
+        if not value or len(str(value)) < (self.visible_prefix + self.visible_suffix):
+            return value
+
+        value_str = str(value)
+        masked_length = len(value_str) - (self.visible_prefix + self.visible_suffix)
+        return f"{value_str[:self.visible_prefix]}{self.mask_char * masked_length}{value_str[-self.visible_suffix:]}"
+
+    def is_sensitive_key(self, key: str) -> bool:
+        key_lower = str(key).lower()
+        result = any(pattern in key_lower for pattern in self.sensitive_patterns)
+        return result
+
+    def mask_dict(
+        self, data: Dict[str, Any], depth: int = 0, max_depth: int = 10
+    ) -> Dict[str, Any]:
+        if depth >= max_depth:
+            return data
+
+        masked_data: Dict[str, Any] = {}
+        for k, v in data.items():
+            try:
+                if isinstance(v, dict):
+                    masked_data[k] = self.mask_dict(v, depth + 1)
+                elif hasattr(v, "__dict__") and not isinstance(v, type):
+                    masked_data[k] = self.mask_dict(vars(v), depth + 1)
+                elif self.is_sensitive_key(k):
+                    str_value = str(v) if v is not None else ""
+                    masked_data[k] = self._mask_value(str_value)
+                else:
+                    masked_data[k] = (
+                        v if isinstance(v, (int, float, bool, str)) else str(v)
+                    )
+            except Exception:
+                masked_data[k] = "<unable to serialize>"
+
+        return masked_data
+
+
+# Usage example:
+"""
+masker = SensitiveDataMasker()
+data = {
+    "api_key": "sk-1234567890abcdef",
+    "redis_password": "very_secret_pass",
+    "port": 6379
+}
+masked = masker.mask_dict(data)
+# Result: {
+#    "api_key": "sk-1****cdef",
+#    "redis_password": "very****pass",
+#    "port": 6379
+# }
+"""
--- a/litellm/llms/azure/azure.py
+++ b/litellm/llms/azure/azure.py
@ -5,7 +5,7 @@ import time
 from typing import Any, Callable, Dict, List, Literal, Optional, Union

 import httpx  # type: ignore
-from openai import AsyncAzureOpenAI, AzureOpenAI
+from openai import APITimeoutError, AsyncAzureOpenAI, AzureOpenAI

 import litellm
 from litellm.caching.caching import DualCache
@ -305,6 +305,7 @@ class AzureChatCompletion(BaseLLM):
        - call chat.completions.create.with_raw_response when litellm.return_response_headers is True
        - call chat.completions.create by default
        """
+        start_time = time.time()
        try:
            raw_response = await azure_client.chat.completions.with_raw_response.create(
                **data, timeout=timeout
@ -313,6 +314,11 @@ class AzureChatCompletion(BaseLLM):
            headers = dict(raw_response.headers)
            response = raw_response.parse()
            return headers, response
+        except APITimeoutError as e:
+            end_time = time.time()
+            time_delta = round(end_time - start_time, 2)
+            e.message += f" - timeout value={timeout}, time taken={time_delta} seconds"
+            raise e
        except Exception as e:
            raise e

@ -642,6 +648,7 @@ class AzureChatCompletion(BaseLLM):
            )
            raise AzureOpenAIError(status_code=500, message=str(e))
        except Exception as e:
+            message = getattr(e, "message", str(e))
            ## LOGGING
            logging_obj.post_call(
                input=data["messages"],
@ -652,7 +659,7 @@ class AzureChatCompletion(BaseLLM):
            if hasattr(e, "status_code"):
                raise e
            else:
-                raise AzureOpenAIError(status_code=500, message=str(e))
+                raise AzureOpenAIError(status_code=500, message=message)

    def streaming(
        self,
@ -797,10 +804,11 @@ class AzureChatCompletion(BaseLLM):
            status_code = getattr(e, "status_code", 500)
            error_headers = getattr(e, "headers", None)
            error_response = getattr(e, "response", None)
+            message = getattr(e, "message", str(e))
            if error_headers is None and error_response:
                error_headers = getattr(error_response, "headers", None)
            raise AzureOpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=message, headers=error_headers
            )

    async def aembedding(
--- a/litellm/llms/custom_httpx/http_handler.py
+++ b/litellm/llms/custom_httpx/http_handler.py
@ -1,5 +1,6 @@
 import asyncio
 import os
+import time
 from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Optional, Union

 import httpx
@ -179,6 +180,7 @@ class AsyncHTTPHandler:
        stream: bool = False,
        logging_obj: Optional[LiteLLMLoggingObject] = None,
    ):
+        start_time = time.time()
        try:
            if timeout is None:
                timeout = self.timeout
@ -207,6 +209,8 @@ class AsyncHTTPHandler:
            finally:
                await new_client.aclose()
        except httpx.TimeoutException as e:
+            end_time = time.time()
+            time_delta = round(end_time - start_time, 3)
            headers = {}
            error_response = getattr(e, "response", None)
            if error_response is not None:
@ -214,7 +218,7 @@ class AsyncHTTPHandler:
                    headers["response_headers-{}".format(key)] = value

            raise litellm.Timeout(
-                message=f"Connection timed out after {timeout} seconds.",
+                message=f"Connection timed out. Timeout passed={timeout}, time taken={time_delta} seconds",
                model="default-model-name",
                llm_provider="litellm-httpx-handler",
                headers=headers,
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ b/litellm/proxy/_experimental/out/onboarding.html
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@ -5,6 +5,12 @@ model_list:
  - model_name: gpt-4
    litellm_params:
      model: gpt-3.5-turbo
+  - model_name: azure-gpt-35-turbo
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
+      timeout: 0.000000001
  - model_name: o3-mini
    litellm_params:
      model: o3-mini
@ -12,7 +18,7 @@ model_list:
  - model_name: anthropic-claude
    litellm_params:
      model: claude-3-5-haiku-20241022
-      mock_response: Hi!
+      timeout: 0.000000001
  - model_name: groq/*
    litellm_params:
      model: groq/*
@ -28,16 +34,11 @@ model_list:
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

-general_settings:
-  enable_jwt_auth: True
-  litellm_jwtauth:
-    team_id_jwt_field: "client_id"
-    team_id_upsert: true
-    scope_mappings:
-      - scope: litellm.api.consumer
-        models: ["anthropic-claude"]
-        routes: ["/v1/chat/completions"]
-      - scope: litellm.api.gpt_3_5_turbo
-        models: ["gpt-3.5-turbo-testing"]
-    enforce_scope_based_access: true
-    enforce_rbac: true
+litellm_settings:
+  cache: true
+      
+
+router_settings:
+  redis_host: os.environ/REDIS_HOST
+  redis_password: os.environ/REDIS_PASSWORD
+  redis_port: os.environ/REDIS_PORT
--- a/litellm/proxy/caching_routes.py
+++ b/litellm/proxy/caching_routes.py
@ -1,12 +1,15 @@
-import copy
+from typing import Any, Dict

 from fastapi import APIRouter, Depends, HTTPException, Request

 import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import RedisCache
+from litellm.litellm_core_utils.sensitive_data_masker import SensitiveDataMasker
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth

+masker = SensitiveDataMasker()
+
 router = APIRouter(
    prefix="/cache",
    tags=["caching"],
@ -21,27 +24,30 @@ async def cache_ping():
    """
    Endpoint for checking if cache can be pinged
    """
-    litellm_cache_params = {}
-    specific_cache_params = {}
+    litellm_cache_params: Dict[str, Any] = {}
+    specific_cache_params: Dict[str, Any] = {}
    try:

        if litellm.cache is None:
            raise HTTPException(
                status_code=503, detail="Cache not initialized. litellm.cache is None"
            )
-
+        litellm_cache_params = {}
+        specific_cache_params = {}
        for k, v in vars(litellm.cache).items():
            try:
                if k == "cache":
                    continue
-                litellm_cache_params[k] = str(copy.deepcopy(v))
+                litellm_cache_params[k] = v
            except Exception:
                litellm_cache_params[k] = "<unable to copy or convert>"
        for k, v in vars(litellm.cache.cache).items():
            try:
-                specific_cache_params[k] = str(v)
+                specific_cache_params[k] = v
            except Exception:
                specific_cache_params[k] = "<unable to copy or convert>"
+        litellm_cache_params = masker.mask_dict(litellm_cache_params)
+        specific_cache_params = masker.mask_dict(specific_cache_params)
        if litellm.cache.type == "redis":
            # ping the redis cache
            ping_response = await litellm.cache.ping()
@ -56,6 +62,7 @@ async def cache_ping():
                messages=[{"role": "user", "content": "test from litellm"}],
            )
            verbose_proxy_logger.debug("/cache/ping: done with set_cache()")
+
            return {
                "status": "healthy",
                "cache_type": litellm.cache.type,
--- a/tests/code_coverage_tests/recursive_detector.py
+++ b/tests/code_coverage_tests/recursive_detector.py
@ -13,6 +13,7 @@ IGNORE_FUNCTIONS = [
    "add_object_type",
    "strip_field",
    "_transform_prompt",
+    "mask_dict",
 ]