Litellm dev 02 07 2025 p2 (#8377)

* fix(caching_routes.py): mask redis password on `/cache/ping` route

* fix(caching_routes.py): fix linting error

* fix(caching_routes.py): fix linting error on caching routes

* fix: fix test - ignore mask_dict - has a breakpoint

* fix(azure.py): add timeout param + elapsed time in azure timeout error

* fix(http_handler.py): add elapsed time to http timeout request

makes it easier to debug how long the request took before failing
Authored by Krish Dholakia on 2025-02-07 17:30:38 -08:00, committed by GitHub
parent ff2529a994
commit 7363b072c1
7 changed files with 126 additions and 25 deletions


@@ -0,0 +1,81 @@
from typing import Any, Dict, Optional, Set


class SensitiveDataMasker:
    def __init__(
        self,
        sensitive_patterns: Optional[Set[str]] = None,
        visible_prefix: int = 4,
        visible_suffix: int = 4,
        mask_char: str = "*",
    ):
        self.sensitive_patterns = sensitive_patterns or {
            "password",
            "secret",
            "key",
            "token",
            "auth",
            "credential",
            "access",
            "private",
            "certificate",
        }

        self.visible_prefix = visible_prefix
        self.visible_suffix = visible_suffix
        self.mask_char = mask_char

    def _mask_value(self, value: str) -> str:
        if not value or len(str(value)) < (self.visible_prefix + self.visible_suffix):
            return value

        value_str = str(value)
        masked_length = len(value_str) - (self.visible_prefix + self.visible_suffix)
        return f"{value_str[:self.visible_prefix]}{self.mask_char * masked_length}{value_str[-self.visible_suffix:]}"

    def is_sensitive_key(self, key: str) -> bool:
        key_lower = str(key).lower()
        result = any(pattern in key_lower for pattern in self.sensitive_patterns)
        return result

    def mask_dict(
        self, data: Dict[str, Any], depth: int = 0, max_depth: int = 10
    ) -> Dict[str, Any]:
        if depth >= max_depth:
            return data

        masked_data: Dict[str, Any] = {}
        for k, v in data.items():
            try:
                if isinstance(v, dict):
                    masked_data[k] = self.mask_dict(v, depth + 1)
                elif hasattr(v, "__dict__") and not isinstance(v, type):
                    masked_data[k] = self.mask_dict(vars(v), depth + 1)
                elif self.is_sensitive_key(k):
                    str_value = str(v) if v is not None else ""
                    masked_data[k] = self._mask_value(str_value)
                else:
                    masked_data[k] = (
                        v if isinstance(v, (int, float, bool, str)) else str(v)
                    )
            except Exception:
                masked_data[k] = "<unable to serialize>"

        return masked_data


# Usage example:
"""
masker = SensitiveDataMasker()
data = {
    "api_key": "sk-1234567890abcdef",
    "redis_password": "very_secret_pass",
    "port": 6379
}

masked = masker.mask_dict(data)
# Result: {
#     "api_key": "sk-1***********cdef",
#     "redis_password": "very********pass",
#     "port": 6379
# }
"""


@@ -5,7 +5,7 @@ import time
 from typing import Any, Callable, Dict, List, Literal, Optional, Union

 import httpx  # type: ignore
-from openai import AsyncAzureOpenAI, AzureOpenAI
+from openai import APITimeoutError, AsyncAzureOpenAI, AzureOpenAI

 import litellm
 from litellm.caching.caching import DualCache
@@ -305,6 +305,7 @@ class AzureChatCompletion(BaseLLM):
         - call chat.completions.create.with_raw_response when litellm.return_response_headers is True
         - call chat.completions.create by default
         """
+        start_time = time.time()
         try:
             raw_response = await azure_client.chat.completions.with_raw_response.create(
                 **data, timeout=timeout
@@ -313,6 +314,11 @@ class AzureChatCompletion(BaseLLM):
             headers = dict(raw_response.headers)
             response = raw_response.parse()
             return headers, response
+        except APITimeoutError as e:
+            end_time = time.time()
+            time_delta = round(end_time - start_time, 2)
+            e.message += f" - timeout value={timeout}, time taken={time_delta} seconds"
+            raise e
         except Exception as e:
             raise e
@@ -642,6 +648,7 @@ class AzureChatCompletion(BaseLLM):
                 )
             raise AzureOpenAIError(status_code=500, message=str(e))
         except Exception as e:
+            message = getattr(e, "message", str(e))
             ## LOGGING
             logging_obj.post_call(
                 input=data["messages"],
@@ -652,7 +659,7 @@ class AzureChatCompletion(BaseLLM):
             if hasattr(e, "status_code"):
                 raise e
             else:
-                raise AzureOpenAIError(status_code=500, message=str(e))
+                raise AzureOpenAIError(status_code=500, message=message)

     def streaming(
         self,
@@ -797,10 +804,11 @@ class AzureChatCompletion(BaseLLM):
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
             error_response = getattr(e, "response", None)
+            message = getattr(e, "message", str(e))
             if error_headers is None and error_response:
                 error_headers = getattr(error_response, "headers", None)
             raise AzureOpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=message, headers=error_headers
             )

     async def aembedding(
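
A minimal standalone sketch of what the azure.py timeout handling above does to the error: it appends the configured timeout and the measured elapsed time to the APITimeoutError message before re-raising. The request URL, sleep, and timeout value here are stand-ins, not taken from the commit:

import time

import httpx
from openai import APITimeoutError

timeout = 0.1  # stand-in for the timeout passed through to the Azure client
start_time = time.time()
time.sleep(0.05)  # pretend the request ran for a bit before timing out

e = APITimeoutError(request=httpx.Request("POST", "https://example.openai.azure.com"))
end_time = time.time()
time_delta = round(end_time - start_time, 2)
e.message += f" - timeout value={timeout}, time taken={time_delta} seconds"
print(e.message)
# prints something like: Request timed out. - timeout value=0.1, time taken=0.05 seconds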


@@ -1,5 +1,6 @@
 import asyncio
 import os
+import time
 from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Optional, Union

 import httpx
@@ -179,6 +180,7 @@ class AsyncHTTPHandler:
         stream: bool = False,
         logging_obj: Optional[LiteLLMLoggingObject] = None,
     ):
+        start_time = time.time()
         try:
             if timeout is None:
                 timeout = self.timeout
@@ -207,6 +209,8 @@ class AsyncHTTPHandler:
             finally:
                 await new_client.aclose()
         except httpx.TimeoutException as e:
+            end_time = time.time()
+            time_delta = round(end_time - start_time, 3)
             headers = {}
             error_response = getattr(e, "response", None)
             if error_response is not None:
@@ -214,7 +218,7 @@ class AsyncHTTPHandler:
                     headers["response_headers-{}".format(key)] = value

             raise litellm.Timeout(
-                message=f"Connection timed out after {timeout} seconds.",
+                message=f"Connection timed out. Timeout passed={timeout}, time taken={time_delta} seconds",
                 model="default-model-name",
                 llm_provider="litellm-httpx-handler",
                 headers=headers,
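
To see the handler change end to end, a hedged sketch that forces the httpx timeout path and prints the enriched litellm.Timeout message (the module path and target URL are assumptions; a timeout this small should trip before any response arrives):

import asyncio

import litellm
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler  # assumed module path


async def main():
    handler = AsyncHTTPHandler()
    try:
        await handler.post("https://example.com", timeout=0.000000001)
    except litellm.Timeout as e:
        # message now carries both the configured timeout and the measured elapsed time
        print(e)


asyncio.run(main())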

File diff suppressed because one or more lines are too long


@@ -5,6 +5,12 @@ model_list:
   - model_name: gpt-4
     litellm_params:
       model: gpt-3.5-turbo
+  - model_name: azure-gpt-35-turbo
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
+      timeout: 0.000000001
   - model_name: o3-mini
     litellm_params:
       model: o3-mini
@@ -12,7 +18,7 @@ model_list:
   - model_name: anthropic-claude
     litellm_params:
       model: claude-3-5-haiku-20241022
-      mock_response: Hi!
+      timeout: 0.000000001
   - model_name: groq/*
     litellm_params:
       model: groq/*
@@ -28,16 +34,11 @@ model_list:
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/

-general_settings:
-  enable_jwt_auth: True
-  litellm_jwtauth:
-    team_id_jwt_field: "client_id"
-    team_id_upsert: true
-    scope_mappings:
-      - scope: litellm.api.consumer
-        models: ["anthropic-claude"]
-        routes: ["/v1/chat/completions"]
-      - scope: litellm.api.gpt_3_5_turbo
-        models: ["gpt-3.5-turbo-testing"]
-    enforce_scope_based_access: true
-    enforce_rbac: true
+litellm_settings:
+  cache: true
+
+router_settings:
+  redis_host: os.environ/REDIS_HOST
+  redis_password: os.environ/REDIS_PASSWORD
+  redis_port: os.environ/REDIS_PORT
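
The nanosecond timeout values in this config look intended to force the new timeout errors during testing. A sketch of exercising one through the proxy's OpenAI-compatible endpoint; the base URL, port, config filename, and virtual key are assumptions about a locally running proxy, not part of the commit:

import openai

# assumes something like `litellm --config proxy_config.yaml` is serving on port 4000
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

try:
    client.chat.completions.create(
        model="azure-gpt-35-turbo",
        messages=[{"role": "user", "content": "hi"}],
    )
except openai.APIError as e:
    # the surfaced error should now include the timeout value and the elapsed time
    print(e)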


@@ -1,12 +1,15 @@
-import copy
+from typing import Any, Dict

 from fastapi import APIRouter, Depends, HTTPException, Request

 import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import RedisCache
+from litellm.litellm_core_utils.sensitive_data_masker import SensitiveDataMasker
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth

+masker = SensitiveDataMasker()
+
 router = APIRouter(
     prefix="/cache",
     tags=["caching"],
@@ -21,27 +24,30 @@ async def cache_ping():
     """
     Endpoint for checking if cache can be pinged
     """
-    litellm_cache_params = {}
-    specific_cache_params = {}
+    litellm_cache_params: Dict[str, Any] = {}
+    specific_cache_params: Dict[str, Any] = {}
+
     try:
         if litellm.cache is None:
             raise HTTPException(
                 status_code=503, detail="Cache not initialized. litellm.cache is None"
             )
-        litellm_cache_params = {}
-        specific_cache_params = {}
+
         for k, v in vars(litellm.cache).items():
             try:
                 if k == "cache":
                     continue
-                litellm_cache_params[k] = str(copy.deepcopy(v))
+                litellm_cache_params[k] = v
             except Exception:
                 litellm_cache_params[k] = "<unable to copy or convert>"
         for k, v in vars(litellm.cache.cache).items():
             try:
-                specific_cache_params[k] = str(v)
+                specific_cache_params[k] = v
             except Exception:
                 specific_cache_params[k] = "<unable to copy or convert>"
+
+        litellm_cache_params = masker.mask_dict(litellm_cache_params)
+        specific_cache_params = masker.mask_dict(specific_cache_params)
+
         if litellm.cache.type == "redis":
             # ping the redis cache
             ping_response = await litellm.cache.ping()
@@ -56,6 +62,7 @@ async def cache_ping():
                 messages=[{"role": "user", "content": "test from litellm"}],
             )
             verbose_proxy_logger.debug("/cache/ping: done with set_cache()")
+
         return {
             "status": "healthy",
             "cache_type": litellm.cache.type,


@@ -13,6 +13,7 @@ IGNORE_FUNCTIONS = [
     "add_object_type",
     "strip_field",
     "_transform_prompt",
+    "mask_dict",
 ]