Litellm dev 02 07 2025 p2 (#8377)

* fix(caching_routes.py): mask redis password on `/cache/ping` route

* fix(caching_routes.py): fix linting error

* fix(caching_routes.py): fix linting error on caching routes

* fix: fix test - ignore mask_dict - has a breakpoint

* fix(azure.py): add timeout param + elapsed time in azure timeout error

* fix(http_handler.py): add elapsed time to http timeout request

makes it easier to debug how long the request took before failing
Authored by Krish Dholakia on 2025-02-07 17:30:38 -08:00, committed by GitHub
parent ff2529a994
commit 7363b072c1
7 changed files with 126 additions and 25 deletions


@@ -0,0 +1,81 @@
from typing import Any, Dict, Optional, Set


class SensitiveDataMasker:
    def __init__(
        self,
        sensitive_patterns: Optional[Set[str]] = None,
        visible_prefix: int = 4,
        visible_suffix: int = 4,
        mask_char: str = "*",
    ):
        self.sensitive_patterns = sensitive_patterns or {
            "password",
            "secret",
            "key",
            "token",
            "auth",
            "credential",
            "access",
            "private",
            "certificate",
        }

        self.visible_prefix = visible_prefix
        self.visible_suffix = visible_suffix
        self.mask_char = mask_char

    def _mask_value(self, value: str) -> str:
        if not value or len(str(value)) < (self.visible_prefix + self.visible_suffix):
            return value

        value_str = str(value)
        masked_length = len(value_str) - (self.visible_prefix + self.visible_suffix)
        return f"{value_str[:self.visible_prefix]}{self.mask_char * masked_length}{value_str[-self.visible_suffix:]}"

    def is_sensitive_key(self, key: str) -> bool:
        key_lower = str(key).lower()
        result = any(pattern in key_lower for pattern in self.sensitive_patterns)
        return result

    def mask_dict(
        self, data: Dict[str, Any], depth: int = 0, max_depth: int = 10
    ) -> Dict[str, Any]:
        if depth >= max_depth:
            return data

        masked_data: Dict[str, Any] = {}
        for k, v in data.items():
            try:
                if isinstance(v, dict):
                    masked_data[k] = self.mask_dict(v, depth + 1)
                elif hasattr(v, "__dict__") and not isinstance(v, type):
                    masked_data[k] = self.mask_dict(vars(v), depth + 1)
                elif self.is_sensitive_key(k):
                    str_value = str(v) if v is not None else ""
                    masked_data[k] = self._mask_value(str_value)
                else:
                    masked_data[k] = (
                        v if isinstance(v, (int, float, bool, str)) else str(v)
                    )
            except Exception:
                masked_data[k] = "<unable to serialize>"

        return masked_data


# Usage example:
"""
masker = SensitiveDataMasker()
data = {
    "api_key": "sk-1234567890abcdef",
    "redis_password": "very_secret_pass",
    "port": 6379
}

masked = masker.mask_dict(data)
# Result: {
#     "api_key": "sk-1***********cdef",
#     "redis_password": "very********pass",
#     "port": 6379
# }
"""


@@ -5,7 +5,7 @@ import time
 from typing import Any, Callable, Dict, List, Literal, Optional, Union

 import httpx  # type: ignore
-from openai import AsyncAzureOpenAI, AzureOpenAI
+from openai import APITimeoutError, AsyncAzureOpenAI, AzureOpenAI

 import litellm
 from litellm.caching.caching import DualCache
@@ -305,6 +305,7 @@ class AzureChatCompletion(BaseLLM):
         - call chat.completions.create.with_raw_response when litellm.return_response_headers is True
         - call chat.completions.create by default
         """
+        start_time = time.time()
         try:
             raw_response = await azure_client.chat.completions.with_raw_response.create(
                 **data, timeout=timeout
@@ -313,6 +314,11 @@ class AzureChatCompletion(BaseLLM):
             headers = dict(raw_response.headers)
             response = raw_response.parse()
             return headers, response
+        except APITimeoutError as e:
+            end_time = time.time()
+            time_delta = round(end_time - start_time, 2)
+            e.message += f" - timeout value={timeout}, time taken={time_delta} seconds"
+            raise e
         except Exception as e:
             raise e
@@ -642,6 +648,7 @@ class AzureChatCompletion(BaseLLM):
                 )
             raise AzureOpenAIError(status_code=500, message=str(e))
         except Exception as e:
+            message = getattr(e, "message", str(e))
             ## LOGGING
             logging_obj.post_call(
                 input=data["messages"],
@@ -652,7 +659,7 @@ class AzureChatCompletion(BaseLLM):
             if hasattr(e, "status_code"):
                 raise e
             else:
-                raise AzureOpenAIError(status_code=500, message=str(e))
+                raise AzureOpenAIError(status_code=500, message=message)

     def streaming(
         self,
@@ -797,10 +804,11 @@ class AzureChatCompletion(BaseLLM):
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
             error_response = getattr(e, "response", None)
+            message = getattr(e, "message", str(e))
             if error_headers is None and error_response:
                 error_headers = getattr(error_response, "headers", None)
             raise AzureOpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=message, headers=error_headers
             )

     async def aembedding(
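
A minimal standalone sketch of what the azure.py timeout handling above does to the error: it appends the configured timeout and the measured elapsed time to the APITimeoutError message before re-raising. The request URL, sleep, and timeout value here are stand-ins, not taken from the commit:

import time

import httpx
from openai import APITimeoutError

timeout = 0.1  # stand-in for the timeout passed through to the Azure client
start_time = time.time()
time.sleep(0.05)  # pretend the request ran for a bit before timing out

e = APITimeoutError(request=httpx.Request("POST", "https://example.openai.azure.com"))
end_time = time.time()
time_delta = round(end_time - start_time, 2)
e.message += f" - timeout value={timeout}, time taken={time_delta} seconds"
print(e.message)
# prints something like: Request timed out. - timeout value=0.1, time taken=0.05 seconds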


@@ -1,5 +1,6 @@
 import asyncio
 import os
+import time
 from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Optional, Union

 import httpx
@@ -179,6 +180,7 @@ class AsyncHTTPHandler:
         stream: bool = False,
         logging_obj: Optional[LiteLLMLoggingObject] = None,
     ):
+        start_time = time.time()
         try:
             if timeout is None:
                 timeout = self.timeout
@@ -207,6 +209,8 @@ class AsyncHTTPHandler:
             finally:
                 await new_client.aclose()
         except httpx.TimeoutException as e:
+            end_time = time.time()
+            time_delta = round(end_time - start_time, 3)
             headers = {}
             error_response = getattr(e, "response", None)
             if error_response is not None:
@@ -214,7 +218,7 @@ class AsyncHTTPHandler:
                     headers["response_headers-{}".format(key)] = value

             raise litellm.Timeout(
-                message=f"Connection timed out after {timeout} seconds.",
+                message=f"Connection timed out. Timeout passed={timeout}, time taken={time_delta} seconds",
                 model="default-model-name",
                 llm_provider="litellm-httpx-handler",
                 headers=headers,
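
To see the handler change end to end, a hedged sketch that forces the httpx timeout path and prints the enriched litellm.Timeout message (the module path and target URL are assumptions; a timeout this small should trip before any response arrives):

import asyncio

import litellm
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler  # assumed module path


async def main():
    handler = AsyncHTTPHandler()
    try:
        await handler.post("https://example.com", timeout=0.000000001)
    except litellm.Timeout as e:
        # message now carries both the configured timeout and the measured elapsed time
        print(e)


asyncio.run(main())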

File diff suppressed because one or more lines are too long


@@ -5,6 +5,12 @@ model_list:
   - model_name: gpt-4
     litellm_params:
       model: gpt-3.5-turbo
+  - model_name: azure-gpt-35-turbo
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
+      timeout: 0.000000001
   - model_name: o3-mini
     litellm_params:
       model: o3-mini
@@ -12,7 +18,7 @@ model_list:
   - model_name: anthropic-claude
     litellm_params:
       model: claude-3-5-haiku-20241022
-      mock_response: Hi!
+      timeout: 0.000000001
   - model_name: groq/*
     litellm_params:
       model: groq/*
@@ -28,16 +34,11 @@ model_list:
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/

-general_settings:
-  enable_jwt_auth: True
-  litellm_jwtauth:
-    team_id_jwt_field: "client_id"
-    team_id_upsert: true
-    scope_mappings:
-      - scope: litellm.api.consumer
-        models: ["anthropic-claude"]
-        routes: ["/v1/chat/completions"]
-      - scope: litellm.api.gpt_3_5_turbo
-        models: ["gpt-3.5-turbo-testing"]
-    enforce_scope_based_access: true
-    enforce_rbac: true
+litellm_settings:
+  cache: true
+
+router_settings:
+  redis_host: os.environ/REDIS_HOST
+  redis_password: os.environ/REDIS_PASSWORD
+  redis_port: os.environ/REDIS_PORT
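
The nanosecond timeout values in this config look intended to force the new timeout errors during testing. A sketch of exercising one through the proxy's OpenAI-compatible endpoint; the base URL, port, config filename, and virtual key are assumptions about a locally running proxy, not part of the commit:

import openai

# assumes something like `litellm --config proxy_config.yaml` is serving on port 4000
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

try:
    client.chat.completions.create(
        model="azure-gpt-35-turbo",
        messages=[{"role": "user", "content": "hi"}],
    )
except openai.APIError as e:
    # the surfaced error should now include the timeout value and the elapsed time
    print(e)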


@@ -1,12 +1,15 @@
-import copy
+from typing import Any, Dict

 from fastapi import APIRouter, Depends, HTTPException, Request

 import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import RedisCache
+from litellm.litellm_core_utils.sensitive_data_masker import SensitiveDataMasker
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth

+masker = SensitiveDataMasker()
+
 router = APIRouter(
     prefix="/cache",
     tags=["caching"],
@@ -21,27 +24,30 @@ async def cache_ping():
     """
     Endpoint for checking if cache can be pinged
     """
-    litellm_cache_params = {}
-    specific_cache_params = {}
+    litellm_cache_params: Dict[str, Any] = {}
+    specific_cache_params: Dict[str, Any] = {}
+
     try:
         if litellm.cache is None:
             raise HTTPException(
                 status_code=503, detail="Cache not initialized. litellm.cache is None"
             )
-        litellm_cache_params = {}
-        specific_cache_params = {}
+
         for k, v in vars(litellm.cache).items():
             try:
                 if k == "cache":
                     continue
-                litellm_cache_params[k] = str(copy.deepcopy(v))
+                litellm_cache_params[k] = v
             except Exception:
                 litellm_cache_params[k] = "<unable to copy or convert>"
         for k, v in vars(litellm.cache.cache).items():
             try:
-                specific_cache_params[k] = str(v)
+                specific_cache_params[k] = v
             except Exception:
                 specific_cache_params[k] = "<unable to copy or convert>"
+
+        litellm_cache_params = masker.mask_dict(litellm_cache_params)
+        specific_cache_params = masker.mask_dict(specific_cache_params)
+
         if litellm.cache.type == "redis":
             # ping the redis cache
             ping_response = await litellm.cache.ping()
@@ -56,6 +62,7 @@ async def cache_ping():
                 messages=[{"role": "user", "content": "test from litellm"}],
             )
             verbose_proxy_logger.debug("/cache/ping: done with set_cache()")
+
         return {
             "status": "healthy",
             "cache_type": litellm.cache.type,


@@ -13,6 +13,7 @@ IGNORE_FUNCTIONS = [
     "add_object_type",
     "strip_field",
     "_transform_prompt",
+    "mask_dict",
 ]