fix(_logging.py): add sensitive data filter to logging

Fixes https://github.com/BerriAI/litellm/issues/7603

Also addresses https://github.com/BerriAI/litellm/issues/9815#issuecomment-2806844725
Krrish Dholakia 2025-04-16 17:27:35 -07:00
parent f661dd7776
commit d2ad3bbfc0
3 changed files with 162 additions and 1 deletion

litellm/_logging.py

@@ -1,6 +1,7 @@
import json
import logging
import os
import re
import sys
from datetime import datetime
from logging import Formatter
@@ -41,6 +42,54 @@ class JsonFormatter(Formatter):
        return json.dumps(json_record)


class SensitiveDataFilter(logging.Filter):
    """Filter to redact sensitive information from logs"""

    SENSITIVE_KEYS = [
        "credentials",
        "api_key",
        "key",
        "api_base",
        "password",
        "secret",
        "token",
    ]

    def filter(self, record):
        if not hasattr(record, "msg") or not record.msg:
            return True

        # Convert message to string if it's not already
        msg = str(record.msg)
        key_pattern = r'["\']?([^"\':\s]+)["\']?\s*[:=]'
        keys = re.findall(key_pattern, msg)

        # Redact sensitive information
        for key in keys:
            # Check if any sensitive key is a substring of the current key
            if any(
                sensitive_key in key.lower() for sensitive_key in self.SENSITIVE_KEYS
            ):
                # Handle JSON-like strings
                pattern = f'"{key}":\\s*"[^"]*"'
                msg = re.sub(pattern, f'"{key}": "REDACTED"', msg)

                # Handle key-value pairs in plain text
                pattern = f"{key}\\s*=\\s*[^\\s,}}]+"
                msg = re.sub(pattern, f"{key}=REDACTED", msg)

                # Handle dictionary-like strings
                pattern = f"'{key}':\\s*'[^']*'"
                msg = re.sub(pattern, f"'{key}': 'REDACTED'", msg)
                pattern = f"\"{key}\":\\s*'[^']*'"
                msg = re.sub(pattern, f"\"{key}\": 'REDACTED'", msg)

        record.msg = msg
        return True

# Function to set up exception handlers for JSON logging
def _setup_json_exception_handlers(formatter):
    # Create a handler with JSON formatting for exceptions
@@ -103,6 +152,12 @@ verbose_proxy_logger = logging.getLogger("LiteLLM Proxy")
verbose_router_logger = logging.getLogger("LiteLLM Router")
verbose_logger = logging.getLogger("LiteLLM")

# Add the sensitive data filter to all loggers
sensitive_filter = SensitiveDataFilter()
verbose_router_logger.addFilter(sensitive_filter)
verbose_proxy_logger.addFilter(sensitive_filter)
verbose_logger.addFilter(sensitive_filter)

# Add the handler to the logger
verbose_router_logger.addHandler(handler)
verbose_proxy_logger.addHandler(handler)
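
A minimal sketch of how the filter behaves once attached; the logger name and messages below are illustrative only, not part of the commit:

import logging

from litellm._logging import SensitiveDataFilter

demo_logger = logging.getLogger("demo")  # hypothetical logger for illustration
demo_logger.addHandler(logging.StreamHandler())
demo_logger.addFilter(SensitiveDataFilter())

# The filter rewrites record.msg in place before any handler formats it
demo_logger.warning('{"api_key": "sk-1234", "model": "gpt-4"}')
# -> {"api_key": "REDACTED", "model": "gpt-4"}
demo_logger.warning("retrying with api_base=https://example.com/v1")
# -> retrying with api_base=REDACTED

One caveat worth noting: logger-level filters only run for records created on that logger itself, not for records propagating up from child loggers, so covering child loggers as well would require attaching the filter at the handler level.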


@@ -28,13 +28,20 @@ model_list:
      api_base: https://krris-m2f9a9i7-eastus2.openai.azure.com/
    model_info:
      base_model: azure/gpt-4o-realtime-preview-2024-10-01
  - model_name: "vertex_ai/gemini-1.5-pro-001"
    litellm_params:
      model: vertex_ai/gemini-1.5-pro-001
      vertex_credentials: {"project_id": "krris-m2f9a9i7", "location": "us-central1"}
      api_base: https://us-central1-aiplatform.googleapis.com/v1

litellm_settings:
  num_retries: 0
  callbacks: ["prometheus"]
  check_provider_endpoint: true

router_settings:
  routing_strategy: "usage-based-routing-v2"

files_settings:
  - custom_llm_provider: gemini
    api_key: os.environ/GEMINI_API_KEY


@@ -0,0 +1,99 @@
import logging

import pytest

from litellm._logging import SensitiveDataFilter


def test_sensitive_data_filter():
    # Create a test logger
    logger = logging.getLogger("test_logger")
    logger.setLevel(logging.INFO)

    # Create a filter
    sensitive_filter = SensitiveDataFilter()

    # Test cases
    test_cases = [
        {
            "input": '{"vertex_credentials": {"project_id": "test-project", "location": "us-central1", "private_key": "test-private-key"}}',
            "expected": '{"vertex_credentials": {"project_id": "test-project", "location": "us-central1", "private_key": "REDACTED"}}',
        },
        {
            "input": '{"api_key": "sk-1234567890"}',
            "expected": '{"api_key": "REDACTED"}',
        },
        {
            "input": '{"openai_api_key": "sk-1234567890"}',
            "expected": '{"openai_api_key": "REDACTED"}',
        },
        {"input": '{"password": "secret123"}', "expected": '{"password": "REDACTED"}'},
        {"input": '{"token": "abc123"}', "expected": '{"token": "REDACTED"}'},
        {
            "input": '{"api_base": "https://api.example.com"}',
            "expected": '{"api_base": "REDACTED"}',
        },
        {
            "input": '{"non_sensitive": "value", "credentials": "secret"}',
            "expected": '{"non_sensitive": "value", "credentials": "REDACTED"}',
        },
    ]

    for test_case in test_cases:
        # Create a log record
        record = logging.LogRecord(
            name="test_logger",
            level=logging.INFO,
            pathname="test.py",
            lineno=1,
            msg=test_case["input"],
            args=(),
            exc_info=None,
        )

        # Apply the filter
        sensitive_filter.filter(record)

        # Verify the output
        assert (
            record.msg == test_case["expected"]
        ), f"Failed for input: {test_case['input']}"


def test_sensitive_data_filter_with_different_formats():
    # Create a test logger
    logger = logging.getLogger("test_logger")
    logger.setLevel(logging.INFO)

    # Create a filter
    sensitive_filter = SensitiveDataFilter()

    # Test different formats
    test_cases = [
        {"input": "api_key=sk-1234567890", "expected": "api_key=REDACTED"},
        {
            "input": "'credentials': 'secret123'",
            "expected": "'credentials': 'REDACTED'",
        },
        {"input": "\"token\": 'abc123'", "expected": "\"token\": 'REDACTED'"},
    ]

    for test_case in test_cases:
        # Create a log record
        record = logging.LogRecord(
            name="test_logger",
            level=logging.INFO,
            pathname="test.py",
            lineno=1,
            msg=test_case["input"],
            args=(),
            exc_info=None,
        )

        # Apply the filter
        sensitive_filter.filter(record)

        # Verify the output
        assert (
            record.msg == test_case["expected"]
        ), f"Failed for input: {test_case['input']}"