This commit is contained in:
Krish Dholakia 2025-04-19 11:23:54 -07:00 committed by GitHub
commit 8f5fabec89
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 340 additions and 2 deletions

View file

@@ -1,6 +1,7 @@
import json
import logging
import os
import re
import sys
from datetime import datetime
from logging import Formatter
@@ -41,6 +42,83 @@ class JsonFormatter(Formatter):
return json.dumps(json_record)
class SensitiveDataFilter(logging.Filter):
    """Logging filter that masks secret-bearing values before emission.

    Recognizes compound key names (e.g. ``openai_api_key``) in JSON-style,
    single-quoted dict-style, mixed-quote, and plain ``key=value`` /
    ``key: value`` text, replacing each matched value with ``REDACTED``.
    Redaction is best-effort: any internal failure is logged and the
    record is allowed through unmodified.
    """

    SENSITIVE_KEYS = [
        "credentials",
        "api_key",
        "key",
        "api_base",
        "password",
        "secret",
        "token",
        "private_key",  # Added for nested JSON case
    ]

    def filter(self, record):
        try:
            if not hasattr(record, "msg") or not record.msg:
                return True

            # Render %-style args up front so redaction sees the final text.
            text = record.msg % record.args if record.args else str(record.msg)

            # Scrub each sensitive key family in turn.
            for sensitive_key in self.SENSITIVE_KEYS:
                text = self._scrub(text, sensitive_key)

            # Handle mixed quotes without escaping.
            text = text.replace('\\"', '"').replace("\\'", "'")

            # Store the fully formatted message; args are spent.
            record.msg = text
            record.args = None
        except Exception as e:
            # Never block a log line just because redaction failed.
            logging.getLogger("litellm").error(
                f"Error in SensitiveDataFilter: {str(e)}"
            )
        return True

    @staticmethod
    def _scrub(text, key):
        """Redact every value attached to *key* (or a compound of it) in *text*."""
        # Pattern for compound keys (e.g. openai_api_key).
        key_pattern = f"[a-zA-Z0-9_/\\\\-]*{key}[a-zA-Z0-9_/\\\\-]*"

        # JSON-like: "key": "value"
        text = re.sub(
            f'"({key_pattern})":\\s*"[^"]*"',
            r'"\1": "REDACTED"',
            text,
            flags=re.IGNORECASE,
        )
        # Dict-like: 'key': 'value'
        text = re.sub(
            f"'({key_pattern})':\\s*'[^']*'",
            r"'\1': 'REDACTED'",
            text,
            flags=re.IGNORECASE,
        )
        # Mixed quotes: "key": 'value'
        text = re.sub(
            f"\"({key_pattern})\":\\s*'[^']*'",
            r'"\1": \'REDACTED\'',
            text,
            flags=re.IGNORECASE,
        )
        # Plain text key-value pairs; snake_case keys also match with
        # '-' or ' ' separators, and the separator/spacing is preserved.
        display_key = key.replace("_", "[-_ ]")
        text = re.sub(
            f"\\b({key_pattern}|{display_key})\\s*([:=])\\s*[^,\\s][^,]*",
            lambda m: f"{m.group(1)}{m.group(2)}{' ' if m.group(2) == ':' else ''}REDACTED",
            text,
            flags=re.IGNORECASE,
        )
        return text
# Function to set up exception handlers for JSON logging
def _setup_json_exception_handlers(formatter):
# Create a handler with JSON formatting for exceptions
@@ -103,6 +181,12 @@ verbose_proxy_logger = logging.getLogger("LiteLLM Proxy")
verbose_router_logger = logging.getLogger("LiteLLM Router")
verbose_logger = logging.getLogger("LiteLLM")
# Add the sensitive data filter to all loggers.
# A single shared SensitiveDataFilter instance redacts secrets on each
# of the three package loggers.
sensitive_filter = SensitiveDataFilter()
verbose_router_logger.addFilter(sensitive_filter)
verbose_proxy_logger.addFilter(sensitive_filter)
verbose_logger.addFilter(sensitive_filter)
# Add the handler to the logger.
# NOTE(review): `handler` is defined earlier in this module (outside this
# excerpt) — presumably the JSON/stream handler; confirm against full file.
verbose_router_logger.addHandler(handler)
verbose_proxy_logger.addHandler(handler)
@@ -165,3 +249,36 @@ def _is_debugging_on() -> bool:
if verbose_logger.isEnabledFor(logging.DEBUG) or set_verbose is True:
return True
return False
class ResilientLogger(logging.Logger):
    """A logger that continues to work even if filters or handlers fail.

    Unlike the stock ``Logger.handle``, a raising logger-level filter or a
    raising handler is reported (to the ``litellm`` logger) and skipped
    instead of propagating the exception to the logging caller.
    """

    def handle(self, record):
        """
        Handle a record by passing it to all handlers.
        If a filter or handler fails, log the error and continue.

        Fixes vs. the previous version:
        - respects each handler's level (``record.levelno >= handler.level``),
          matching ``Logger.callHandlers`` semantics; previously every
          attached handler received every record regardless of level.
        - no longer calls ``handler.filter(record)`` explicitly:
          ``Handler.handle`` already applies the handler's own filters, so
          the old code ran them twice per record.

        NOTE(review): unlike ``Logger.callHandlers`` this does not walk
        ``self.parent`` — records are not propagated to ancestor loggers.
        """
        if self.disabled:
            return

        # Logger-level filters: a crash here must not lose the record —
        # treat a failing filter as "pass" and keep going.
        try:
            if not self.filter(record):
                return
        except Exception as e:
            logging.getLogger("litellm").error(f"Filter failed: {str(e)}")

        # If we get here, either filtering passed or failed gracefully.
        for handler in self.handlers:
            try:
                if record.levelno >= handler.level:
                    handler.handle(record)
            except Exception as e:
                logging.getLogger("litellm").error(f"Handler failed: {str(e)}")
# Replace the default logger class with our resilient one.
# NOTE: global side effect — any logger created after this point via
# logging.getLogger() will be a ResilientLogger instance; loggers created
# before this line are unaffected.
logging.setLoggerClass(ResilientLogger)

View file

@@ -27,13 +27,14 @@ model_list:
api_key: os.environ/AZURE_API_KEY_REALTIME
api_base: https://krris-m2f9a9i7-eastus2.openai.azure.com/
litellm_settings:
num_retries: 0
callbacks: ["prometheus"]
check_provider_endpoint: true
router_settings:
routing_strategy: "usage-based-routing-v2"
files_settings:
- custom_llm_provider: gemini
api_key: os.environ/GEMINI_API_KEY

View file

@@ -0,0 +1,220 @@
import logging
import pytest
from litellm._logging import SensitiveDataFilter
def test_sensitive_data_filter():
    """SensitiveDataFilter masks values of sensitive keys in JSON-style messages."""
    logging.getLogger("test_logger").setLevel(logging.INFO)

    data_filter = SensitiveDataFilter()

    # (raw message, message after redaction)
    cases = [
        (
            '{"vertex_credentials": {"project_id": "test-project", "location": "us-central1", "private_key": "test-private-key"}}',
            '{"vertex_credentials": {"project_id": "test-project", "location": "us-central1", "private_key": "REDACTED"}}',
        ),
        ('{"api_key": "sk-1234567890"}', '{"api_key": "REDACTED"}'),
        ('{"openai_api_key": "sk-1234567890"}', '{"openai_api_key": "REDACTED"}'),
        ('{"password": "secret123"}', '{"password": "REDACTED"}'),
        ('{"token": "abc123"}', '{"token": "REDACTED"}'),
        ('{"api_base": "https://api.example.com"}', '{"api_base": "REDACTED"}'),
        (
            '{"non_sensitive": "value", "credentials": "secret"}',
            '{"non_sensitive": "value", "credentials": "REDACTED"}',
        ),
    ]

    for raw, expected in cases:
        record = logging.LogRecord(
            name="test_logger",
            level=logging.INFO,
            pathname="test.py",
            lineno=1,
            msg=raw,
            args=(),
            exc_info=None,
        )
        data_filter.filter(record)
        assert record.msg == expected, f"Failed for input: {raw}"
def test_sensitive_data_filter_with_different_formats():
    """Redaction also covers key=value, single-quoted, and mixed-quote styles."""
    logging.getLogger("test_logger").setLevel(logging.INFO)

    data_filter = SensitiveDataFilter()

    # (raw message, message after redaction)
    cases = [
        ("api_key=sk-1234567890", "api_key=REDACTED"),
        ("'credentials': 'secret123'", "'credentials': 'REDACTED'"),
        ("\"token\": 'abc123'", "\"token\": 'REDACTED'"),
    ]

    for raw, expected in cases:
        record = logging.LogRecord(
            name="test_logger",
            level=logging.INFO,
            pathname="test.py",
            lineno=1,
            msg=raw,
            args=(),
            exc_info=None,
        )
        data_filter.filter(record)
        assert record.msg == expected, f"Failed for input: {raw}"
def test_sensitive_data_filter_with_special_characters():
    """Keys containing '-', '/', or '\\' are still recognized and redacted."""
    logging.getLogger("test_logger").setLevel(logging.INFO)

    data_filter = SensitiveDataFilter()

    # (raw message, message after redaction)
    cases = [
        ('{"api_key": "sk-1234567890"}', '{"api_key": "REDACTED"}'),
        ('{"api-key": "sk-1234567890"}', '{"api-key": "REDACTED"}'),
        ('{"api/key": "sk-1234567890"}', '{"api/key": "REDACTED"}'),
        ('{"api\\key": "sk-1234567890"}', '{"api\\key": "REDACTED"}'),
    ]

    for raw, expected in cases:
        record = logging.LogRecord(
            name="test_logger",
            level=logging.INFO,
            pathname="test.py",
            lineno=1,
            msg=raw,
            args=(),
            exc_info=None,
        )
        data_filter.filter(record)
        assert record.msg == expected, f"Failed for input: {raw}"
def test_sensitive_data_filter_with_format_strings():
    """%-style format messages are rendered first, then redacted; args are cleared."""
    logging.getLogger("test_logger").setLevel(logging.INFO)

    data_filter = SensitiveDataFilter()

    # (format string, args, message after formatting + redaction)
    cases = [
        ("API key: %s", ("sk-1234567890",), "API key: REDACTED"),
        (
            "Credentials: %s, Token: %s",
            ("secret123", "abc123"),
            "Credentials: REDACTED, Token: REDACTED",
        ),
        (
            "API base: %s, Key: %s",
            ("https://api.example.com", "sk-1234567890"),
            "API base: REDACTED, Key: REDACTED",
        ),
    ]

    for raw, fmt_args, expected in cases:
        record = logging.LogRecord(
            name="test_logger",
            level=logging.INFO,
            pathname="test.py",
            lineno=1,
            msg=raw,
            args=fmt_args,
            exc_info=None,
        )
        data_filter.filter(record)
        assert (
            record.msg == expected
        ), f"Failed for input: {raw} with args: {fmt_args}"
def test_sensitive_data_filter_reliability():
    """A broken redaction pattern must not break logging itself."""
    logger = logging.getLogger("test_logger")
    logger.setLevel(logging.DEBUG)

    # Sabotage the filter: an unbalanced paren makes every pattern invalid,
    # forcing the filter's internal error path.
    broken_filter = SensitiveDataFilter()
    broken_filter.SENSITIVE_KEYS = [")"]

    logger.addFilter(broken_filter)

    # Logging must swallow the regex failure instead of raising.
    try:
        logger.debug("Test message with sensitive data: api_key=sk-1234567890")
    except Exception as e:
        pytest.fail(f"Logging failed with exception: {str(e)}")

    # Clean up
    logger.removeFilter(broken_filter)