fix(_logging.py): add sensitive data filter to logging

Fixes https://github.com/BerriAI/litellm/issues/7603

Also addresses https://github.com/BerriAI/litellm/issues/9815#issuecomment-2806844725
This commit is contained in:
Krrish Dholakia 2025-04-16 17:27:35 -07:00
parent f661dd7776
commit d2ad3bbfc0
3 changed files with 162 additions and 1 deletions

View file

@ -1,6 +1,7 @@
import json import json
import logging import logging
import os import os
import re
import sys import sys
from datetime import datetime from datetime import datetime
from logging import Formatter from logging import Formatter
@ -41,6 +42,54 @@ class JsonFormatter(Formatter):
return json.dumps(json_record) return json.dumps(json_record)
class SensitiveDataFilter(logging.Filter):
    """Filter to redact sensitive information from logs.

    The filter scans the stringified ``record.msg`` for ``key: value`` /
    ``key=value`` fragments. Whenever a key contains one of
    ``SENSITIVE_KEYS`` (case-insensitive substring match), the value that
    follows it is replaced with ``REDACTED``. Only ``record.msg`` is
    rewritten; ``record.args`` is not inspected.
    """

    SENSITIVE_KEYS = [
        "credentials",
        "api_key",
        "key",
        "api_base",
        "password",
        "secret",
        "token",
    ]

    # An optionally quoted run of characters followed by ':' or '='.
    # Compiled once because filter() runs for every log record.
    _KEY_PATTERN = re.compile(r'["\']?([^"\':\s]+)["\']?\s*[:=]')

    def filter(self, record):
        """Redact sensitive values in ``record.msg`` in place.

        Args:
            record: The log record being processed.

        Returns:
            Always ``True`` - the record is never dropped, only rewritten.
        """
        if not hasattr(record, "msg") or not record.msg:
            return True

        # Convert message to string if it's not already
        msg = str(record.msg)

        processed = set()
        for key in self._KEY_PATTERN.findall(msg):
            # Redaction is idempotent, so a repeated key needs one pass only.
            if key in processed:
                continue
            processed.add(key)

            # Check if any sensitive key is a substring of the current key
            lowered = key.lower()
            if any(sensitive_key in lowered for sensitive_key in self.SENSITIVE_KEYS):
                msg = self._redact_key(msg, key)

        record.msg = msg
        return True

    @staticmethod
    def _redact_key(msg, key):
        """Return ``msg`` with every value attached to ``key`` replaced by REDACTED."""
        # The key comes straight from the log text and may contain regex
        # metacharacters (e.g. "call(api_key"); escape it so building the
        # pattern can neither raise re.error nor match unintended text.
        escaped = re.escape(key)
        substitutions = (
            # Handle JSON-like strings
            (f'"{escaped}":\\s*"[^"]*"', f'"{key}": "REDACTED"'),
            # Handle key-value pairs in plain text
            (f"{escaped}\\s*=\\s*[^\\s,}}]+", f"{key}=REDACTED"),
            # Handle dictionary-like strings
            (f"'{escaped}':\\s*'[^']*'", f"'{key}': 'REDACTED'"),
            (f"\"{escaped}\":\\s*'[^']*'", f"\"{key}\": 'REDACTED'"),
        )
        for pattern, replacement in substitutions:
            # A function replacement is inserted literally, so backslashes in
            # the key are not interpreted as regex template escapes.
            msg = re.sub(pattern, lambda _match, _text=replacement: _text, msg)
        return msg
# Function to set up exception handlers for JSON logging # Function to set up exception handlers for JSON logging
def _setup_json_exception_handlers(formatter): def _setup_json_exception_handlers(formatter):
# Create a handler with JSON formatting for exceptions # Create a handler with JSON formatting for exceptions
@ -103,6 +152,12 @@ verbose_proxy_logger = logging.getLogger("LiteLLM Proxy")
verbose_router_logger = logging.getLogger("LiteLLM Router") verbose_router_logger = logging.getLogger("LiteLLM Router")
verbose_logger = logging.getLogger("LiteLLM") verbose_logger = logging.getLogger("LiteLLM")
# Register one shared redaction filter on each of the LiteLLM loggers
sensitive_filter = SensitiveDataFilter()
for _filtered_logger in (verbose_router_logger, verbose_proxy_logger, verbose_logger):
    _filtered_logger.addFilter(sensitive_filter)
# Add the handler to the logger # Add the handler to the logger
verbose_router_logger.addHandler(handler) verbose_router_logger.addHandler(handler)
verbose_proxy_logger.addHandler(handler) verbose_proxy_logger.addHandler(handler)

View file

@ -28,13 +28,20 @@ model_list:
api_base: https://krris-m2f9a9i7-eastus2.openai.azure.com/ api_base: https://krris-m2f9a9i7-eastus2.openai.azure.com/
model_info: model_info:
base_model: azure/gpt-4o-realtime-preview-2024-10-01 base_model: azure/gpt-4o-realtime-preview-2024-10-01
- model_name: "vertex_ai/gemini-1.5-pro-001"
litellm_params:
model: vertex_ai/gemini-1.5-pro-001
vertex_credentials: {"project_id": "krris-m2f9a9i7", "location": "us-central1"}
api_base: https://us-central1-aiplatform.googleapis.com/v1
litellm_settings: litellm_settings:
num_retries: 0 num_retries: 0
callbacks: ["prometheus"] callbacks: ["prometheus"]
check_provider_endpoint: true check_provider_endpoint: true
router_settings:
routing_strategy: "usage-based-routing-v2"
files_settings: files_settings:
- custom_llm_provider: gemini - custom_llm_provider: gemini
api_key: os.environ/GEMINI_API_KEY api_key: os.environ/GEMINI_API_KEY

View file

@ -0,0 +1,99 @@
import logging
import pytest
from litellm._logging import SensitiveDataFilter
def test_sensitive_data_filter():
    """Check that JSON-style messages get their sensitive values replaced by REDACTED."""
    # Set the level on the logger whose name the records below carry.
    logging.getLogger("test_logger").setLevel(logging.INFO)

    redactor = SensitiveDataFilter()

    # Each scenario pairs a raw message with the message expected after filtering.
    scenarios = [
        {
            "input": '{"vertex_credentials": {"project_id": "test-project", "location": "us-central1", "private_key": "test-private-key"}}',
            "expected": '{"vertex_credentials": {"project_id": "test-project", "location": "us-central1", "private_key": "REDACTED"}}',
        },
        {
            "input": '{"api_key": "sk-1234567890"}',
            "expected": '{"api_key": "REDACTED"}',
        },
        {
            "input": '{"openai_api_key": "sk-1234567890"}',
            "expected": '{"openai_api_key": "REDACTED"}',
        },
        {
            "input": '{"password": "secret123"}',
            "expected": '{"password": "REDACTED"}',
        },
        {
            "input": '{"token": "abc123"}',
            "expected": '{"token": "REDACTED"}',
        },
        {
            "input": '{"api_base": "https://api.example.com"}',
            "expected": '{"api_base": "REDACTED"}',
        },
        {
            "input": '{"non_sensitive": "value", "credentials": "secret"}',
            "expected": '{"non_sensitive": "value", "credentials": "REDACTED"}',
        },
    ]

    for scenario in scenarios:
        raw_message = scenario["input"]

        # Build a record carrying the raw message, then run it through the filter.
        log_record = logging.LogRecord(
            "test_logger", logging.INFO, "test.py", 1, raw_message, (), None
        )
        redactor.filter(log_record)

        assert (
            log_record.msg == scenario["expected"]
        ), f"Failed for input: {raw_message}"
def test_sensitive_data_filter_with_different_formats():
    """Check redaction for key=value text and single-quoted dictionary-like text."""
    # Set the level on the logger whose name the records below carry.
    logging.getLogger("test_logger").setLevel(logging.INFO)

    redactor = SensitiveDataFilter()

    # One scenario per non-JSON message shape.
    scenarios = [
        {
            "input": "api_key=sk-1234567890",
            "expected": "api_key=REDACTED",
        },
        {
            "input": "'credentials': 'secret123'",
            "expected": "'credentials': 'REDACTED'",
        },
        {
            "input": "\"token\": 'abc123'",
            "expected": "\"token\": 'REDACTED'",
        },
    ]

    for scenario in scenarios:
        raw_message = scenario["input"]

        # Build a record carrying the raw message, then run it through the filter.
        log_record = logging.LogRecord(
            "test_logger", logging.INFO, "test.py", 1, raw_message, (), None
        )
        redactor.filter(log_record)

        assert (
            log_record.msg == scenario["expected"]
        ), f"Failed for input: {raw_message}"