From d2ad3bbfc0894e50f416bd55815b657f0fc3d56d Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Wed, 16 Apr 2025 17:27:35 -0700
Subject: [PATCH] fix(_logging.py): add sensitive data filter to logging

Fixes https://github.com/BerriAI/litellm/issues/7603

Also addresses https://github.com/BerriAI/litellm/issues/9815#issuecomment-2806844725
---
 litellm/_logging.py                   | 55 +++++++++++++++
 litellm/proxy/_new_secret_config.yaml |  9 ++-
 tests/litellm/test_logging.py         | 99 +++++++++++++++++++++++++++
 3 files changed, 162 insertions(+), 1 deletion(-)
 create mode 100644 tests/litellm/test_logging.py

diff --git a/litellm/_logging.py b/litellm/_logging.py
index d7e2c9e778..9af4a53ad8 100644
--- a/litellm/_logging.py
+++ b/litellm/_logging.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+import re
 import sys
 from datetime import datetime
 from logging import Formatter
@@ -41,6 +42,54 @@ class JsonFormatter(Formatter):
         return json.dumps(json_record)
 
 
+class SensitiveDataFilter(logging.Filter):
+    """Filter to redact sensitive information from logs"""
+
+    SENSITIVE_KEYS = [
+        "credentials",
+        "api_key",
+        "key",
+        "api_base",
+        "password",
+        "secret",
+        "token",
+    ]
+
+    def filter(self, record):
+        if not hasattr(record, "msg") or not record.msg:
+            return True
+
+        # Convert message to string if it's not already
+        msg = str(record.msg)
+
+        key_pattern = r'["\']?([^"\':\s]+)["\']?\s*[:=]'
+        keys = re.findall(key_pattern, msg)
+
+        # Redact sensitive information
+        for key in keys:
+            # Check if any sensitive key is a substring of the current key
+            if any(
+                sensitive_key in key.lower() for sensitive_key in self.SENSITIVE_KEYS
+            ):
+                # Handle JSON-like strings; escape the key - it comes from log text
+                pattern = f'"{re.escape(key)}":\\s*"[^"]*"'
+                msg = re.sub(pattern, f'"{key}": "REDACTED"', msg)
+
+                # Handle key-value pairs in plain text
+                pattern = f"{re.escape(key)}\\s*=\\s*[^\\s,}}]+"
+                msg = re.sub(pattern, f"{key}=REDACTED", msg)
+
+                # Handle dictionary-like strings
+                pattern = f"'{re.escape(key)}':\\s*'[^']*'"
+                msg = re.sub(pattern, f"'{key}': 'REDACTED'", msg)
+
+                pattern = f"\"{re.escape(key)}\":\\s*'[^']*'"
+                msg = re.sub(pattern, f"\"{key}\": 'REDACTED'", msg)
+
+        record.msg = msg
+        return True
+
+
 # Function to set up exception handlers for JSON logging
 def _setup_json_exception_handlers(formatter):
     # Create a handler with JSON formatting for exceptions
@@ -103,6 +152,12 @@ verbose_proxy_logger = logging.getLogger("LiteLLM Proxy")
 verbose_router_logger = logging.getLogger("LiteLLM Router")
 verbose_logger = logging.getLogger("LiteLLM")
 
+# Add the sensitive data filter to all loggers
+sensitive_filter = SensitiveDataFilter()
+verbose_router_logger.addFilter(sensitive_filter)
+verbose_proxy_logger.addFilter(sensitive_filter)
+verbose_logger.addFilter(sensitive_filter)
+
 # Add the handler to the logger
 verbose_router_logger.addHandler(handler)
 verbose_proxy_logger.addHandler(handler)
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index e166133cec..f1af784e5e 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -28,13 +28,20 @@ model_list:
       api_base: https://krris-m2f9a9i7-eastus2.openai.azure.com/
     model_info:
       base_model: azure/gpt-4o-realtime-preview-2024-10-01
-
+  - model_name: "vertex_ai/gemini-1.5-pro-001"
+    litellm_params:
+      model: vertex_ai/gemini-1.5-pro-001
+      vertex_credentials: {"project_id": "krris-m2f9a9i7", "location": "us-central1"}
+      api_base: https://us-central1-aiplatform.googleapis.com/v1
 
 litellm_settings:
   num_retries: 0
   callbacks: ["prometheus"]
   check_provider_endpoint: true
 
+router_settings:
+  routing_strategy: "usage-based-routing-v2"
+
 files_settings:
   - custom_llm_provider: gemini
     api_key: os.environ/GEMINI_API_KEY
diff --git a/tests/litellm/test_logging.py b/tests/litellm/test_logging.py
new file mode 100644
index 0000000000..5e8eb1d10d
--- /dev/null
+++ b/tests/litellm/test_logging.py
@@ -0,0 +1,99 @@
+import logging
+
+import pytest
+
+from litellm._logging import SensitiveDataFilter
+
+
+def test_sensitive_data_filter():
+    # Create a test logger
+    logger = logging.getLogger("test_logger")
+    logger.setLevel(logging.INFO)
+
+    # Create a filter
+    sensitive_filter = SensitiveDataFilter()
+
+    # Test cases
+    test_cases = [
+        {
+            "input": '{"vertex_credentials": {"project_id": "test-project", "location": "us-central1", "private_key": "test-private-key"}}',
+            "expected": '{"vertex_credentials": {"project_id": "test-project", "location": "us-central1", "private_key": "REDACTED"}}',
+        },
+        {
+            "input": '{"api_key": "sk-1234567890"}',
+            "expected": '{"api_key": "REDACTED"}',
+        },
+        {
+            "input": '{"openai_api_key": "sk-1234567890"}',
+            "expected": '{"openai_api_key": "REDACTED"}',
+        },
+        {"input": '{"password": "secret123"}', "expected": '{"password": "REDACTED"}'},
+        {"input": '{"token": "abc123"}', "expected": '{"token": "REDACTED"}'},
+        {
+            "input": '{"api_base": "https://api.example.com"}',
+            "expected": '{"api_base": "REDACTED"}',
+        },
+        {
+            "input": '{"non_sensitive": "value", "credentials": "secret"}',
+            "expected": '{"non_sensitive": "value", "credentials": "REDACTED"}',
+        },
+    ]
+
+    for test_case in test_cases:
+        # Create a log record
+        record = logging.LogRecord(
+            name="test_logger",
+            level=logging.INFO,
+            pathname="test.py",
+            lineno=1,
+            msg=test_case["input"],
+            args=(),
+            exc_info=None,
+        )
+
+        # Apply the filter
+        sensitive_filter.filter(record)
+
+        # Verify the output
+        assert (
+            record.msg == test_case["expected"]
+        ), f"Failed for input: {test_case['input']}"
+
+
+def test_sensitive_data_filter_with_different_formats():
+    # Create a test logger
+    logger = logging.getLogger("test_logger")
+    logger.setLevel(logging.INFO)
+
+    # Create a filter
+    sensitive_filter = SensitiveDataFilter()
+
+    # Test different formats
+    test_cases = [
+        {"input": "api_key=sk-1234567890", "expected": "api_key=REDACTED"},
+        {
+            "input": "'credentials': 'secret123'",
+            "expected": "'credentials': 'REDACTED'",
+        },
+        {"input": "\"token\": 'abc123'", "expected": "\"token\": 'REDACTED'"},
+    ]
+
+    for test_case in test_cases:
+        # Create a log record
+        record = logging.LogRecord(
+            name="test_logger",
+            level=logging.INFO,
+            pathname="test.py",
+            lineno=1,
+            msg=test_case["input"],
+            args=(),
+            exc_info=None,
+        )
+
+        # Apply the filter
+        sensitive_filter.filter(record)
+
+        # Verify the output
+        assert (
+            record.msg == test_case["expected"]
+        ), f"Failed for input: {test_case['input']}"