Merge pull request #4669 from BerriAI/litellm_logging_only_masking

Flag for PII masking on Logging only
Krish Dholakia 2024-07-11 22:03:37 -07:00 committed by GitHub
commit d72bcdbce3
11 changed files with 379 additions and 81 deletions

View file

@@ -180,3 +180,59 @@ chat_completion = client.chat.completions.create(
"_response_ms": 1753.426
}
```
## Turn on for logging only
Apply PII masking only to the payload that is logged to Langfuse, etc., not to the actual LLM API request / response. (A code-level sketch of the same idea follows the steps below.)
:::note
This currently applies only to:
- `/chat/completions` requests
- 'success' logging
:::
1. Setup config.yaml
```yaml
litellm_settings:
presidio_logging_only: true
model_list:
- model_name: gpt-3.5-turbo
litellm_params:
model: gpt-3.5-turbo
api_key: os.environ/OPENAI_API_KEY
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "user",
"content": "Hi, my name is Jane!"
}
]
}'
```
**Expected Logged Response**
```
Hi, my name is <PERSON>!
```
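Under the hood, the proxy constructs the Presidio hook with `logging_only=True`, and the masking then happens in the new `async_logging_hook` on `CustomLogger` (added in this PR) just before success callbacks run; the request sent to the model and the response returned to the caller are untouched. For illustration only, here is a minimal sketch of wiring the same idea into your own callback in code - `redact_pii` is a hypothetical placeholder, not a litellm API:
```python
from typing import Any, List, Optional, Tuple

import litellm
from litellm.integrations.custom_logger import CustomLogger


def redact_pii(text: str) -> str:
    # Hypothetical stand-in for a real PII detector (e.g. Presidio).
    return text.replace("Jane", "<PERSON>")


class MaskBeforeLogging(CustomLogger):
    async def async_logging_hook(
        self, kwargs: dict, result: Any, call_type: str
    ) -> Tuple[dict, Any]:
        # Redact the copy of the request that logging integrations will see;
        # the actual request/response are not modified.
        messages: Optional[List] = kwargs.get("messages", None)
        if messages is not None:
            for m in messages:
                if isinstance(m.get("content"), str):
                    m["content"] = redact_pii(m["content"])
            kwargs["messages"] = messages
        return kwargs, result


litellm.callbacks = [MaskBeforeLogging()]
```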

View file

@@ -16,7 +16,7 @@ from litellm._logging import (
log_level,
)
from litellm.types.guardrails import GuardrailItem
from litellm.proxy._types import (
KeyManagementSystem,
KeyManagementSettings,
@@ -124,6 +124,7 @@ llamaguard_unsafe_content_categories: Optional[str] = None
blocked_user_list: Optional[Union[str, List]] = None
banned_keywords_list: Optional[Union[str, List]] = None
llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
guardrail_name_config_map: Optional[Dict[str, GuardrailItem]] = None
##################
### PREVIEW FEATURES ###
enable_preview_features: bool = False

View file

@@ -2,7 +2,7 @@
# On success, logs events to Promptlayer
import os
import traceback
-from typing import Literal, Optional, Union
from typing import Any, Literal, Optional, Tuple, Union
import dotenv
from pydantic import BaseModel
@@ -117,6 +117,18 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
):
pass
async def async_logging_hook(
self, kwargs: dict, result: Any, call_type: str
) -> Tuple[dict, Any]:
"""For masking logged request/response. Return a modified version of the request/result."""
return kwargs, result
def logging_hook(
self, kwargs: dict, result: Any, call_type: str
) -> Tuple[dict, Any]:
"""For masking logged request/response. Return a modified version of the request/result."""
return kwargs, result
async def async_moderation_hook(
self,
data: dict,
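Both new hooks default to no-ops that return `(kwargs, result)` unchanged, so existing integrations keep working. As a hedged illustration (not part of this PR), a custom callback could override the synchronous `logging_hook` to trim oversized prompts before they reach logging backends:
```python
from typing import Any, List, Optional, Tuple

from litellm.integrations.custom_logger import CustomLogger


class TruncatePromptsInLogs(CustomLogger):
    """Illustrative only: shorten the logged message content, leave the API call untouched."""

    max_logged_chars: int = 200

    def logging_hook(
        self, kwargs: dict, result: Any, call_type: str
    ) -> Tuple[dict, Any]:
        messages: Optional[List] = kwargs.get("messages", None)
        if messages is not None:
            for m in messages:
                if isinstance(m.get("content"), str):
                    # Keep only the first `max_logged_chars` characters in the logged copy.
                    m["content"] = m["content"][: self.max_logged_chars]
            kwargs["messages"] = messages
        return kwargs, result
```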

View file

@@ -655,6 +655,16 @@ class Logging:
result=result, litellm_logging_obj=self
)
## LOGGING HOOK ##
for callback in callbacks:
if isinstance(callback, CustomLogger):
self.model_call_details["input"], result = callback.logging_hook(
kwargs=self.model_call_details,
result=result,
call_type=self.call_type,
)
for callback in callbacks:
try:
litellm_params = self.model_call_details.get("litellm_params", {})
@@ -1302,6 +1312,18 @@ class Logging:
result=result, litellm_logging_obj=self
)
## LOGGING HOOK ##
for callback in callbacks:
if isinstance(callback, CustomLogger):
self.model_call_details["input"], result = (
await callback.async_logging_hook(
kwargs=self.model_call_details,
result=result,
call_type=self.call_type,
)
)
for callback in callbacks:
# check if callback can run for this request
litellm_params = self.model_call_details.get("litellm_params", {})

View file

@@ -46,7 +46,17 @@ def initialize_callbacks_on_proxy(
_OPTIONAL_PresidioPIIMasking,
)
-pii_masking_object = _OPTIONAL_PresidioPIIMasking()
presidio_logging_only: Optional[bool] = litellm_settings.get(
"presidio_logging_only", None
)
if presidio_logging_only is not None:
presidio_logging_only = bool(
presidio_logging_only
) # validate boolean given
pii_masking_object = _OPTIONAL_PresidioPIIMasking(
logging_only=presidio_logging_only
)
imported_list.append(pii_masking_object)
elif isinstance(callback, str) and callback == "llamaguard_moderations":
from enterprise.enterprise_hooks.llama_guard import (
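For context, this branch runs while the proxy sets up its callbacks. Assuming the existing convention of enabling the hook via `callbacks: ["presidio"]` (the enabling branch itself is outside this hunk), a config along these lines would exercise the new flag:
```yaml
litellm_settings:
  callbacks: ["presidio"]        # assumed: enables _OPTIONAL_PresidioPIIMasking (per the surrounding code)
  presidio_logging_only: true    # new flag read by initialize_callbacks_on_proxy
```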

View file

@@ -18,7 +18,7 @@ def initialize_guardrails(
premium_user: bool,
config_file_path: str,
litellm_settings: dict,
-):
) -> Dict[str, GuardrailItem]:
try:
verbose_proxy_logger.debug(f"validating guardrails passed {guardrails_config}")
global all_guardrails
@@ -55,7 +55,11 @@ def initialize_guardrails(
litellm_settings=litellm_settings,
)
return guardrail_name_config_map
except Exception as e:
-verbose_proxy_logger.error(f"error initializing guardrails {str(e)}")
-traceback.print_exc()
verbose_proxy_logger.error(
"error initializing guardrails {}\n{}".format(
str(e), traceback.format_exc()
)
)
raise e

View file

@@ -12,7 +12,7 @@ import asyncio
import json
import traceback
import uuid
-from typing import Optional, Union
from typing import Any, List, Optional, Tuple, Union
import aiohttp
from fastapi import HTTPException
@@ -27,6 +27,7 @@ from litellm.utils import (
ImageResponse,
ModelResponse,
StreamingChoices,
get_formatted_prompt,
)
@@ -36,14 +37,18 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
# Class variables or attributes
def __init__(
-self, mock_testing: bool = False, mock_redacted_text: Optional[dict] = None
self,
logging_only: Optional[bool] = None,
mock_testing: bool = False,
mock_redacted_text: Optional[dict] = None,
):
self.pii_tokens: dict = (
{}
) # mapping of PII token to original text - only used with Presidio `replace` operation
self.mock_redacted_text = mock_redacted_text
-if mock_testing == True: # for testing purposes only
self.logging_only = logging_only
if mock_testing is True: # for testing purposes only
return
ad_hoc_recognizers = litellm.presidio_ad_hoc_recognizers
@@ -188,6 +193,10 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
For multiple messages in /chat/completions, we'll need to call them in parallel.
"""
try:
if (
self.logging_only is True
): # only modify the logging obj data (done by async_logging_hook)
return data
permissions = user_api_key_dict.permissions
output_parse_pii = permissions.get(
"output_parse_pii", litellm.output_parse_pii
@@ -244,7 +253,7 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
},
)
-if no_pii == True: # turn off pii masking
if no_pii is True: # turn off pii masking
return data
if call_type == "completion": # /chat/completions requests
@@ -274,6 +283,43 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
)
raise e
async def async_logging_hook(
self, kwargs: dict, result: Any, call_type: str
) -> Tuple[dict, Any]:
"""
Masks the input before logging to langfuse, datadog, etc.
"""
if (
call_type == "completion" or call_type == "acompletion"
): # /chat/completions requests
messages: Optional[List] = kwargs.get("messages", None)
tasks = []
if messages is None:
return kwargs, result
for m in messages:
text_str = ""
if m["content"] is None:
continue
if isinstance(m["content"], str):
text_str = m["content"]
tasks.append(
self.check_pii(text=text_str, output_parse_pii=False)
) # need to pass separately b/c presidio has context window limits
responses = await asyncio.gather(*tasks)
for index, r in enumerate(responses):
if isinstance(messages[index]["content"], str):
messages[index][
"content"
] = r # replace content with redacted string
verbose_proxy_logger.info(
f"Presidio PII Masking: Redacted pii message: {messages}"
)
kwargs["messages"] = messages
return kwargs, responses
async def async_post_call_success_hook(
self,
user_api_key_dict: UserAPIKeyAuth,

View file

@@ -1469,12 +1469,14 @@ class ProxyConfig:
+ CommonProxyErrors.not_premium_user.value
)
-initialize_guardrails(
guardrail_name_config_map = initialize_guardrails(
guardrails_config=value,
premium_user=premium_user,
config_file_path=config_file_path,
litellm_settings=litellm_settings,
)
litellm.guardrail_name_config_map = guardrail_name_config_map
elif key == "callbacks": elif key == "callbacks":
initialize_callbacks_on_proxy( initialize_callbacks_on_proxy(

View file

@@ -220,6 +220,9 @@ async def test_aarun_thread_litellm(sync_mode, provider, is_streaming):
- Create thread
- Create run w/ Assistants + Thread
"""
import openai
try:
if sync_mode:
assistants = litellm.get_assistants(custom_llm_provider=provider)
else:
@@ -279,7 +282,7 @@ async def test_aarun_thread_litellm(sync_mode, provider, is_streaming):
AsyncAssistantEventHandler,
)
print(run)
-run.until_done()
await run.until_done()
else:
run = await litellm.arun_thread(
custom_llm_provider=provider,
@@ -298,3 +301,5 @@ async def test_aarun_thread_litellm(sync_mode, provider, is_streaming):
run
)
)
except openai.APIError as e:
pass

View file

@@ -0,0 +1,73 @@
# What is this?
## Unit Tests for guardrails config
import asyncio
import inspect
import os
import sys
import time
import traceback
import uuid
from datetime import datetime
import pytest
from pydantic import BaseModel
import litellm.litellm_core_utils
import litellm.litellm_core_utils.litellm_logging
sys.path.insert(0, os.path.abspath("../.."))
from typing import Any, List, Literal, Optional, Tuple, Union
from unittest.mock import AsyncMock, MagicMock, patch
import litellm
from litellm import Cache, completion, embedding
from litellm.integrations.custom_logger import CustomLogger
from litellm.types.utils import LiteLLMCommonStrings
class CustomLoggingIntegration(CustomLogger):
def __init__(self) -> None:
super().__init__()
def logging_hook(
self, kwargs: dict, result: Any, call_type: str
) -> Tuple[dict, Any]:
input: Optional[Any] = kwargs.get("input", None)
messages: Optional[List] = kwargs.get("messages", None)
if call_type == "completion":
# assume input is of type messages
if input is not None and isinstance(input, list):
input[0]["content"] = "Hey, my name is [NAME]."
if messages is not None and isinstance(messages, List):
messages[0]["content"] = "Hey, my name is [NAME]."
kwargs["input"] = input
kwargs["messages"] = messages
return kwargs, result
def test_guardrail_masking_logging_only():
"""
Assert response is unmasked.
Assert logged response is masked.
"""
callback = CustomLoggingIntegration()
with patch.object(callback, "log_success_event", new=MagicMock()) as mock_call:
litellm.callbacks = [callback]
messages = [{"role": "user", "content": "Hey, my name is Peter."}]
response = completion(
model="gpt-3.5-turbo", messages=messages, mock_response="Hi Peter!"
)
assert response.choices[0].message.content == "Hi Peter!" # type: ignore
mock_call.assert_called_once()
print(mock_call.call_args.kwargs["kwargs"]["messages"][0]["content"])
assert (
mock_call.call_args.kwargs["kwargs"]["messages"][0]["content"]
== "Hey, my name is [NAME]."
)

View file

@@ -16,6 +16,8 @@ import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
import litellm
@@ -196,3 +198,68 @@ async def test_presidio_pii_masking_input_b():
assert "<PERSON>" in new_data["messages"][0]["content"]
assert "<PHONE_NUMBER>" not in new_data["messages"][0]["content"]
@pytest.mark.asyncio
async def test_presidio_pii_masking_logging_output_only_no_pre_api_hook():
pii_masking = _OPTIONAL_PresidioPIIMasking(
logging_only=True,
mock_testing=True,
mock_redacted_text=input_b_anonymizer_results,
)
_api_key = "sk-12345"
user_api_key_dict = UserAPIKeyAuth(api_key=_api_key)
local_cache = DualCache()
test_messages = [
{
"role": "user",
"content": "My name is Jane Doe, who are you? Say my name in your response",
}
]
new_data = await pii_masking.async_pre_call_hook(
user_api_key_dict=user_api_key_dict,
cache=local_cache,
data={"messages": test_messages},
call_type="completion",
)
assert "Jane Doe" in new_data["messages"][0]["content"]
@pytest.mark.asyncio
async def test_presidio_pii_masking_logging_output_only_logged_response():
pii_masking = _OPTIONAL_PresidioPIIMasking(
logging_only=True,
mock_testing=True,
mock_redacted_text=input_b_anonymizer_results,
)
test_messages = [
{
"role": "user",
"content": "My name is Jane Doe, who are you? Say my name in your response",
}
]
with patch.object(
pii_masking, "async_log_success_event", new=AsyncMock()
) as mock_call:
litellm.callbacks = [pii_masking]
response = await litellm.acompletion(
model="gpt-3.5-turbo", messages=test_messages, mock_response="Hi Peter!"
)
await asyncio.sleep(3)
assert response.choices[0].message.content == "Hi Peter!" # type: ignore
mock_call.assert_called_once()
print(mock_call.call_args.kwargs["kwargs"]["messages"][0]["content"])
assert (
mock_call.call_args.kwargs["kwargs"]["messages"][0]["content"]
== "My name is <PERSON>, who are you? Say my name in your response"
)