Merge pull request #5514 from BerriAI/litellm_add_presidio
[Fix-Refactor] support presidio on new guardrails config
commit 4b9163c7dc
9 changed files with 778 additions and 13 deletions
338
docs/my-website/docs/proxy/guardrails/pii_masking_v2.md
Normal file
@@ -0,0 +1,338 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# PII Masking - Presidio

## Quick Start

LiteLLM supports [Microsoft Presidio](https://github.com/microsoft/presidio/) for PII masking.

### 1. Define Guardrails on your LiteLLM config.yaml

Define your guardrails under the `guardrails` section:

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY

guardrails:
  - guardrail_name: "presidio-pre-guard"
    litellm_params:
      guardrail: presidio # supported values: "aporia", "bedrock", "lakera", "presidio"
      mode: "pre_call"
```

Set the following env vars:

```bash
export PRESIDIO_ANALYZER_API_BASE="http://localhost:5002"
export PRESIDIO_ANONYMIZER_API_BASE="http://localhost:5001"
```
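
If you don't already have Presidio endpoints running, one way to bring them up locally on the ports above is with Microsoft's published Docker images; a minimal sketch, assuming Docker is available (both images serve on port 3000 inside the container):

```bash
# Analyzer on localhost:5002, anonymizer on localhost:5001 (matching the env vars above)
docker run -d -p 5002:3000 mcr.microsoft.com/presidio-analyzer:latest
docker run -d -p 5001:3000 mcr.microsoft.com/presidio-anonymizer:latest
```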

#### Supported values for `mode`

- `pre_call` Run **before** LLM call, on **input**
- `post_call` Run **after** LLM call, on **input & output**
- `logging_only` Run **after** LLM call; apply PII masking only before logging to Langfuse, etc., not on the actual LLM API request/response

### 2. Start LiteLLM Gateway

```shell
litellm --config config.yaml --detailed_debug
```

### 3. Test request

**[Langchain, OpenAI SDK Usage Examples](../proxy/user_keys#request-format)**

<Tabs>
<TabItem label="Masked PII call" value="not-allowed">

Expect this to mask `Jane Doe`, since it's PII:

```shell
curl http://localhost:4000/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "gpt-3.5-turbo",
    "messages": [
      {"role": "user", "content": "Hello my name is Jane Doe"}
    ],
    "guardrails": ["presidio-pre-guard"]
  }'
```

Expected response, with the PII masked:

```json
{
  "id": "chatcmpl-A3qSC39K7imjGbZ8xCDacGJZBoTJQ",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "Hello, <PERSON>! How can I assist you today?",
        "role": "assistant",
        "tool_calls": null,
        "function_call": null
      }
    }
  ],
  "created": 1725479980,
  "model": "gpt-3.5-turbo-2024-07-18",
  "object": "chat.completion",
  "system_fingerprint": "fp_5bd87c427a",
  "usage": {
    "completion_tokens": 13,
    "prompt_tokens": 14,
    "total_tokens": 27
  },
  "service_tier": null
}
```

</TabItem>

<TabItem label="No PII Call" value="allowed">

```shell
curl http://localhost:4000/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "gpt-3.5-turbo",
    "messages": [
      {"role": "user", "content": "Hello good morning"}
    ],
    "guardrails": ["presidio-pre-guard"]
  }'
```

</TabItem>
</Tabs>
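
The same masked-PII request can also be sent through the OpenAI Python SDK; a minimal sketch, assuming the proxy runs on `localhost:4000` (litellm-specific params such as `guardrails` are passed via `extra_body`):

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

# `guardrails` is a litellm proxy param, so it rides along in extra_body
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello my name is Jane Doe"}],
    extra_body={"guardrails": ["presidio-pre-guard"]},
)
print(response.choices[0].message.content)  # e.g. "Hello, <PERSON>! How can I assist you today?"
```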

## Advanced

### Set `language` per request

The Presidio API [supports passing the `language` param](https://microsoft.github.io/presidio/api-docs/api-docs.html#tag/Analyzer/paths/~1analyze/post). Here is how to set the `language` per request:

<Tabs>
<TabItem label="curl" value="curl">

```shell
curl http://localhost:4000/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "gpt-3.5-turbo",
    "messages": [
      {"role": "user", "content": "is this credit card number 9283833 correct?"}
    ],
    "guardrails": ["presidio-pre-guard"],
    "guardrail_config": {"language": "es"}
  }'
```

</TabItem>

<TabItem label="OpenAI Python SDK" value="python">

```python
import openai

client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "metadata": {
            "guardrails": ["presidio-pre-guard"],
            "guardrail_config": {"language": "es"}
        }
    }
)

print(response)
```

</TabItem>
</Tabs>
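
For reference, what reaches Presidio in this case is a standard `/analyze` request with the overridden language; a sketch of the payload litellm builds for the message above (per the analyzer API, before any ad-hoc recognizers are attached):

```json
{
  "text": "is this credit card number 9283833 correct?",
  "language": "es"
}
```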

### Output parsing

LLM responses can sometimes contain the masked tokens.

For Presidio `replace` operations, LiteLLM can check the LLM response and replace the masked token with the user-submitted values.

Define your guardrails under the `guardrails` section:

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY

guardrails:
  - guardrail_name: "presidio-pre-guard"
    litellm_params:
      guardrail: presidio # supported values: "aporia", "bedrock", "lakera", "presidio"
      mode: "pre_call"
      output_parse_pii: True
```

**Expected Flow:**

1. User Input: "hello world, my name is Jane Doe. My number is: 034453334"
2. LLM Input: "hello world, my name is [PERSON]. My number is: [PHONE_NUMBER]"
3. LLM Response: "Hey [PERSON], nice to meet you!"
4. User Response: "Hey Jane Doe, nice to meet you!"
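
Under the hood this relies on a map from each replacement token to the original user text, built while the input was being masked; a minimal sketch of the idea (illustrative names, not litellm's internals):

```python
# Built at masking time: replacement token -> original user-submitted text
pii_tokens = {"[PERSON]": "Jane Doe", "[PHONE_NUMBER]": "034453334"}

def unmask(llm_response: str) -> str:
    """Swap masked tokens in the LLM output back to the user's original values."""
    for token, original in pii_tokens.items():
        llm_response = llm_response.replace(token, original)
    return llm_response

print(unmask("Hey [PERSON], nice to meet you!"))  # -> "Hey Jane Doe, nice to meet you!"
```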

### Ad Hoc Recognizers

Send ad-hoc recognizers to Presidio `/analyze` by passing a JSON file to the proxy.

[**Example** ad-hoc recognizer](../../../../litellm/proxy/hooks/example_presidio_ad_hoc_recognize)
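
The file holds a list of recognizers in Presidio's documented ad-hoc recognizer format (regex pattern recognizers forwarded verbatim to `/analyze`); a hypothetical example for Swiss AHV numbers, with the entity name, regex, and score chosen for illustration:

```json
[
  {
    "name": "AHV Number Recognizer",
    "supported_language": "en",
    "supported_entity": "AHV_NUMBER",
    "patterns": [
      {
        "name": "AHV number (Swiss social insurance)",
        "regex": "\\b756\\.\\d{4}\\.\\d{4}\\.\\d{2}\\b",
        "score": 0.8
      }
    ]
  }
]
```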

#### Define ad-hoc recognizer on your LiteLLM config.yaml

Define your guardrails under the `guardrails` section:

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY

guardrails:
  - guardrail_name: "presidio-pre-guard"
    litellm_params:
      guardrail: presidio # supported values: "aporia", "bedrock", "lakera", "presidio"
      mode: "pre_call"
      presidio_ad_hoc_recognizers: "./hooks/example_presidio_ad_hoc_recognizer.json"
```

Set the following env vars:

```bash
export PRESIDIO_ANALYZER_API_BASE="http://localhost:5002"
export PRESIDIO_ANONYMIZER_API_BASE="http://localhost:5001"
```

You can see this working when you run the proxy:

```bash
litellm --config /path/to/config.yaml --debug
```

Make a chat completions request, for example:

```json
{
  "model": "azure-gpt-3.5",
  "messages": [{"role": "user", "content": "John Smith AHV number is 756.3026.0705.92. Zip code: 1334023"}]
}
```

And search for any log starting with `Presidio PII Masking`, for example:

```
Presidio PII Masking: Redacted pii message: <PERSON> AHV number is <AHV_NUMBER>. Zip code: <US_DRIVER_LICENSE>
```

### Logging Only

Only apply PII masking before logging to Langfuse, etc., not on the actual LLM API request/response.

:::note
This is currently only applied for
- `/chat/completion` requests
- on 'success' logging
:::

1. Define mode: `logging_only` on your LiteLLM config.yaml

Define your guardrails under the `guardrails` section:

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY

guardrails:
  - guardrail_name: "presidio-pre-guard"
    litellm_params:
      guardrail: presidio # supported values: "aporia", "bedrock", "lakera", "presidio"
      mode: "logging_only"
```

Set the following env vars:

```bash
export PRESIDIO_ANALYZER_API_BASE="http://localhost:5002"
export PRESIDIO_ANONYMIZER_API_BASE="http://localhost:5001"
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer sk-1234' \
  -d '{
    "model": "gpt-3.5-turbo",
    "messages": [
      {
        "role": "user",
        "content": "Hi, my name is Jane!"
      }
    ]
  }'
```

**Expected Logged Response**

```
Hi, my name is <PERSON>!
```

docs/my-website/docs/proxy/pii_masking.md
@@ -1,6 +1,14 @@
 import Image from '@theme/IdealImage';
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# PII Masking
+# PII Masking - LiteLLM Gateway (Deprecated Version)
+
+:::warning
+
+This is deprecated, please use [our new Presidio pii masking integration](./guardrails/pii_masking_v2)
+
+:::
 
 LiteLLM supports [Microsoft Presidio](https://github.com/microsoft/presidio/) for PII masking.

docs/my-website/sidebars.js
@@ -67,7 +67,15 @@ const sidebars = {
     {
       type: "category",
       label: "🛡️ [Beta] Guardrails",
-      items: ["proxy/guardrails/quick_start", "proxy/guardrails/aporia_api", "proxy/guardrails/lakera_ai", "proxy/guardrails/bedrock", "proxy/guardrails/custom_guardrail", "prompt_injection"],
+      items: [
+        "proxy/guardrails/quick_start",
+        "proxy/guardrails/aporia_api",
+        "proxy/guardrails/lakera_ai",
+        "proxy/guardrails/bedrock",
+        "proxy/guardrails/pii_masking_v2",
+        "proxy/guardrails/custom_guardrail",
+        "prompt_injection"
+      ],
     },
     {
       type: "category",
@@ -101,7 +109,6 @@ const sidebars = {
       "proxy/model_management",
       "proxy/health",
       "proxy/debugging",
-      "proxy/pii_masking",
       "proxy/call_hooks",
       "proxy/rules",
       "proxy/cli",
@@ -291,6 +298,7 @@ const sidebars = {
       "data_security",
       "migration_policy",
       "contributing",
+      "proxy/pii_masking",
       "rules",
       "proxy_server",
       {

litellm/litellm_core_utils/litellm_logging.py
@@ -25,6 +25,7 @@ from litellm import (
 )
 from litellm.caching import DualCache, InMemoryCache, S3Cache
 from litellm.cost_calculator import _select_model_name_for_cost_calc
+from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.redact_messages import (
     redact_message_input_output_from_logging,
@@ -1359,7 +1360,24 @@ class Logging:
         ## LOGGING HOOK ##
 
         for callback in callbacks:
-            if isinstance(callback, CustomLogger):
+            if isinstance(callback, CustomGuardrail):
+                from litellm.types.guardrails import GuardrailEventHooks
+
+                if (
+                    callback.should_run_guardrail(
+                        data=self.model_call_details,
+                        event_type=GuardrailEventHooks.logging_only,
+                    )
+                    is not True
+                ):
+                    continue
+
+                self.model_call_details, result = await callback.async_logging_hook(
+                    kwargs=self.model_call_details,
+                    result=result,
+                    call_type=self.call_type,
+                )
+            elif isinstance(callback, CustomLogger):
                 self.model_call_details, result = await callback.async_logging_hook(
                     kwargs=self.model_call_details,
                     result=result,

352
litellm/proxy/guardrails/guardrail_hooks/presidio.py
Normal file
@@ -0,0 +1,352 @@
# +-----------------------------------------------+
# |                                               |
# |             PII Masking                       |
# |       with Microsoft Presidio                 |
# |   https://github.com/BerriAI/litellm/issues/  |
# +-----------------------------------------------+
#
#  Tell us how we can improve! - Krrish & Ishaan


import asyncio
import json
import traceback
import uuid
from typing import Any, List, Optional, Tuple, Union

import aiohttp
from fastapi import HTTPException
from pydantic import BaseModel

import litellm  # noqa: E401
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.llms.custom_httpx.http_handler import _get_async_httpx_client
from litellm.proxy._types import UserAPIKeyAuth
from litellm.utils import (
    EmbeddingResponse,
    ImageResponse,
    ModelResponse,
    StreamingChoices,
    get_formatted_prompt,
)


class PresidioPerRequestConfig(BaseModel):
    """
    presidio params that can be controlled per request, api key
    """

    language: Optional[str] = None


class _OPTIONAL_PresidioPIIMasking(CustomGuardrail):
    user_api_key_cache = None
    ad_hoc_recognizers = None

    # Class variables or attributes
    def __init__(
        self,
        mock_testing: bool = False,
        mock_redacted_text: Optional[dict] = None,
        presidio_analyzer_api_base: Optional[str] = None,
        presidio_anonymizer_api_base: Optional[str] = None,
        output_parse_pii: Optional[bool] = False,
        presidio_ad_hoc_recognizers: Optional[str] = None,
        **kwargs,
    ):
        self.pii_tokens: dict = (
            {}
        )  # mapping of PII token to original text - only used with Presidio `replace` operation

        self.mock_redacted_text = mock_redacted_text
        self.output_parse_pii = output_parse_pii or False
        if mock_testing is True:  # for testing purposes only
            return

        ad_hoc_recognizers = presidio_ad_hoc_recognizers
        if ad_hoc_recognizers is not None:
            try:
                with open(ad_hoc_recognizers, "r") as file:
                    self.ad_hoc_recognizers = json.load(file)
            except FileNotFoundError:
                raise Exception(f"File not found. file_path={ad_hoc_recognizers}")
            except json.JSONDecodeError as e:
                raise Exception(
                    f"Error decoding JSON file: {str(e)}, file_path={ad_hoc_recognizers}"
                )
            except Exception as e:
                raise Exception(
                    f"An error occurred: {str(e)}, file_path={ad_hoc_recognizers}"
                )
        self.validate_environment(
            presidio_analyzer_api_base=presidio_analyzer_api_base,
            presidio_anonymizer_api_base=presidio_anonymizer_api_base,
        )

        super().__init__(**kwargs)

    def validate_environment(
        self,
        presidio_analyzer_api_base: Optional[str] = None,
        presidio_anonymizer_api_base: Optional[str] = None,
    ):
        self.presidio_analyzer_api_base: Optional[str] = (
            presidio_analyzer_api_base
            or litellm.get_secret("PRESIDIO_ANALYZER_API_BASE", None)
        )
        self.presidio_anonymizer_api_base: Optional[
            str
        ] = presidio_anonymizer_api_base or litellm.get_secret(
            "PRESIDIO_ANONYMIZER_API_BASE", None
        )  # type: ignore

        if self.presidio_analyzer_api_base is None:
            raise Exception("Missing `PRESIDIO_ANALYZER_API_BASE` from environment")
        if not self.presidio_analyzer_api_base.endswith("/"):
            self.presidio_analyzer_api_base += "/"
        if not (
            self.presidio_analyzer_api_base.startswith("http://")
            or self.presidio_analyzer_api_base.startswith("https://")
        ):
            # add http:// if unset, assume communicating over private network - e.g. render
            self.presidio_analyzer_api_base = (
                "http://" + self.presidio_analyzer_api_base
            )

        if self.presidio_anonymizer_api_base is None:
            raise Exception("Missing `PRESIDIO_ANONYMIZER_API_BASE` from environment")
        if not self.presidio_anonymizer_api_base.endswith("/"):
            self.presidio_anonymizer_api_base += "/"
        if not (
            self.presidio_anonymizer_api_base.startswith("http://")
            or self.presidio_anonymizer_api_base.startswith("https://")
        ):
            # add http:// if unset, assume communicating over private network - e.g. render
            self.presidio_anonymizer_api_base = (
                "http://" + self.presidio_anonymizer_api_base
            )

    async def check_pii(
        self,
        text: str,
        output_parse_pii: bool,
        presidio_config: Optional[PresidioPerRequestConfig],
    ) -> str:
        """
        [TODO] make this more performant for high-throughput scenario
        """
        try:
            async with aiohttp.ClientSession() as session:
                if self.mock_redacted_text is not None:
                    redacted_text = self.mock_redacted_text
                else:
                    # Make the first request to /analyze
                    # Construct Request 1
                    analyze_url = f"{self.presidio_analyzer_api_base}analyze"
                    analyze_payload = {"text": text, "language": "en"}
                    if presidio_config and presidio_config.language:
                        analyze_payload["language"] = presidio_config.language
                    if self.ad_hoc_recognizers is not None:
                        analyze_payload["ad_hoc_recognizers"] = self.ad_hoc_recognizers
                    # End of constructing Request 1

                    redacted_text = None
                    verbose_proxy_logger.debug(
                        "Making request to: %s with payload: %s",
                        analyze_url,
                        analyze_payload,
                    )
                    async with session.post(
                        analyze_url, json=analyze_payload
                    ) as response:
                        analyze_results = await response.json()

                    # Make the second request to /anonymize
                    anonymize_url = f"{self.presidio_anonymizer_api_base}anonymize"
                    verbose_proxy_logger.debug("Making request to: %s", anonymize_url)
                    anonymize_payload = {
                        "text": text,
                        "analyzer_results": analyze_results,
                    }

                    async with session.post(
                        anonymize_url, json=anonymize_payload
                    ) as response:
                        redacted_text = await response.json()

                new_text = text
                if redacted_text is not None:
                    verbose_proxy_logger.debug("redacted_text: %s", redacted_text)
                    for item in redacted_text["items"]:
                        start = item["start"]
                        end = item["end"]
                        replacement = item["text"]  # replacement token
                        if item["operator"] == "replace" and output_parse_pii == True:
                            # check if token in dict
                            # if exists, add a uuid to the replacement token for swapping back to the original text in llm response output parsing
                            if replacement in self.pii_tokens:
                                replacement = replacement + str(uuid.uuid4())

                            self.pii_tokens[replacement] = new_text[
                                start:end
                            ]  # get text it'll replace

                        new_text = new_text[:start] + replacement + new_text[end:]
                    return redacted_text["text"]
                else:
                    raise Exception(f"Invalid anonymizer response: {redacted_text}")
        except Exception as e:
            verbose_proxy_logger.error(
                "litellm.proxy.hooks.presidio_pii_masking.py::async_pre_call_hook(): Exception occurred - {}".format(
                    str(e)
                )
            )
            verbose_proxy_logger.debug(traceback.format_exc())
            raise e

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: dict,
        call_type: str,
    ):
        """
        - Check if request turned off pii
        - Check if user allowed to turn off pii (key permissions -> 'allow_pii_controls')

        - Take the request data
        - Call /analyze -> get the results
        - Call /anonymize w/ the analyze results -> get the redacted text

        For multiple messages in /chat/completions, we'll need to call them in parallel.
        """

        try:
            content_safety = data.get("content_safety", None)
            verbose_proxy_logger.debug("content_safety: %s", content_safety)
            presidio_config = self.get_presidio_settings_from_request_data(data)

            if call_type == "completion":  # /chat/completions requests
                messages = data["messages"]
                tasks = []

                for m in messages:
                    if isinstance(m["content"], str):
                        tasks.append(
                            self.check_pii(
                                text=m["content"],
                                output_parse_pii=self.output_parse_pii,
                                presidio_config=presidio_config,
                            )
                        )
                responses = await asyncio.gather(*tasks)
                for index, r in enumerate(responses):
                    if isinstance(messages[index]["content"], str):
                        messages[index][
                            "content"
                        ] = r  # replace content with redacted string
                verbose_proxy_logger.info(
                    f"Presidio PII Masking: Redacted pii message: {data['messages']}"
                )
            return data
        except Exception as e:
            verbose_proxy_logger.info(
                f"An error occurred - {str(e)}",
            )
            raise e

    async def async_logging_hook(
        self, kwargs: dict, result: Any, call_type: str
    ) -> Tuple[dict, Any]:
        """
        Masks the input before logging to langfuse, datadog, etc.
        """
        if (
            call_type == "completion" or call_type == "acompletion"
        ):  # /chat/completions requests
            messages: Optional[List] = kwargs.get("messages", None)
            tasks = []

            if messages is None:
                return kwargs, result

            presidio_config = self.get_presidio_settings_from_request_data(kwargs)

            for m in messages:
                text_str = ""
                if m["content"] is None:
                    continue
                if isinstance(m["content"], str):
                    text_str = m["content"]
                    tasks.append(
                        self.check_pii(
                            text=text_str,
                            output_parse_pii=False,
                            presidio_config=presidio_config,
                        )
                    )  # need to pass separately b/c presidio has context window limits
            responses = await asyncio.gather(*tasks)
            for index, r in enumerate(responses):
                if isinstance(messages[index]["content"], str):
                    messages[index][
                        "content"
                    ] = r  # replace content with redacted string
            verbose_proxy_logger.info(
                f"Presidio PII Masking: Redacted pii message: {messages}"
            )
            kwargs["messages"] = messages

        return kwargs, responses

    async def async_post_call_success_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        response: Union[ModelResponse, EmbeddingResponse, ImageResponse],
    ):
        """
        Output parse the response object to replace the masked tokens with user sent values
        """
        verbose_proxy_logger.debug(
            f"PII Masking Args: self.output_parse_pii={self.output_parse_pii}; type of response={type(response)}"
        )
        if self.output_parse_pii == False:
            return response

        if isinstance(response, ModelResponse) and not isinstance(
            response.choices[0], StreamingChoices
        ):  # /chat/completions requests
            if isinstance(response.choices[0].message.content, str):
                verbose_proxy_logger.debug(
                    f"self.pii_tokens: {self.pii_tokens}; initial response: {response.choices[0].message.content}"
                )
                for key, value in self.pii_tokens.items():
                    response.choices[0].message.content = response.choices[
                        0
                    ].message.content.replace(key, value)
        return response

    def get_presidio_settings_from_request_data(
        self, data: dict
    ) -> Optional[PresidioPerRequestConfig]:
        if "metadata" in data:
            _metadata = data["metadata"]
            _guardrail_config = _metadata.get("guardrail_config")
            if _guardrail_config:
                _presidio_config = PresidioPerRequestConfig(**_guardrail_config)
                return _presidio_config

        return None

    def print_verbose(self, print_statement):
        try:
            verbose_proxy_logger.debug(print_statement)
            if litellm.set_verbose:
                print(print_statement)  # noqa
        except Exception:
            pass

litellm/proxy/guardrails/init_guardrails.py
@@ -11,6 +11,7 @@ from litellm.proxy.common_utils.callback_utils import initialize_callbacks_on_pr
 # v2 implementation
 from litellm.types.guardrails import (
     Guardrail,
+    GuardrailEventHooks,
     GuardrailItem,
     GuardrailItemSpec,
     LakeraCategoryThresholds,
@@ -104,6 +105,10 @@ def init_guardrails_v2(
             api_base=litellm_params_data.get("api_base"),
             guardrailIdentifier=litellm_params_data.get("guardrailIdentifier"),
             guardrailVersion=litellm_params_data.get("guardrailVersion"),
+            output_parse_pii=litellm_params_data.get("output_parse_pii"),
+            presidio_ad_hoc_recognizers=litellm_params_data.get(
+                "presidio_ad_hoc_recognizers"
+            ),
         )
 
         if (
@@ -165,6 +170,33 @@ def init_guardrails_v2(
                 category_thresholds=litellm_params.get("category_thresholds"),
             )
             litellm.callbacks.append(_lakera_callback)  # type: ignore
+        elif litellm_params["guardrail"] == "presidio":
+            from litellm.proxy.guardrails.guardrail_hooks.presidio import (
+                _OPTIONAL_PresidioPIIMasking,
+            )
+
+            _presidio_callback = _OPTIONAL_PresidioPIIMasking(
+                guardrail_name=guardrail["guardrail_name"],
+                event_hook=litellm_params["mode"],
+                output_parse_pii=litellm_params["output_parse_pii"],
+                presidio_ad_hoc_recognizers=litellm_params[
+                    "presidio_ad_hoc_recognizers"
+                ],
+            )
+
+            if litellm_params["output_parse_pii"] is True:
+                _success_callback = _OPTIONAL_PresidioPIIMasking(
+                    output_parse_pii=True,
+                    guardrail_name=guardrail["guardrail_name"],
+                    event_hook=GuardrailEventHooks.post_call.value,
+                    presidio_ad_hoc_recognizers=litellm_params[
+                        "presidio_ad_hoc_recognizers"
+                    ],
+                )
+
+                litellm.callbacks.append(_success_callback)  # type: ignore
+
+            litellm.callbacks.append(_presidio_callback)  # type: ignore
         elif (
             isinstance(litellm_params["guardrail"], str)
             and "." in litellm_params["guardrail"]

litellm/proxy/litellm_pre_call_utils.py
@@ -420,6 +420,10 @@ def move_guardrails_to_metadata(
         data[_metadata_variable_name]["guardrails"] = data["guardrails"]
         del data["guardrails"]
 
+    if "guardrail_config" in data:
+        data[_metadata_variable_name]["guardrail_config"] = data["guardrail_config"]
+        del data["guardrail_config"]
+
 
 def add_provider_specific_headers_to_request(
     data: dict,

litellm/proxy/proxy_config.yaml
@@ -1,20 +1,20 @@
 model_list:
-  - model_name: fake-openai-endpoint
+  - model_name: openai/*
     litellm_params:
-      model: openai/fake
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      model: gpt-3.5-turbo
+      api_key: os.environ/OPENAI_API_KEY
-  - model_name: gpt-3.5-turbo-end-user-test
-    litellm_params:
-      model: azure/chatgpt-v-2
-      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
-      api_version: "2023-05-15"
-      api_key: os.environ/AZURE_API_KEY
 
-litellm_settings:
-  success_callback: ["prometheus"]
-  failure_callback: ["prometheus"]
+guardrails:
+  - guardrail_name: "presidio-pre-guard"
+    litellm_params:
+      guardrail: presidio # supported values: "aporia", "lakera", "presidio"
+      mode: "pre_call" # pre_call, during_call, post_call
+      output_parse_pii: True
 
 general_settings:
   master_key: sk-1234

litellm/types/guardrails.py
@@ -84,6 +84,10 @@ class LitellmParams(TypedDict, total=False):
     guardrailIdentifier: Optional[str]
     guardrailVersion: Optional[str]
 
+    # Presidio params
+    output_parse_pii: Optional[bool]
+    presidio_ad_hoc_recognizers: Optional[str]
+
 
 class Guardrail(TypedDict):
     guardrail_name: str
 
@@ -98,6 +102,7 @@ class GuardrailEventHooks(str, Enum):
     pre_call = "pre_call"
     post_call = "post_call"
     during_call = "during_call"
+    logging_only = "logging_only"
 
 
 class BedrockTextContent(TypedDict, total=False):