diff --git a/docs/my-website/docs/proxy/guardrails/pii_masking_v2.md b/docs/my-website/docs/proxy/guardrails/pii_masking_v2.md new file mode 100644 index 000000000..59690666e --- /dev/null +++ b/docs/my-website/docs/proxy/guardrails/pii_masking_v2.md @@ -0,0 +1,338 @@ +import Image from '@theme/IdealImage'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# PII Masking - Presidio + +## Quick Start + +LiteLLM supports [Microsoft Presidio](https://github.com/microsoft/presidio/) for PII masking. + +### 1. Define Guardrails on your LiteLLM config.yaml + +Define your guardrails under the `guardrails` section +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: openai/gpt-3.5-turbo + api_key: os.environ/OPENAI_API_KEY + +guardrails: + - guardrail_name: "presidio-pre-guard" + litellm_params: + guardrail: presidio # supported values: "aporia", "bedrock", "lakera", "presidio" + mode: "pre_call" +``` + +Set the following env vars + +```bash +export PRESIDIO_ANALYZER_API_BASE="http://localhost:5002" +export PRESIDIO_ANONYMIZER_API_BASE="http://localhost:5001" +``` + +#### Supported values for `mode` + +- `pre_call` Run **before** LLM call, on **input** +- `post_call` Run **after** LLM call, on **input & output** +- `logging_only` Run **after** LLM call, only apply PII Masking before logging to Langfuse, etc. Not on the actual llm api request / response. + + +### 2. Start LiteLLM Gateway + + +```shell +litellm --config config.yaml --detailed_debug +``` + +### 3. 
Test request + +**[Langchain, OpenAI SDK Usage Examples](../proxy/user_keys#request-format)** + + + + +Expect this to mask `Jane Doe` since it's PII + +```shell +curl http://localhost:4000/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gpt-3.5-turbo", + "messages": [ + {"role": "user", "content": "Hello my name is Jane Doe"} + ], + "guardrails": ["presidio-pre-guard"], + }' +``` + +Expected response on failure + +```shell +{ + "id": "chatcmpl-A3qSC39K7imjGbZ8xCDacGJZBoTJQ", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": "Hello, ! How can I assist you today?", + "role": "assistant", + "tool_calls": null, + "function_call": null + } + } + ], + "created": 1725479980, + "model": "gpt-3.5-turbo-2024-07-18", + "object": "chat.completion", + "system_fingerprint": "fp_5bd87c427a", + "usage": { + "completion_tokens": 13, + "prompt_tokens": 14, + "total_tokens": 27 + }, + "service_tier": null +} +``` + + + + + +```shell +curl http://localhost:4000/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gpt-3.5-turbo", + "messages": [ + {"role": "user", "content": "Hello good morning"} + ], + "guardrails": ["presidio-pre-guard"], + }' +``` + + + + + + +## Advanced + +### Set `language` per request + +The Presidio API [supports passing the `language` param](https://microsoft.github.io/presidio/api-docs/api-docs.html#tag/Analyzer/paths/~1analyze/post). 
Here is how to set the `language` per request + + + + +```shell +curl http://localhost:4000/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gpt-3.5-turbo", + "messages": [ + {"role": "user", "content": "is this credit card number 9283833 correct?"} + ], + "guardrails": ["presidio-pre-guard"], + "guardrail_config": {"language": "es"} + }' +``` + + + + + + +```python + +import openai +client = openai.OpenAI( + api_key="anything", + base_url="http://0.0.0.0:4000" +) + +# request sent to model set on litellm proxy, `litellm --model` +response = client.chat.completions.create( + model="gpt-3.5-turbo", + messages = [ + { + "role": "user", + "content": "this is a test request, write a short poem" + } + ], + extra_body={ + "metadata": { + "guardrails": ["presidio-pre-guard"], + "guardrail_config": {"language": "es"} + } + } +) +print(response) +``` + + + + + + +### Output parsing + + +LLM responses can sometimes contain the masked tokens. + +For presidio 'replace' operations, LiteLLM can check the LLM response and replace the masked token with the user-submitted values. + +Define your guardrails under the `guardrails` section +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: openai/gpt-3.5-turbo + api_key: os.environ/OPENAI_API_KEY + +guardrails: + - guardrail_name: "presidio-pre-guard" + litellm_params: + guardrail: presidio # supported values: "aporia", "bedrock", "lakera", "presidio" + mode: "pre_call" + output_parse_pii: True +``` + +**Expected Flow: ** + +1. User Input: "hello world, my name is Jane Doe. My number is: 034453334" + +2. LLM Input: "hello world, my name is [PERSON]. My number is: [PHONE_NUMBER]" + +3. LLM Response: "Hey [PERSON], nice to meet you!" + +4. User Response: "Hey Jane Doe, nice to meet you!" 
+ +### Ad Hoc Recognizers + + +Send ad-hoc recognizers to presidio `/analyze` by passing a json file to the proxy + +[**Example** ad-hoc recognizer](../../../../litellm/proxy/hooks/example_presidio_ad_hoc_recognize) + +#### Define ad-hoc recognizer on your LiteLLM config.yaml + +Define your guardrails under the `guardrails` section +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: openai/gpt-3.5-turbo + api_key: os.environ/OPENAI_API_KEY + +guardrails: + - guardrail_name: "presidio-pre-guard" + litellm_params: + guardrail: presidio # supported values: "aporia", "bedrock", "lakera", "presidio" + mode: "pre_call" + presidio_ad_hoc_recognizers: "./hooks/example_presidio_ad_hoc_recognizer.json" +``` + +Set the following env vars + +```bash +export PRESIDIO_ANALYZER_API_BASE="http://localhost:5002" +export PRESIDIO_ANONYMIZER_API_BASE="http://localhost:5001" +``` + + +You can see this working, when you run the proxy: + +```bash +litellm --config /path/to/config.yaml --debug +``` + +Make a chat completions request, example: + +``` +{ + "model": "azure-gpt-3.5", + "messages": [{"role": "user", "content": "John Smith AHV number is 756.3026.0705.92. Zip code: 1334023"}] +} +``` + +And search for any log starting with `Presidio PII Masking`, example: +``` +Presidio PII Masking: Redacted pii message: AHV number is . Zip code: +``` + +### Logging Only + + +Only apply PII Masking before logging to Langfuse, etc. + +Not on the actual llm api request / response. + +:::note +This is currently only applied for +- `/chat/completion` requests +- on 'success' logging + +::: + +1. 
Define mode: `logging_only` on your LiteLLM config.yaml + +Define your guardrails under the `guardrails` section +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: openai/gpt-3.5-turbo + api_key: os.environ/OPENAI_API_KEY + +guardrails: + - guardrail_name: "presidio-pre-guard" + litellm_params: + guardrail: presidio # supported values: "aporia", "bedrock", "lakera", "presidio" + mode: "logging_only" +``` + +Set the following env vars + +```bash +export PRESIDIO_ANALYZER_API_BASE="http://localhost:5002" +export PRESIDIO_ANONYMIZER_API_BASE="http://localhost:5001" +``` + + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl -X POST 'http://0.0.0.0:4000/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "user", + "content": "Hi, my name is Jane!" + } + ] + }' +``` + + +**Expected Logged Response** + +``` +Hi, my name is ! +``` + + diff --git a/docs/my-website/docs/proxy/pii_masking.md b/docs/my-website/docs/proxy/pii_masking.md index 8106765f4..83e4965a4 100644 --- a/docs/my-website/docs/proxy/pii_masking.md +++ b/docs/my-website/docs/proxy/pii_masking.md @@ -1,6 +1,14 @@ import Image from '@theme/IdealImage'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; -# PII Masking +# PII Masking - LiteLLM Gateway (Deprecated Version) + +:::warning + +This is deprecated, please use [our new Presidio pii masking integration](./guardrails/pii_masking_v2) + +::: LiteLLM supports [Microsoft Presidio](https://github.com/microsoft/presidio/) for PII masking. 
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 0abb5144f..f3780b84e 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -67,7 +67,15 @@ const sidebars = { { type: "category", label: "🛡️ [Beta] Guardrails", - items: ["proxy/guardrails/quick_start", "proxy/guardrails/aporia_api", "proxy/guardrails/lakera_ai", "proxy/guardrails/bedrock", "proxy/guardrails/custom_guardrail", "prompt_injection"], + items: [ + "proxy/guardrails/quick_start", + "proxy/guardrails/aporia_api", + "proxy/guardrails/lakera_ai", + "proxy/guardrails/bedrock", + "proxy/guardrails/pii_masking_v2", + "proxy/guardrails/custom_guardrail", + "prompt_injection" + ], }, { type: "category", @@ -101,7 +109,6 @@ const sidebars = { "proxy/model_management", "proxy/health", "proxy/debugging", - "proxy/pii_masking", "proxy/call_hooks", "proxy/rules", "proxy/cli", @@ -291,6 +298,7 @@ const sidebars = { "data_security", "migration_policy", "contributing", + "proxy/pii_masking", "rules", "proxy_server", { diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 87c11f1fd..2ea3f23d3 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -25,6 +25,7 @@ from litellm import ( ) from litellm.caching import DualCache, InMemoryCache, S3Cache from litellm.cost_calculator import _select_model_name_for_cost_calc +from litellm.integrations.custom_guardrail import CustomGuardrail from litellm.integrations.custom_logger import CustomLogger from litellm.litellm_core_utils.redact_messages import ( redact_message_input_output_from_logging, @@ -1359,7 +1360,24 @@ class Logging: ## LOGGING HOOK ## for callback in callbacks: - if isinstance(callback, CustomLogger): + if isinstance(callback, CustomGuardrail): + from litellm.types.guardrails import GuardrailEventHooks + + if ( + callback.should_run_guardrail( + data=self.model_call_details, + 
event_type=GuardrailEventHooks.logging_only, + ) + is not True + ): + continue + + self.model_call_details, result = await callback.async_logging_hook( + kwargs=self.model_call_details, + result=result, + call_type=self.call_type, + ) + elif isinstance(callback, CustomLogger): self.model_call_details, result = await callback.async_logging_hook( kwargs=self.model_call_details, result=result, diff --git a/litellm/proxy/guardrails/guardrail_hooks/presidio.py b/litellm/proxy/guardrails/guardrail_hooks/presidio.py new file mode 100644 index 000000000..857704bf2 --- /dev/null +++ b/litellm/proxy/guardrails/guardrail_hooks/presidio.py @@ -0,0 +1,352 @@ +# +-----------------------------------------------+ +# | | +# | PII Masking | +# | with Microsoft Presidio | +# | https://github.com/BerriAI/litellm/issues/ | +# +-----------------------------------------------+ +# +# Tell us how we can improve! - Krrish & Ishaan + + +import asyncio +import json +import traceback +import uuid +from typing import Any, List, Optional, Tuple, Union + +import aiohttp +from fastapi import HTTPException +from pydantic import BaseModel + +import litellm # noqa: E401 +from litellm._logging import verbose_proxy_logger +from litellm.caching import DualCache +from litellm.integrations.custom_guardrail import CustomGuardrail +from litellm.llms.custom_httpx.http_handler import _get_async_httpx_client +from litellm.proxy._types import UserAPIKeyAuth +from litellm.utils import ( + EmbeddingResponse, + ImageResponse, + ModelResponse, + StreamingChoices, + get_formatted_prompt, +) + + +class PresidioPerRequestConfig(BaseModel): + """ + presdio params that can be controlled per request, api key + """ + + language: Optional[str] = None + + +class _OPTIONAL_PresidioPIIMasking(CustomGuardrail): + user_api_key_cache = None + ad_hoc_recognizers = None + + # Class variables or attributes + def __init__( + self, + mock_testing: bool = False, + mock_redacted_text: Optional[dict] = None, + 
presidio_analyzer_api_base: Optional[str] = None, + presidio_anonymizer_api_base: Optional[str] = None, + output_parse_pii: Optional[bool] = False, + presidio_ad_hoc_recognizers: Optional[str] = None, + **kwargs, + ): + self.pii_tokens: dict = ( + {} + ) # mapping of PII token to original text - only used with Presidio `replace` operation + + self.mock_redacted_text = mock_redacted_text + self.output_parse_pii = output_parse_pii or False + if mock_testing is True: # for testing purposes only + return + + ad_hoc_recognizers = presidio_ad_hoc_recognizers + if ad_hoc_recognizers is not None: + try: + with open(ad_hoc_recognizers, "r") as file: + self.ad_hoc_recognizers = json.load(file) + except FileNotFoundError: + raise Exception(f"File not found. file_path={ad_hoc_recognizers}") + except json.JSONDecodeError as e: + raise Exception( + f"Error decoding JSON file: {str(e)}, file_path={ad_hoc_recognizers}" + ) + except Exception as e: + raise Exception( + f"An error occurred: {str(e)}, file_path={ad_hoc_recognizers}" + ) + self.validate_environment( + presidio_analyzer_api_base=presidio_analyzer_api_base, + presidio_anonymizer_api_base=presidio_anonymizer_api_base, + ) + + super().__init__(**kwargs) + + def validate_environment( + self, + presidio_analyzer_api_base: Optional[str] = None, + presidio_anonymizer_api_base: Optional[str] = None, + ): + self.presidio_analyzer_api_base: Optional[str] = ( + presidio_analyzer_api_base + or litellm.get_secret("PRESIDIO_ANALYZER_API_BASE", None) + ) + self.presidio_anonymizer_api_base: Optional[ + str + ] = presidio_anonymizer_api_base or litellm.get_secret( + "PRESIDIO_ANONYMIZER_API_BASE", None + ) # type: ignore + + if self.presidio_analyzer_api_base is None: + raise Exception("Missing `PRESIDIO_ANALYZER_API_BASE` from environment") + if not self.presidio_analyzer_api_base.endswith("/"): + self.presidio_analyzer_api_base += "/" + if not ( + self.presidio_analyzer_api_base.startswith("http://") + or 
self.presidio_analyzer_api_base.startswith("https://") + ): + # add http:// if unset, assume communicating over private network - e.g. render + self.presidio_analyzer_api_base = ( + "http://" + self.presidio_analyzer_api_base + ) + + if self.presidio_anonymizer_api_base is None: + raise Exception("Missing `PRESIDIO_ANONYMIZER_API_BASE` from environment") + if not self.presidio_anonymizer_api_base.endswith("/"): + self.presidio_anonymizer_api_base += "/" + if not ( + self.presidio_anonymizer_api_base.startswith("http://") + or self.presidio_anonymizer_api_base.startswith("https://") + ): + # add http:// if unset, assume communicating over private network - e.g. render + self.presidio_anonymizer_api_base = ( + "http://" + self.presidio_anonymizer_api_base + ) + + async def check_pii( + self, + text: str, + output_parse_pii: bool, + presidio_config: Optional[PresidioPerRequestConfig], + ) -> str: + """ + [TODO] make this more performant for high-throughput scenario + """ + try: + async with aiohttp.ClientSession() as session: + if self.mock_redacted_text is not None: + redacted_text = self.mock_redacted_text + else: + # Make the first request to /analyze + # Construct Request 1 + analyze_url = f"{self.presidio_analyzer_api_base}analyze" + analyze_payload = {"text": text, "language": "en"} + if presidio_config and presidio_config.language: + analyze_payload["language"] = presidio_config.language + if self.ad_hoc_recognizers is not None: + analyze_payload["ad_hoc_recognizers"] = self.ad_hoc_recognizers + # End of constructing Request 1 + + redacted_text = None + verbose_proxy_logger.debug( + "Making request to: %s with payload: %s", + analyze_url, + analyze_payload, + ) + async with session.post( + analyze_url, json=analyze_payload + ) as response: + + analyze_results = await response.json() + + # Make the second request to /anonymize + anonymize_url = f"{self.presidio_anonymizer_api_base}anonymize" + verbose_proxy_logger.debug("Making request to: %s", anonymize_url) + 
anonymize_payload = { + "text": text, + "analyzer_results": analyze_results, + } + + async with session.post( + anonymize_url, json=anonymize_payload + ) as response: + redacted_text = await response.json() + + new_text = text + if redacted_text is not None: + verbose_proxy_logger.debug("redacted_text: %s", redacted_text) + for item in redacted_text["items"]: + start = item["start"] + end = item["end"] + replacement = item["text"] # replacement token + if item["operator"] == "replace" and output_parse_pii == True: + # check if token in dict + # if exists, add a uuid to the replacement token for swapping back to the original text in llm response output parsing + if replacement in self.pii_tokens: + replacement = replacement + str(uuid.uuid4()) + + self.pii_tokens[replacement] = new_text[ + start:end + ] # get text it'll replace + + new_text = new_text[:start] + replacement + new_text[end:] + return redacted_text["text"] + else: + raise Exception(f"Invalid anonymizer response: {redacted_text}") + except Exception as e: + verbose_proxy_logger.error( + "litellm.proxy.hooks.presidio_pii_masking.py::async_pre_call_hook(): Exception occured - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) + raise e + + async def async_pre_call_hook( + self, + user_api_key_dict: UserAPIKeyAuth, + cache: DualCache, + data: dict, + call_type: str, + ): + """ + - Check if request turned off pii + - Check if user allowed to turn off pii (key permissions -> 'allow_pii_controls') + + - Take the request data + - Call /analyze -> get the results + - Call /anonymize w/ the analyze results -> get the redacted text + + For multiple messages in /chat/completions, we'll need to call them in parallel. 
+ """ + + try: + + content_safety = data.get("content_safety", None) + verbose_proxy_logger.debug("content_safety: %s", content_safety) + presidio_config = self.get_presidio_settings_from_request_data(data) + + if call_type == "completion": # /chat/completions requests + messages = data["messages"] + tasks = [] + + for m in messages: + if isinstance(m["content"], str): + tasks.append( + self.check_pii( + text=m["content"], + output_parse_pii=self.output_parse_pii, + presidio_config=presidio_config, + ) + ) + responses = await asyncio.gather(*tasks) + for index, r in enumerate(responses): + if isinstance(messages[index]["content"], str): + messages[index][ + "content" + ] = r # replace content with redacted string + verbose_proxy_logger.info( + f"Presidio PII Masking: Redacted pii message: {data['messages']}" + ) + return data + except Exception as e: + verbose_proxy_logger.info( + "An error occurred - %s", str(e), + ) + raise e + + async def async_logging_hook( + self, kwargs: dict, result: Any, call_type: str + ) -> Tuple[dict, Any]: + """ + Masks the input before logging to langfuse, datadog, etc. 
+ """ + if ( + call_type == "completion" or call_type == "acompletion" + ): # /chat/completions requests + messages: Optional[List] = kwargs.get("messages", None) + tasks = [] + + if messages is None: + return kwargs, result + + presidio_config = self.get_presidio_settings_from_request_data(kwargs) + + for m in messages: + text_str = "" + if m["content"] is None: + continue + if isinstance(m["content"], str): + text_str = m["content"] + tasks.append( + self.check_pii( + text=text_str, + output_parse_pii=False, + presidio_config=presidio_config, + ) + ) # need to pass separately b/c presidio has context window limits + responses = await asyncio.gather(*tasks) + for index, r in enumerate(responses): + if isinstance(messages[index]["content"], str): + messages[index][ + "content" + ] = r # replace content with redacted string + verbose_proxy_logger.info( + f"Presidio PII Masking: Redacted pii message: {messages}" + ) + kwargs["messages"] = messages + + return kwargs, result + + async def async_post_call_success_hook( + self, + data: dict, + user_api_key_dict: UserAPIKeyAuth, + response: Union[ModelResponse, EmbeddingResponse, ImageResponse], + ): + """ + Output parse the response object to replace the masked tokens with user sent values + """ + verbose_proxy_logger.debug( + f"PII Masking Args: self.output_parse_pii={self.output_parse_pii}; type of response={type(response)}" + ) + if self.output_parse_pii == False: + return response + + if isinstance(response, ModelResponse) and not isinstance( + response.choices[0], StreamingChoices + ): # /chat/completions requests + if isinstance(response.choices[0].message.content, str): + verbose_proxy_logger.debug( + f"self.pii_tokens: {self.pii_tokens}; initial response: {response.choices[0].message.content}" + ) + for key, value in self.pii_tokens.items(): + response.choices[0].message.content = response.choices[ + 0 + ].message.content.replace(key, value) + return response + + def get_presidio_settings_from_request_data( 
self, data: dict + ) -> Optional[PresidioPerRequestConfig]: + if "metadata" in data: + _metadata = data["metadata"] + _guardrail_config = _metadata.get("guardrail_config") + if _guardrail_config: + _presidio_config = PresidioPerRequestConfig(**_guardrail_config) + return _presidio_config + + return None + + def print_verbose(self, print_statement): + try: + verbose_proxy_logger.debug(print_statement) + if litellm.set_verbose: + print(print_statement) # noqa + except: + pass diff --git a/litellm/proxy/guardrails/init_guardrails.py b/litellm/proxy/guardrails/init_guardrails.py index 643e13596..cff9fca05 100644 --- a/litellm/proxy/guardrails/init_guardrails.py +++ b/litellm/proxy/guardrails/init_guardrails.py @@ -11,6 +11,7 @@ from litellm.proxy.common_utils.callback_utils import initialize_callbacks_on_pr # v2 implementation from litellm.types.guardrails import ( Guardrail, + GuardrailEventHooks, GuardrailItem, GuardrailItemSpec, LakeraCategoryThresholds, @@ -104,6 +105,10 @@ def init_guardrails_v2( api_base=litellm_params_data.get("api_base"), guardrailIdentifier=litellm_params_data.get("guardrailIdentifier"), guardrailVersion=litellm_params_data.get("guardrailVersion"), + output_parse_pii=litellm_params_data.get("output_parse_pii"), + presidio_ad_hoc_recognizers=litellm_params_data.get( + "presidio_ad_hoc_recognizers" + ), ) if ( @@ -165,6 +170,33 @@ def init_guardrails_v2( category_thresholds=litellm_params.get("category_thresholds"), ) litellm.callbacks.append(_lakera_callback) # type: ignore + elif litellm_params["guardrail"] == "presidio": + from litellm.proxy.guardrails.guardrail_hooks.presidio import ( + _OPTIONAL_PresidioPIIMasking, + ) + + _presidio_callback = _OPTIONAL_PresidioPIIMasking( + guardrail_name=guardrail["guardrail_name"], + event_hook=litellm_params["mode"], + output_parse_pii=litellm_params["output_parse_pii"], + presidio_ad_hoc_recognizers=litellm_params[ + "presidio_ad_hoc_recognizers" + ], + ) + + if litellm_params["output_parse_pii"] is 
True: + _success_callback = _OPTIONAL_PresidioPIIMasking( + output_parse_pii=True, + guardrail_name=guardrail["guardrail_name"], + event_hook=GuardrailEventHooks.post_call.value, + presidio_ad_hoc_recognizers=litellm_params[ + "presidio_ad_hoc_recognizers" + ], + ) + + litellm.callbacks.append(_success_callback) # type: ignore + + litellm.callbacks.append(_presidio_callback) # type: ignore elif ( isinstance(litellm_params["guardrail"], str) and "." in litellm_params["guardrail"] diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index 60052bc27..d41aae50f 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -420,6 +420,10 @@ def move_guardrails_to_metadata( data[_metadata_variable_name]["guardrails"] = data["guardrails"] del data["guardrails"] + if "guardrail_config" in data: + data[_metadata_variable_name]["guardrail_config"] = data["guardrail_config"] + del data["guardrail_config"] + def add_provider_specific_headers_to_request( data: dict, diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 7566f348a..f6942dd29 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -1,20 +1,20 @@ model_list: - - model_name: fake-openai-endpoint + - model_name: openai/* litellm_params: - model: openai/fake - api_base: https://exampleopenaiendpoint-production.up.railway.app/ + model: gpt-3.5-turbo api_key: os.environ/OPENAI_API_KEY - - model_name: gpt-3.5-turbo-end-user-test - litellm_params: - model: azure/chatgpt-v-2 - api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ - api_version: "2023-05-15" - api_key: os.environ/AZURE_API_KEY litellm_settings: success_callback: ["prometheus"] failure_callback: ["prometheus"] +guardrails: + - guardrail_name: "presidio-pre-guard" + litellm_params: + guardrail: presidio # supported values: "aporia", "lakera", "presidio" + mode: "pre_call" # pre_call, during_call, post_call + 
output_parse_pii: True + general_settings: master_key: sk-1234 diff --git a/litellm/types/guardrails.py b/litellm/types/guardrails.py index 10f4be7e1..cb70de505 100644 --- a/litellm/types/guardrails.py +++ b/litellm/types/guardrails.py @@ -84,6 +84,10 @@ class LitellmParams(TypedDict, total=False): guardrailIdentifier: Optional[str] guardrailVersion: Optional[str] + # Presidio params + output_parse_pii: Optional[bool] + presidio_ad_hoc_recognizers: Optional[str] + class Guardrail(TypedDict): guardrail_name: str @@ -98,6 +102,7 @@ class GuardrailEventHooks(str, Enum): pre_call = "pre_call" post_call = "post_call" during_call = "during_call" + logging_only = "logging_only" class BedrockTextContent(TypedDict, total=False):