forked from phoenix/litellm-mirror
Merge pull request #1970 from BerriAI/litellm_fix_pii_output_parsing
feat(presidio_pii_masking.py): enable output parsing for pii masking
commit f9dbd74a2c
6 changed files with 177 additions and 26 deletions
@@ -164,6 +164,8 @@ secret_manager_client: Optional[
 ] = None  # list of instantiated key management clients - e.g. azure kv, infisical, etc.
 _google_kms_resource_name: Optional[str] = None
 _key_management_system: Optional[KeyManagementSystem] = None
+#### PII MASKING ####
+output_parse_pii: bool = False
 #############################################
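Usage note: the new module-level flag can be toggled from application code. A minimal sketch (it assumes the Presidio masking hook described in the rest of this commit is also wired up; the flag alone does nothing):

import litellm

# opt in to swapping masked tokens back to the original values in LLM responses
# (only takes effect when the Presidio PII masking hook is active)
litellm.output_parse_pii = True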
@@ -2,9 +2,11 @@
 # On success, logs events to Promptlayer
 import dotenv, os
 import requests
 
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.caching import DualCache
-from typing import Literal
+from typing import Literal, Union
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -54,7 +56,7 @@ class CustomLogger:  # https://docs.litellm.ai/docs/observability/custom_callbac
         user_api_key_dict: UserAPIKeyAuth,
         cache: DualCache,
         data: dict,
-        call_type: Literal["completion", "embeddings"],
+        call_type: Literal["completion", "embeddings", "image_generation"],
     ):
         pass
@@ -63,21 +65,11 @@ class CustomLogger:  # https://docs.litellm.ai/docs/observability/custom_callbac
     ):
         pass
 
-    async def async_post_call_streaming_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
-    ):
-        """
-        Returns streaming chunk before their returned to user
-        """
-        pass
-
     async def async_post_call_success_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        user_api_key_dict: UserAPIKeyAuth,
+        response,
     ):
-        """
-        Returns llm response before it's returned to user
-        """
         pass
 
     #### SINGLE-USE #### - https://docs.litellm.ai/docs/observability/custom_callback#using-your-custom-callback-function
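For context, a minimal sketch of a proxy callback written against the revised hook signature above; the method name and parameters come from the diff, while the class name and body are illustrative only:

from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth


class MyHandler(CustomLogger):  # hypothetical subclass, for illustration
    async def async_post_call_success_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        response,
    ):
        # called after a successful LLM call, before the proxy returns the response;
        # mutating `response` here changes what the client receives
        print(f"post-call hook fired, response type: {type(response)}")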
@@ -8,14 +8,19 @@
 # Tell us how we can improve! - Krrish & Ishaan
 
 
-from typing import Optional
-import litellm, traceback, sys
+from typing import Optional, Literal, Union
+import litellm, traceback, sys, uuid
 from litellm.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
 from litellm._logging import verbose_proxy_logger
-from litellm import ModelResponse
+from litellm.utils import (
+    ModelResponse,
+    EmbeddingResponse,
+    ImageResponse,
+    StreamingChoices,
+)
 from datetime import datetime
 import aiohttp, asyncio
@@ -24,7 +29,13 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
     user_api_key_cache = None
 
     # Class variables or attributes
-    def __init__(self):
+    def __init__(self, mock_testing: bool = False):
+        self.pii_tokens: dict = (
+            {}
+        )  # mapping of PII token to original text - only used with Presidio `replace` operation
+        if mock_testing == True:  # for testing purposes only
+            return
+
         self.presidio_analyzer_api_base = litellm.get_secret(
             "PRESIDIO_ANALYZER_API_BASE", None
         )
@@ -51,12 +62,15 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
         pass
 
     async def check_pii(self, text: str) -> str:
+        """
+        [TODO] make this more performant for high-throughput scenario
+        """
         try:
             async with aiohttp.ClientSession() as session:
                 # Make the first request to /analyze
                 analyze_url = f"{self.presidio_analyzer_api_base}/analyze"
                 analyze_payload = {"text": text, "language": "en"}
+                redacted_text = None
                 async with session.post(analyze_url, json=analyze_payload) as response:
                     analyze_results = await response.json()
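As background, a rough sketch of the analyze/anonymize round trip that check_pii performs against a Presidio deployment. The /analyze payload matches the diff; the /anonymize payload shape (passing analyzer_results back) is an assumption based on Presidio's REST API, since that part of the hunk is not shown:

import aiohttp


async def presidio_round_trip(analyzer_base: str, anonymizer_base: str, text: str) -> dict:
    # sketch only - entity configuration and error handling omitted
    async with aiohttp.ClientSession() as session:
        analyze_payload = {"text": text, "language": "en"}
        async with session.post(f"{analyzer_base}/analyze", json=analyze_payload) as resp:
            analyze_results = await resp.json()
        # assumed payload shape for /anonymize, per Presidio's REST docs
        anonymize_payload = {"text": text, "analyzer_results": analyze_results}
        async with session.post(f"{anonymizer_base}/anonymize", json=anonymize_payload) as resp:
            redacted = await resp.json()
    return redacted  # expected to carry "text" plus per-entity "items" with offsets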
@@ -72,6 +86,26 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
                 ) as response:
                     redacted_text = await response.json()
 
+            new_text = text
+            if redacted_text is not None:
+                for item in redacted_text["items"]:
+                    start = item["start"]
+                    end = item["end"]
+                    replacement = item["text"]  # replacement token
+                    if (
+                        item["operator"] == "replace"
+                        and litellm.output_parse_pii == True
+                    ):
+                        # check if token in dict
+                        # if exists, add a uuid to the replacement token for swapping back to the original text in llm response output parsing
+                        if replacement in self.pii_tokens:
+                            replacement = replacement + uuid.uuid4()
+
+                        self.pii_tokens[replacement] = new_text[
+                            start:end
+                        ]  # get text it'll replace
+
+                        new_text = new_text[:start] + replacement + new_text[end:]
             return redacted_text["text"]
         except Exception as e:
             traceback.print_exc()
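The bookkeeping added above is what makes output parsing possible: each replacement token (e.g. <PERSON>) is stored against the original span of user text, with a uuid suffix to keep repeated tokens distinct. A standalone sketch of that idea (hypothetical helper; note it casts the uuid to str, which the string concatenation requires):

import uuid

pii_tokens: dict = {}  # replacement token -> original user text


def record_replacement(original_text: str, start: int, end: int, token: str) -> str:
    if token in pii_tokens:  # same entity token seen before - disambiguate with a uuid suffix
        token = token + str(uuid.uuid4())
    pii_tokens[token] = original_text[start:end]  # remember what the token replaced
    return token


original = "hello world, my name is Jane Doe"
token = record_replacement(original, 24, 32, "<PERSON>")
masked = original[:24] + token + original[32:]
# output parsing later walks pii_tokens and replaces each token back with its stored value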
@@ -94,6 +128,7 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
         if call_type == "completion":  # /chat/completions requests
             messages = data["messages"]
             tasks = []
+
             for m in messages:
                 if isinstance(m["content"], str):
                     tasks.append(self.check_pii(text=m["content"]))
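Putting the request side together, a hedged sketch of driving the masking hook directly, the way the proxy does before forwarding a /chat/completions request. The method name async_pre_call_hook is taken from litellm's CustomLogger interface rather than from the visible hunk, and the call assumes reachable Presidio analyzer/anonymizer services (PRESIDIO_ANALYZER_API_BASE is shown earlier; the anonymizer counterpart is assumed), otherwise check_pii will fail:

import asyncio

from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking


async def mask_request() -> dict:
    pii_masking = _OPTIONAL_PresidioPIIMasking()
    data = {"messages": [{"role": "user", "content": "hi, my name is Jane Doe"}]}
    # same arguments the proxy passes to the pre-call hook
    masked = await pii_masking.async_pre_call_hook(
        user_api_key_dict=UserAPIKeyAuth(),
        cache=DualCache(),
        data=data,
        call_type="completion",
    )
    return masked  # message content now carries <PERSON>-style tokens instead of raw PII


# asyncio.run(mask_request())  # requires reachable Presidio services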
@@ -104,3 +139,30 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
                     "content"
                 ] = r  # replace content with redacted string
         return data
+
+    async def async_post_call_success_hook(
+        self,
+        user_api_key_dict: UserAPIKeyAuth,
+        response: Union[ModelResponse, EmbeddingResponse, ImageResponse],
+    ):
+        """
+        Output parse the response object to replace the masked tokens with user sent values
+        """
+        verbose_proxy_logger.debug(
+            f"PII Masking Args: litellm.output_parse_pii={litellm.output_parse_pii}; type of response={type(response)}"
+        )
+        if litellm.output_parse_pii == False:
+            return response
+
+        if isinstance(response, ModelResponse) and not isinstance(
+            response.choices[0], StreamingChoices
+        ):  # /chat/completions requests
+            if isinstance(response.choices[0].message.content, str):
+                verbose_proxy_logger.debug(
+                    f"self.pii_tokens: {self.pii_tokens}; initial response: {response.choices[0].message.content}"
+                )
+                for key, value in self.pii_tokens.items():
+                    response.choices[0].message.content = response.choices[
+                        0
+                    ].message.content.replace(key, value)
+        return response
@@ -166,9 +166,9 @@ class ProxyException(Exception):
 async def openai_exception_handler(request: Request, exc: ProxyException):
     # NOTE: DO NOT MODIFY THIS, its crucial to map to Openai exceptions
     return JSONResponse(
-        status_code=int(exc.code)
-        if exc.code
-        else status.HTTP_500_INTERNAL_SERVER_ERROR,
+        status_code=(
+            int(exc.code) if exc.code else status.HTTP_500_INTERNAL_SERVER_ERROR
+        ),
         content={
             "error": {
                 "message": exc.message,
@@ -2428,6 +2428,11 @@ async def chat_completion(
             )
 
         fastapi_response.headers["x-litellm-model-id"] = model_id
+
+        ### CALL HOOKS ### - modify outgoing data
+        response = await proxy_logging_obj.post_call_success_hook(
+            user_api_key_dict=user_api_key_dict, response=response
+        )
         return response
     except Exception as e:
         traceback.print_exc()
@@ -4553,9 +4558,11 @@ async def get_routes():
             "path": getattr(route, "path", None),
             "methods": getattr(route, "methods", None),
             "name": getattr(route, "name", None),
-            "endpoint": getattr(route, "endpoint", None).__name__
-            if getattr(route, "endpoint", None)
-            else None,
+            "endpoint": (
+                getattr(route, "endpoint", None).__name__
+                if getattr(route, "endpoint", None)
+                else None
+            ),
         }
         routes.append(route_info)
@@ -11,6 +11,7 @@ from litellm.caching import DualCache
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler,
 )
+from litellm import ModelResponse, EmbeddingResponse, ImageResponse
 from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
 from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
 from litellm.integrations.custom_logger import CustomLogger
@@ -377,6 +378,28 @@ class ProxyLogging:
                 raise e
         return
 
+    async def post_call_success_hook(
+        self,
+        response: Union[ModelResponse, EmbeddingResponse, ImageResponse],
+        user_api_key_dict: UserAPIKeyAuth,
+    ):
+        """
+        Allow user to modify outgoing data
+
+        Covers:
+        1. /chat/completions
+        """
+        new_response = copy.deepcopy(response)
+        for callback in litellm.callbacks:
+            try:
+                if isinstance(callback, CustomLogger):
+                    await callback.async_post_call_success_hook(
+                        user_api_key_dict=user_api_key_dict, response=new_response
+                    )
+            except Exception as e:
+                raise e
+        return new_response
+
     ### DB CONNECTOR ###
     # Define the retry decorator with backoff strategy
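The new ProxyLogging.post_call_success_hook simply fans out to every CustomLogger registered in litellm.callbacks, so the masking handler only needs to be registered there. Normally the proxy does this from its config; the manual wiring below is an illustrative sketch (mock_testing=True skips the Presidio endpoint lookup, which output parsing does not need):

import litellm

from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking

# register the handler so post_call_success_hook (and the other proxy hooks) can reach it
litellm.callbacks.append(_OPTIONAL_PresidioPIIMasking(mock_testing=True))
litellm.output_parse_pii = True  # enable token -> original-text substitution on responses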
litellm/tests/test_presidio_masking.py (new file, 65 lines)
@@ -0,0 +1,65 @@
+# What is this?
+## Unit test for presidio pii masking
+import sys, os, asyncio, time, random
+from datetime import datetime
+import traceback
+from dotenv import load_dotenv
+
+load_dotenv()
+import os
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import pytest
+import litellm
+from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
+from litellm import Router, mock_completion
+from litellm.proxy.utils import ProxyLogging
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.caching import DualCache
+
+
+@pytest.mark.asyncio
+async def test_output_parsing():
+    """
+    - have presidio pii masking - mask an input message
+    - make llm completion call
+    - have presidio pii masking - output parse message
+    - assert that no masked tokens are in the input message
+    """
+    litellm.output_parse_pii = True
+    pii_masking = _OPTIONAL_PresidioPIIMasking(mock_testing=True)
+
+    initial_message = [
+        {
+            "role": "user",
+            "content": "hello world, my name is Jane Doe. My number is: 034453334",
+        }
+    ]
+
+    filtered_message = [
+        {
+            "role": "user",
+            "content": "hello world, my name is <PERSON>. My number is: <PHONE_NUMBER>",
+        }
+    ]
+
+    pii_masking.pii_tokens = {"<PERSON>": "Jane Doe", "<PHONE_NUMBER>": "034453334"}
+
+    response = mock_completion(
+        model="gpt-3.5-turbo",
+        messages=filtered_message,
+        mock_response="Hello <PERSON>! How can I assist you today?",
+    )
+    new_response = await pii_masking.async_post_call_success_hook(
+        user_api_key_dict=UserAPIKeyAuth(), response=response
+    )
+
+    assert (
+        new_response.choices[0].message.content
+        == "Hello Jane Doe! How can I assist you today?"
+    )
+
+
+# asyncio.run(test_output_parsing())