feat(llm_guard.py): support llm guard for content moderation

https://github.com/BerriAI/litellm/issues/2056
Krrish Dholakia 2024-02-19 20:51:25 -08:00
parent 05d8d550f8
commit 14513af2e2
3 changed files with 227 additions and 0 deletions


@@ -0,0 +1,122 @@
# +------------------------+
#
# LLM Guard
# https://llm-guard.com/
#
# +------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan
## This provides an LLM Guard Integration for content moderation on the proxy
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid, os
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
litellm.set_verbose = True
class _ENTERPRISE_LLMGuard(CustomLogger):
# Class variables or attributes
def __init__(
self, mock_testing: bool = False, mock_redacted_text: Optional[dict] = None
):
self.mock_redacted_text = mock_redacted_text
        if mock_testing is True:  # for testing purposes only
return
self.llm_guard_api_base = litellm.get_secret("LLM_GUARD_API_BASE", None)
if self.llm_guard_api_base is None:
raise Exception("Missing `LLM_GUARD_API_BASE` from environment")
elif not self.llm_guard_api_base.endswith("/"):
self.llm_guard_api_base += "/"
def print_verbose(self, print_statement):
try:
verbose_proxy_logger.debug(print_statement)
if litellm.set_verbose:
print(print_statement) # noqa
        except Exception:
            pass
async def moderation_check(self, text: str):
"""
        [TODO] make this more performant for high-throughput scenarios
"""
try:
async with aiohttp.ClientSession() as session:
if self.mock_redacted_text is not None:
redacted_text = self.mock_redacted_text
else:
                    # Call the LLM Guard /analyze/prompt endpoint
analyze_url = f"{self.llm_guard_api_base}analyze/prompt"
verbose_proxy_logger.debug(f"Making request to: {analyze_url}")
analyze_payload = {"prompt": text}
redacted_text = None
async with session.post(
analyze_url, json=analyze_payload
) as response:
redacted_text = await response.json()
if redacted_text is not None:
                    if (
                        redacted_text.get("is_valid", None) is not None
                        and redacted_text["is_valid"] is not True
                    ):
                        raise HTTPException(
                            status_code=400,
                            detail={"error": "Violated content safety policy"},
                        )
else:
raise HTTPException(
status_code=500,
detail={
"error": f"Invalid content moderation response: {redacted_text}"
},
)
except Exception as e:
traceback.print_exc()
raise e
async def async_moderation_hook(
self,
data: dict,
):
"""
- Calls the LLM Guard Endpoint
- Rejects request if it fails safety check
- Use the sanitized prompt returned
- LLM Guard can handle things like PII Masking, etc.
"""
if "messages" in data:
            safety_check_messages = data["messages"][
                -1
            ]  # only scan the latest message, to keep the payload within the guard's token limit
if (
isinstance(safety_check_messages, dict)
and "content" in safety_check_messages
and isinstance(safety_check_messages["content"], str)
):
await self.moderation_check(safety_check_messages["content"])
return data
# llm_guard = _ENTERPRISE_LLMGuard()
# asyncio.run(
# llm_guard.async_moderation_hook(
# data={"messages": [{"role": "user", "content": "Hey how's it going?"}]}
# )
# )
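
For illustration, here is a minimal sketch of the rejection path using the `mock_testing` constructor arguments above. The response fields ("sanitized_prompt", "is_valid", "scanners") mirror the unit tests added in this commit; `_demo_rejection` is a hypothetical helper, not part of the module.

# Sketch only: exercise moderation_check with a mocked /analyze/prompt result.
# A response with is_valid=False fails the safety check and surfaces as an
# HTTP 400 error to the proxy caller.
async def _demo_rejection():
    guard = _ENTERPRISE_LLMGuard(
        mock_testing=True,
        mock_redacted_text={
            "sanitized_prompt": "hello world",
            "is_valid": False,
            "scanners": {"Regex": 0.0},
        },
    )
    try:
        await guard.moderation_check(text="hello world")
    except HTTPException as e:
        print(e.detail)  # {'error': 'Violated content safety policy'}

# asyncio.run(_demo_rejection())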


@@ -1467,6 +1467,16 @@ class ProxyConfig:
_ENTERPRISE_GoogleTextModeration()
)
imported_list.append(google_text_moderation_obj)
elif (
isinstance(callback, str)
and callback == "llmguard_moderations"
):
from litellm.proxy.enterprise.enterprise_hooks.llm_guard import (
_ENTERPRISE_LLMGuard,
)
llm_guard_moderation_obj = _ENTERPRISE_LLMGuard()
imported_list.append(llm_guard_moderation_obj)
else:
imported_list.append(
get_instance_fn(
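
For context, a minimal usage sketch of what this branch wires up, assuming `llmguard_moderations` is listed in the proxy's callback settings and `LLM_GUARD_API_BASE` points at a running LLM Guard deployment (the endpoint URL below is hypothetical):

# Sketch only: the constructor mirrors the branch above and raises if
# LLM_GUARD_API_BASE is missing from the environment.
import os

os.environ["LLM_GUARD_API_BASE"] = "http://0.0.0.0:8192"  # hypothetical endpoint

from litellm.proxy.enterprise.enterprise_hooks.llm_guard import _ENTERPRISE_LLMGuard

llm_guard_moderation_obj = _ENTERPRISE_LLMGuard()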


@@ -0,0 +1,95 @@
# What is this?
## This tests the llm guard integration
import sys, os, asyncio, time, random
from datetime import datetime
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm.proxy.enterprise.enterprise_hooks.llm_guard import _ENTERPRISE_LLMGuard
from litellm import Router, mock_completion
from litellm.proxy.utils import ProxyLogging
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache
### UNIT TESTS FOR LLM GUARD ###
# Test that a response which passes the llm guard safety check is allowed through
@pytest.mark.asyncio
async def test_llm_guard_valid_response():
"""
Tests to see llm guard raises an error for a flagged response
"""
input_a_anonymizer_results = {
"sanitized_prompt": "hello world",
"is_valid": True,
"scanners": {"Regex": 0.0},
}
llm_guard = _ENTERPRISE_LLMGuard(
mock_testing=True, mock_redacted_text=input_a_anonymizer_results
)
_api_key = "sk-12345"
user_api_key_dict = UserAPIKeyAuth(api_key=_api_key)
local_cache = DualCache()
try:
await llm_guard.async_moderation_hook(
data={
"messages": [
{
"role": "user",
"content": "hello world, my name is Jane Doe. My number is: 23r323r23r2wwkl",
}
]
},
)
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
# Test that a flagged (is_valid=False) response raises an error
@pytest.mark.asyncio
async def test_llm_guard_error_raising():
"""
    Tests that llm guard raises an error for a flagged response
"""
input_b_anonymizer_results = {
"sanitized_prompt": "hello world",
"is_valid": False,
"scanners": {"Regex": 0.0},
}
llm_guard = _ENTERPRISE_LLMGuard(
mock_testing=True, mock_redacted_text=input_b_anonymizer_results
)
_api_key = "sk-12345"
user_api_key_dict = UserAPIKeyAuth(api_key=_api_key)
local_cache = DualCache()
try:
await llm_guard.async_moderation_hook(
data={
"messages": [
{
"role": "user",
"content": "hello world, my name is Jane Doe. My number is: 23r323r23r2wwkl",
}
]
},
)
        pytest.fail("Should have failed - expected an exception for a flagged response")
    except Exception:
        pass
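
If useful, the two scenarios above can also be driven directly with asyncio outside of pytest; a sketch (the __main__ block below is not part of the original test file):

if __name__ == "__main__":
    # Sketch only: run both test coroutines as plain functions.
    asyncio.run(test_llm_guard_valid_response())
    asyncio.run(test_llm_guard_error_raising())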