forked from phoenix/litellm-mirror
feat(proxy_server.py): enable llm api based prompt injection checks
run user calls through an llm api to check for prompt injection attacks. This happens in parallel to the actual llm call using `async_moderation_hook`
This commit is contained in:
parent
f24d3ffdb6
commit
d91f9a9f50
11 changed files with 271 additions and 24 deletions
|
@ -96,6 +96,9 @@ class _ENTERPRISE_GoogleTextModeration(CustomLogger):
|
|||
async def async_moderation_hook(
|
||||
self,
|
||||
data: dict,
|
||||
call_type: (
|
||||
Literal["completion"] | Literal["embeddings"] | Literal["image_generation"]
|
||||
),
|
||||
):
|
||||
"""
|
||||
- Calls Google's Text Moderation API
|
||||
|
|
|
@ -99,6 +99,9 @@ class _ENTERPRISE_LlamaGuard(CustomLogger):
|
|||
async def async_moderation_hook(
|
||||
self,
|
||||
data: dict,
|
||||
call_type: (
|
||||
Literal["completion"] | Literal["embeddings"] | Literal["image_generation"]
|
||||
),
|
||||
):
|
||||
"""
|
||||
- Calls the Llama Guard Endpoint
|
||||
|
|
|
@ -22,6 +22,7 @@ from litellm.utils import (
|
|||
)
|
||||
from datetime import datetime
|
||||
import aiohttp, asyncio
|
||||
from litellm.utils import get_formatted_prompt
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
|
@ -94,6 +95,9 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
|
|||
async def async_moderation_hook(
|
||||
self,
|
||||
data: dict,
|
||||
call_type: (
|
||||
Literal["completion"] | Literal["embeddings"] | Literal["image_generation"]
|
||||
),
|
||||
):
|
||||
"""
|
||||
- Calls the LLM Guard Endpoint
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue