fix(llm_guard.py): add streaming hook for moderation calls

Krrish Dholakia 2024-02-20 20:31:32 -08:00
parent 0a5b8f0e4e
commit 49847347d0
4 changed files with 36 additions and 25 deletions

@@ -101,19 +101,16 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
        - Use the sanitized prompt returned
        - LLM Guard can handle things like PII Masking, etc.
        """
        if "messages" in data:
            safety_check_messages = data["messages"][
                -1
            ]  # get the last response - llama guard has a 4k token limit
            if (
                isinstance(safety_check_messages, dict)
                and "content" in safety_check_messages
                and isinstance(safety_check_messages["content"], str)
            ):
                await self.moderation_check(safety_check_messages["content"])
        return data

    async def async_post_call_streaming_hook(
        self, user_api_key_dict: UserAPIKeyAuth, response: str
    ):
        if response is not None:
            await self.moderation_check(text=response)
        return response

# llm_guard = _ENTERPRISE_LLMGuard()
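
For context, a minimal sketch of the request shape the pre-call path above operates on: only the final entry in messages reaches moderation_check (the inline comment notes the 4k-token limit carried over from the llama guard integration). The payload below is illustrative and not part of the commit.

# Illustrative OpenAI-style chat payload; the hook above moderates only
# the last message in the list.
data = {
    "messages": [
        {"role": "user", "content": "earlier turn"},
        {"role": "assistant", "content": "earlier reply"},
        {"role": "user", "content": "latest turn - the only content checked"},
    ]
}

last = data["messages"][-1]
assert isinstance(last, dict) and isinstance(last.get("content"), str)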
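
As a usage sketch, the new streaming hook could be driven once a streamed response has been fully accumulated, since moderation needs the complete text. collect_stream, fake_chunks, and the way the guard and auth objects are obtained are assumptions for illustration; only async_post_call_streaming_hook and moderation_check come from the diff above, and moderation_check presumably raises on flagged content.

import asyncio  # used by the commented-out example invocation below


async def fake_chunks():
    # Hypothetical stand-in for streamed response chunks.
    for piece in ["Hello", ", ", "world"]:
        yield piece


async def collect_stream(chunks) -> str:
    # Hypothetical helper: accumulate streamed text into the full response.
    parts = []
    async for chunk in chunks:
        parts.append(chunk)
    return "".join(parts)


async def guarded_stream(llm_guard, user_api_key_dict, chunks):
    # Run the aggregated text through the new post-call streaming hook.
    full_response = await collect_stream(chunks)
    return await llm_guard.async_post_call_streaming_hook(
        user_api_key_dict=user_api_key_dict, response=full_response
    )

# e.g. asyncio.run(guarded_stream(_ENTERPRISE_LLMGuard(), user_api_key_dict, fake_chunks()))

Whether the proxy invokes the hook per-chunk or on the aggregated text is a design choice this sketch does not settle; the aggregated variant is shown because moderation generally needs the full context.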