fix(llm_guard.py): add streaming hook for moderation calls

2024-02-20 20:31:32 -08:00 · 2024-02-20 20:31:32 -08:00 · 49847347d0
commit 49847347d0
parent 0a5b8f0e4e
4 changed files with 36 additions and 25 deletions
--- a/enterprise/enterprise_hooks/llm_guard.py
+++ b/enterprise/enterprise_hooks/llm_guard.py
@ -101,19 +101,16 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
        - Use the sanitized prompt returned
            - LLM Guard can handle things like PII Masking, etc.
        """
        if "messages" in data:
            safety_check_messages = data["messages"][
                -1
            ]  # get the last response - llama guard has a 4k token limit
            if (
                isinstance(safety_check_messages, dict)
                and "content" in safety_check_messages
                and isinstance(safety_check_messages["content"], str)
            ):
                await self.moderation_check(safety_check_messages["content"])
        return data
    async def async_post_call_streaming_hook(
        self, user_api_key_dict: UserAPIKeyAuth, response: str
    ):
        """
        Run the moderation check on a streamed response.

        - Skips the check when `response` is None
        - `user_api_key_dict` is accepted but not read here
        - Anything raised by `self.moderation_check` propagates to the caller

        Returns `response` unchanged.
        """
        if response is None:
            # nothing to moderate
            return response
        await self.moderation_check(text=response)
        return response
 # llm_guard = _ENTERPRISE_LLMGuard()
--- a/litellm/integrations/custom_logger.py
+++ b/litellm/integrations/custom_logger.py
@ -75,6 +75,13 @@ class CustomLogger:  # https://docs.litellm.ai/docs/observability/custom_callbac
    async def async_moderation_hook(self, data: dict):
        """
        Default moderation hook - intentionally does nothing.

        - `data` is ignored here
        - Always returns None
        """
        return None
    async def async_post_call_streaming_hook(
        self, user_api_key_dict: UserAPIKeyAuth, response: str
    ):
        """
        Default streaming-response hook - intentionally does nothing.

        - `user_api_key_dict` and `response` are both ignored here
        - Always returns None
        """
        return None
    #### SINGLE-USE #### - https://docs.litellm.ai/docs/observability/custom_callback#using-your-custom-callback-function
    def log_input_event(self, model, messages, kwargs, print_verbose, callback_func):
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@ -401,6 +401,27 @@ class ProxyLogging:
                raise e
        return new_response
    async def post_call_streaming_hook(
        self,
        response: str,
        user_api_key_dict: UserAPIKeyAuth,
    ):
        """
        - Check the outgoing streaming response up to that point
        - Pass it to `async_post_call_streaming_hook` of every `CustomLogger`
          found in `litellm.callbacks`
        - Reject the request if a hook raises (the exception propagates as-is)

        Returns a copy of `response`; values returned by the hooks are ignored.
        """
        # hooks receive a copy rather than the caller's own object
        new_response = copy.deepcopy(response)
        for callback in litellm.callbacks:
            # entries that are not CustomLogger instances are skipped
            if isinstance(callback, CustomLogger):
                await callback.async_post_call_streaming_hook(
                    user_api_key_dict=user_api_key_dict, response=new_response
                )
        return new_response
 ### DB CONNECTOR ###
 # Define the retry decorator with backoff strategy
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -909,20 +909,6 @@ class Logging:
                        f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
                    )
            if litellm.max_budget and self.stream:
                start_time = self.start_time
                end_time = (
                    self.start_time
                )  # no time has passed as the call hasn't been made yet
                time_diff = (end_time - start_time).total_seconds()
                float_diff = float(time_diff)
                litellm._current_cost += litellm.completion_cost(
                    model=self.model,
                    prompt="".join(message["content"] for message in self.messages),
                    completion="",
                    total_time=float_diff,
                )
            # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
            callbacks = litellm.input_callback + self.dynamic_input_callbacks
            for callback in callbacks: