forked from phoenix/litellm-mirror
Merge pull request #4176 from BerriAI/litellm_fix_redacting_msgs
[Fix] Redacting messages from OTEL + Refactor `utils.py` to use `litellm_core_utils`
commit 944d95d636
4 changed files with 131 additions and 39 deletions
litellm/litellm_core_utils/redact_messages.py (new file, 65 lines added)
@@ -0,0 +1,65 @@
# +-----------------------------------------------+
# |                                               |
# |           Give Feedback / Get Help            |
# | https://github.com/BerriAI/litellm/issues/new |
# |                                               |
# +-----------------------------------------------+
#
#  Thank you users! We ❤️ you! - Krrish & Ishaan

import copy
from typing import TYPE_CHECKING, Any

import litellm

if TYPE_CHECKING:
    from litellm.utils import Logging as _LiteLLMLoggingObject

    LiteLLMLoggingObject = _LiteLLMLoggingObject
else:
    LiteLLMLoggingObject = Any


def redact_message_input_output_from_logging(
    litellm_logging_obj: LiteLLMLoggingObject, result
):
    """
    Removes messages, prompts, input, response from logging. This modifies the data in-place
    only redacts when litellm.turn_off_message_logging == True
    """
    # check if user opted out of logging message/response to callbacks
    if litellm.turn_off_message_logging is not True:
        return result

    _result = copy.deepcopy(result)
    # remove messages, prompts, input, response from logging
    litellm_logging_obj.model_call_details["messages"] = [
        {"role": "user", "content": "redacted-by-litellm"}
    ]
    litellm_logging_obj.model_call_details["prompt"] = ""
    litellm_logging_obj.model_call_details["input"] = ""

    # response cleaning
    # ChatCompletion Responses
    if (
        litellm_logging_obj.stream is True
        and "complete_streaming_response" in litellm_logging_obj.model_call_details
    ):
        _streaming_response = litellm_logging_obj.model_call_details[
            "complete_streaming_response"
        ]
        for choice in _streaming_response.choices:
            if isinstance(choice, litellm.Choices):
                choice.message.content = "redacted-by-litellm"
            elif isinstance(choice, litellm.utils.StreamingChoices):
                choice.delta.content = "redacted-by-litellm"
    else:
        if _result is not None:
            if isinstance(_result, litellm.ModelResponse):
                if hasattr(_result, "choices") and _result.choices is not None:
                    for choice in _result.choices:
                        if isinstance(choice, litellm.Choices):
                            choice.message.content = "redacted-by-litellm"
                        elif isinstance(choice, litellm.utils.StreamingChoices):
                            choice.delta.content = "redacted-by-litellm"

    return _result
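
For orientation, a minimal usage sketch of the new helper follows. It mirrors the unit test added further down (the Logging and ModelResponse constructor arguments are copied from that test, and it assumes, as the test does, that ModelResponse materializes the choice dict into litellm.Choices). The behavioral point of the refactor: the helper deep-copies `result` and returns the redacted copy, redacting the logging object in place, so the caller's response object is never mutated.

from datetime import datetime

import litellm
from litellm.litellm_core_utils.redact_messages import (
    redact_message_input_output_from_logging,
)
from litellm.utils import Logging

litellm.turn_off_message_logging = True

logging_obj = Logging(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    stream=False,
    call_type="acompletion",
    litellm_call_id="1234",
    start_time=datetime.now(),
    function_id="1234",
)
response = litellm.ModelResponse(
    choices=[
        {
            "finish_reason": "stop",
            "index": 0,
            "message": {"role": "assistant", "content": "hello there"},
        }
    ]
)

redacted = redact_message_input_output_from_logging(
    litellm_logging_obj=logging_obj, result=response
)

# The original response is untouched; only the returned copy and the fields on
# logging_obj.model_call_details carry the "redacted-by-litellm" placeholder.
assert response.choices[0].message.content == "hello there"
assert redacted.choices[0].message.content == "redacted-by-litellm"
assert logging_obj.model_call_details["messages"] == [
    {"role": "user", "content": "redacted-by-litellm"}
]

litellm.turn_off_message_logging = False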
@@ -24,6 +24,7 @@ general_settings:
 litellm_settings:
   callbacks: ["otel"]
   store_audit_logs: true
+  turn_off_message_logging: true
   redact_messages_in_exceptions: True
   enforced_params:
     - user
@@ -3,6 +3,7 @@ from unittest import mock

 from dotenv import load_dotenv
 import copy
+from datetime import datetime

 load_dotenv()
 import os
@@ -395,3 +396,52 @@ def test_get_supported_openai_params() -> None:

     # Unmapped provider
     assert get_supported_openai_params("nonexistent") is None
+
+
+def test_redact_msgs_from_logs():
+    """
+    Tests that turn_off_message_logging does not modify the response_obj
+
+    On the proxy some users were seeing the redaction impact client side responses
+    """
+    from litellm.litellm_core_utils.redact_messages import (
+        redact_message_input_output_from_logging,
+    )
+    from litellm.utils import Logging
+
+    litellm.turn_off_message_logging = True
+
+    response_obj = litellm.ModelResponse(
+        choices=[
+            {
+                "finish_reason": "stop",
+                "index": 0,
+                "message": {
+                    "content": "I'm LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner.",
+                    "role": "assistant",
+                },
+            }
+        ]
+    )
+
+    _redacted_response_obj = redact_message_input_output_from_logging(
+        result=response_obj,
+        litellm_logging_obj=Logging(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "hi"}],
+            stream=False,
+            call_type="acompletion",
+            litellm_call_id="1234",
+            start_time=datetime.now(),
+            function_id="1234",
+        ),
+    )
+
+    # Assert the response_obj content is NOT modified
+    assert (
+        response_obj.choices[0].message.content
+        == "I'm LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner."
+    )
+
+    litellm.turn_off_message_logging = False
+    print("Test passed")
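
The new test exercises the non-streaming branch. A hypothetical companion test for the streaming branch (not part of this PR) could look roughly like the sketch below; it assumes, as the helper's code does, that the aggregated response is stored under `model_call_details["complete_streaming_response"]` on the Logging object:

def test_redact_msgs_from_logs_streaming():
    # Hypothetical sketch, not in this PR: exercise the streaming branch, where
    # redaction targets model_call_details["complete_streaming_response"].
    from litellm.litellm_core_utils.redact_messages import (
        redact_message_input_output_from_logging,
    )
    from litellm.utils import Logging

    litellm.turn_off_message_logging = True

    litellm_logging_obj = Logging(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        stream=True,
        call_type="acompletion",
        litellm_call_id="1234",
        start_time=datetime.now(),
        function_id="1234",
    )
    complete_response = litellm.ModelResponse(
        choices=[
            {
                "finish_reason": "stop",
                "index": 0,
                "message": {"role": "assistant", "content": "hello world"},
            }
        ]
    )
    litellm_logging_obj.model_call_details["complete_streaming_response"] = (
        complete_response
    )

    redact_message_input_output_from_logging(
        litellm_logging_obj=litellm_logging_obj, result=None
    )

    # The aggregated response stored for logging is redacted in place
    assert complete_response.choices[0].message.content == "redacted-by-litellm"

    litellm.turn_off_message_logging = False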
@@ -35,6 +35,9 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata
 from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
 from litellm.caching import DualCache
 from litellm.types.utils import CostPerToken, ProviderField, ModelInfo
+from litellm.litellm_core_utils.redact_messages import (
+    redact_message_input_output_from_logging,
+)

 oidc_cache = DualCache()

@@ -1478,7 +1481,9 @@ class Logging:
             print_verbose(
                 f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
             )
-        self.redact_message_input_output_from_logging(result=original_response)
+        original_response = redact_message_input_output_from_logging(
+            litellm_logging_obj=self, result=original_response
+        )
         # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made

         callbacks = litellm.input_callback + self.dynamic_input_callbacks
@@ -1675,7 +1680,9 @@ class Logging:
         else:
             callbacks = litellm.success_callback

-        self.redact_message_input_output_from_logging(result=result)
+        result = redact_message_input_output_from_logging(
+            result=result, litellm_logging_obj=self
+        )

         for callback in callbacks:
             try:
@@ -2308,7 +2315,9 @@ class Logging:
         else:
             callbacks = litellm._async_success_callback

-        self.redact_message_input_output_from_logging(result=result)
+        result = redact_message_input_output_from_logging(
+            result=result, litellm_logging_obj=self
+        )

         for callback in callbacks:
             # check if callback can run for this request
@@ -2518,7 +2527,9 @@ class Logging:
         result = None  # result sent to all loggers, init this to None incase it's not created

-        self.redact_message_input_output_from_logging(result=result)
+        result = redact_message_input_output_from_logging(
+            result=result, litellm_logging_obj=self
+        )
         for callback in callbacks:
             try:
                 if callback == "lite_debugger":
@@ -2742,41 +2753,6 @@ class Logging:
                 f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
             )

-    def redact_message_input_output_from_logging(self, result):
-        """
-        Removes messages, prompts, input, response from logging. This modifies the data in-place
-        only redacts when litellm.turn_off_message_logging == True
-        """
-        # check if user opted out of logging message/response to callbacks
-        if litellm.turn_off_message_logging is True:
-            # remove messages, prompts, input, response from logging
-            self.model_call_details["messages"] = [
-                {"role": "user", "content": "redacted-by-litellm"}
-            ]
-            self.model_call_details["prompt"] = ""
-            self.model_call_details["input"] = ""
-
-            # response cleaning
-            # ChatCompletion Responses
-            if self.stream and "complete_streaming_response" in self.model_call_details:
-                _streaming_response = self.model_call_details[
-                    "complete_streaming_response"
-                ]
-                for choice in _streaming_response.choices:
-                    if isinstance(choice, litellm.Choices):
-                        choice.message.content = "redacted-by-litellm"
-                    elif isinstance(choice, litellm.utils.StreamingChoices):
-                        choice.delta.content = "redacted-by-litellm"
-            else:
-                if result is not None:
-                    if isinstance(result, litellm.ModelResponse):
-                        if hasattr(result, "choices") and result.choices is not None:
-                            for choice in result.choices:
-                                if isinstance(choice, litellm.Choices):
-                                    choice.message.content = "redacted-by-litellm"
-                                elif isinstance(choice, litellm.utils.StreamingChoices):
-                                    choice.delta.content = "redacted-by-litellm"
-

 def exception_logging(
     additional_args={},