Merge pull request #3330 from BerriAI/litellm_rdct_msgs
[Feat] Redact Logging Messages/Response content on Logging Providers with `litellm.turn_off_message_logging=True`
Commit 6762d07c7f
6 changed files with 127 additions and 53 deletions
@@ -167,6 +167,9 @@ messages = [
 chat(messages)
 ```

+## Redacting Messages, Response Content from Langfuse Logging
+
+Set `litellm.turn_off_message_logging=True`. This will prevent the messages and responses from being logged to Langfuse, but request metadata will still be logged.

 ## Troubleshooting & Errors
 ### Data not getting logged to Langfuse ?
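For reference, a minimal sketch of what the new `litellm.turn_off_message_logging` flag does when used with the Langfuse callback (assumes `LANGFUSE_PUBLIC_KEY`, `LANGFUSE_SECRET_KEY`, and `OPENAI_API_KEY` are set in the environment):

```python
import litellm

# redact message/response content before it is sent to logging providers;
# request metadata (model, tokens, latency, etc.) is still logged
litellm.turn_off_message_logging = True
litellm.success_callback = ["langfuse"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response)
```

The resulting Langfuse trace then shows `redacted-by-litellm` in place of the prompt and completion content.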
@@ -40,5 +40,9 @@ response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content
 print(response)
 ```

+## Redacting Messages, Response Content from Sentry Logging
+
+Set `litellm.turn_off_message_logging=True`. This will prevent the messages and responses from being logged to Sentry, but request metadata will still be logged.
+
 [Let us know](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+) if you need any additional options from Sentry.
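A similar sketch for the Sentry integration, where the callback is typically registered as a failure callback (assumes `SENTRY_DSN` is set in the environment):

```python
import litellm

litellm.turn_off_message_logging = True  # redact message/response content
litellm.failure_callback = ["sentry"]    # report failed LLM calls to Sentry

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
    )
except Exception:
    pass  # the failure event (minus message content) is still sent to Sentry
```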
@@ -569,6 +569,22 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
 All requests made with these keys will log data to their team-specific logging.

+### Redacting Messages, Response Content from Langfuse Logging
+
+Set `litellm.turn_off_message_logging=True`. This will prevent the messages and responses from being logged to Langfuse, but request metadata will still be logged.
+
+```yaml
+model_list:
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: gpt-3.5-turbo
+
+litellm_settings:
+  success_callback: ["langfuse"]
+  turn_off_message_logging: True
+```
+
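For context, a minimal sketch of calling the proxy once a config like the one above is loaded (assumes the proxy is running on `http://0.0.0.0:4000` and that `sk-1234` is a previously generated virtual key):

```python
import openai

# point the standard OpenAI client at the LiteLLM proxy
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response)
# with turn_off_message_logging: True, the Langfuse trace contains
# "redacted-by-litellm" instead of the request/response content
```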
 ## Logging Proxy Input/Output - DataDog

 We will use the `--config` to set `litellm.success_callback = ["datadog"]`; this will log all successful LLM calls to DataDog
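For illustration, a minimal SDK-level sketch of the same callback setting (assumes the DataDog credentials, e.g. `DD_API_KEY` and `DD_SITE`, are set in the environment):

```python
import litellm

litellm.success_callback = ["datadog"]  # log successful LLM calls to DataDog

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response)
```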
@@ -45,6 +45,7 @@ _async_failure_callback: List[Callable] = (
 ) # internal variable - async custom callbacks are routed here.
 pre_call_rules: List[Callable] = []
 post_call_rules: List[Callable] = []
+turn_off_message_logging: Optional[bool] = False
 ## end of callbacks #############

 email: Optional[str] = (
@@ -161,40 +161,54 @@ async def make_async_calls():
     return total_time


-# def test_langfuse_logging_async_text_completion():
-#     try:
-#         pre_langfuse_setup()
-#         litellm.set_verbose = False
-#         litellm.success_callback = ["langfuse"]
-
-#         async def _test_langfuse():
-#             response = await litellm.atext_completion(
-#                 model="gpt-3.5-turbo-instruct",
-#                 prompt="this is a test",
-#                 max_tokens=5,
-#                 temperature=0.7,
-#                 timeout=5,
-#                 user="test_user",
-#                 stream=True
-#             )
-#             async for chunk in response:
-#                 print()
-#                 print(chunk)
-#             await asyncio.sleep(1)
-#             return response
-
-#         response = asyncio.run(_test_langfuse())
-#         print(f"response: {response}")
-
-#         # # # check langfuse.log to see if there was a failed response
-#         search_logs("langfuse.log")
-#     except litellm.Timeout as e:
-#         pass
-#     except Exception as e:
-#         pytest.fail(f"An exception occurred - {e}")
-
-
-# test_langfuse_logging_async_text_completion()
+@pytest.mark.asyncio
+@pytest.mark.parametrize("stream", [False, True])
+async def test_langfuse_logging_without_request_response(stream):
+    try:
+        import uuid
+
+        _unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
+        litellm.set_verbose = True
+        litellm.turn_off_message_logging = True
+        litellm.success_callback = ["langfuse"]
+        response = await litellm.acompletion(
+            model="gpt-3.5-turbo",
+            mock_response="It's simple to use and easy to get started",
+            messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
+            max_tokens=10,
+            temperature=0.2,
+            stream=stream,
+            metadata={"trace_id": _unique_trace_name},
+        )
+        print(response)
+        if stream:
+            async for chunk in response:
+                print(chunk)
+
+        await asyncio.sleep(3)
+
+        import langfuse
+
+        langfuse_client = langfuse.Langfuse(
+            public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
+            secret_key=os.environ["LANGFUSE_SECRET_KEY"],
+        )
+
+        # get trace with _unique_trace_name
+        trace = langfuse_client.get_generations(trace_id=_unique_trace_name)
+
+        print("trace_from_langfuse", trace)
+
+        _trace_data = trace.data
+
+        assert _trace_data[0].input == {"messages": "redacted-by-litellm"}
+        assert _trace_data[0].output == {
+            "role": "assistant",
+            "content": "redacted-by-litellm",
+        }
+
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {e}")


 @pytest.mark.skip(reason="beta test - checking langfuse output")
@@ -1212,7 +1212,6 @@ class Logging:
                 print_verbose(
                     f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
                 )
-
             # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
             callbacks = litellm.input_callback + self.dynamic_input_callbacks
             for callback in callbacks:
@@ -1229,29 +1228,17 @@ class Logging:
                         litellm_call_id=self.litellm_params["litellm_call_id"],
                         print_verbose=print_verbose,
                     )

-                elif callback == "lite_debugger":
-                    print_verbose(
-                        f"reaches litedebugger for logging! - model_call_details {self.model_call_details}"
-                    )
-                    model = self.model_call_details["model"]
-                    messages = self.model_call_details["input"]
-                    print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
-                    liteDebuggerClient.input_log_event(
-                        model=model,
-                        messages=messages,
-                        end_user=self.model_call_details.get("user", "default"),
-                        litellm_call_id=self.litellm_params["litellm_call_id"],
-                        litellm_params=self.model_call_details["litellm_params"],
-                        optional_params=self.model_call_details["optional_params"],
-                        print_verbose=print_verbose,
-                        call_type=self.call_type,
-                    )
                 elif callback == "sentry" and add_breadcrumb:
-                    print_verbose("reaches sentry breadcrumbing")
+                    details_to_log = copy.deepcopy(self.model_call_details)
+                    if litellm.turn_off_message_logging:
+                        # make a copy of the _model_Call_details and log it
+                        details_to_log.pop("messages", None)
+                        details_to_log.pop("input", None)
+                        details_to_log.pop("prompt", None)
+
                     add_breadcrumb(
                         category="litellm.llm_call",
-                        message=f"Model Call Details pre-call: {self.model_call_details}",
+                        message=f"Model Call Details pre-call: {details_to_log}",
                         level="info",
                     )
                 elif isinstance(callback, CustomLogger):  # custom logger class
@@ -1315,7 +1302,7 @@ class Logging:
             print_verbose(
                 f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
             )
-
+        self.redact_message_input_output_from_logging(result=original_response)
         # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made

         callbacks = litellm.input_callback + self.dynamic_input_callbacks
@@ -1333,9 +1320,17 @@ class Logging:
                     )
                 elif callback == "sentry" and add_breadcrumb:
                     print_verbose("reaches sentry breadcrumbing")
+
+                    details_to_log = copy.deepcopy(self.model_call_details)
+                    if litellm.turn_off_message_logging:
+                        # make a copy of the _model_Call_details and log it
+                        details_to_log.pop("messages", None)
+                        details_to_log.pop("input", None)
+                        details_to_log.pop("prompt", None)
+
                     add_breadcrumb(
                         category="litellm.llm_call",
-                        message=f"Model Call Details post-call: {self.model_call_details}",
+                        message=f"Model Call Details post-call: {details_to_log}",
                         level="info",
                     )
                 elif isinstance(callback, CustomLogger):  # custom logger class
@@ -1527,6 +1522,8 @@ class Logging:
         else:
             callbacks = litellm.success_callback

+        self.redact_message_input_output_from_logging(result=result)
+
         for callback in callbacks:
             try:
                 litellm_params = self.model_call_details.get("litellm_params", {})
@@ -2071,6 +2068,9 @@ class Logging:
                 callbacks.append(callback)
         else:
             callbacks = litellm._async_success_callback

+        self.redact_message_input_output_from_logging(result=result)
+
         print_verbose(f"Async success callbacks: {callbacks}")
         for callback in callbacks:
             # check if callback can run for this request
@@ -2232,7 +2232,10 @@ class Logging:
                 start_time=start_time,
                 end_time=end_time,
             )

         result = None  # result sent to all loggers, init this to None incase it's not created
+
+        self.redact_message_input_output_from_logging(result=result)
+
         for callback in litellm.failure_callback:
             try:
                 if callback == "lite_debugger":
@@ -2417,6 +2420,33 @@ class Logging:
                     f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
                 )

+    def redact_message_input_output_from_logging(self, result):
+        """
+        Removes messages, prompts, input, response from logging. This modifies the data in-place
+        only redacts when litellm.turn_off_message_logging == True
+        """
+        # check if user opted out of logging message/response to callbacks
+        if litellm.turn_off_message_logging == True:
+            # remove messages, prompts, input, response from logging
+            self.model_call_details["messages"] = "redacted-by-litellm"
+            self.model_call_details["prompt"] = ""
+            self.model_call_details["input"] = ""
+
+            # response cleaning
+            # ChatCompletion Responses
+            if self.stream and "complete_streaming_response" in self.model_call_details:
+                _streaming_response = self.model_call_details[
+                    "complete_streaming_response"
+                ]
+                for choice in _streaming_response.choices:
+                    choice.message.content = "redacted-by-litellm"
+            else:
+                if result is not None:
+                    if isinstance(result, litellm.ModelResponse):
+                        if hasattr(result, "choices"):
+                            for choice in result.choices:
+                                choice.message.content = "redacted-by-litellm"
+

 def exception_logging(
     additional_args={},
@@ -2598,9 +2628,15 @@ def function_setup(
             dynamic_success_callbacks = kwargs.pop("success_callback")

         if add_breadcrumb:
+            details_to_log = copy.deepcopy(kwargs)
+            if litellm.turn_off_message_logging:
+                # make a copy of the _model_Call_details and log it
+                details_to_log.pop("messages", None)
+                details_to_log.pop("input", None)
+                details_to_log.pop("prompt", None)
             add_breadcrumb(
                 category="litellm.llm_call",
-                message=f"Positional Args: {args}, Keyword Args: {kwargs}",
+                message=f"Positional Args: {args}, Keyword Args: {details_to_log}",
                 level="info",
             )
         if "logger_fn" in kwargs: