Merge pull request #3330 from BerriAI/litellm_rdct_msgs
[Feat] Redact Logging Messages/Response content on Logging Providers with `litellm.turn_off_message_logging=True`
Commit 6762d07c7f
6 changed files with 127 additions and 53 deletions

@@ -167,6 +167,9 @@ messages = [
chat(messages)
```

## Redacting Messages, Response Content from Langfuse Logging

Set `litellm.turn_off_message_logging=True`. This will prevent the messages and responses from being logged to Langfuse, but request metadata will still be logged.
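
For example, a minimal sketch (assuming `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` are already set; the model and messages below are only illustrative):

```python
import litellm

litellm.success_callback = ["langfuse"]  # keep sending traces to Langfuse
litellm.turn_off_message_logging = True  # but redact message/response content

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
)
# The Langfuse trace still records request metadata (model, latency, usage, etc.),
# but its input/output fields contain "redacted-by-litellm" instead of the actual
# messages and response text.
```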

## Troubleshooting & Errors
### Data not getting logged to Langfuse?

@@ -40,5 +40,9 @@ response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content
print(response)
```

## Redacting Messages, Response Content from Sentry Logging

Set `litellm.turn_off_message_logging=True`. This will prevent the messages and responses from being logged to Sentry, but request metadata will still be logged.
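
For example, a minimal sketch (assuming the Sentry DSN has already been configured for the callback, as set up earlier on this page):

```python
import litellm

litellm.failure_callback = ["sentry"]    # report failed LLM calls to Sentry
litellm.turn_off_message_logging = True  # strip messages/prompt/input from what Sentry receives

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
)
# If a call fails, the Sentry event still carries request metadata,
# but the message content is removed before it is logged.
```
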
[Let us know](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+) if you need any additional options from Sentry.

@@ -569,6 +569,22 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \

All requests made with these keys will log data to their team-specific logging.

### Redacting Messages, Response Content from Langfuse Logging

Set `litellm.turn_off_message_logging=True`. This will prevent the messages and responses from being logged to Langfuse, but request metadata will still be logged.

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
litellm_settings:
  success_callback: ["langfuse"]
  turn_off_message_logging: True
```

## Logging Proxy Input/Output - DataDog

We will use the `--config` to set `litellm.success_callback = ["datadog"]`. This will log all successful LLM calls to DataDog.
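
A minimal `config.yaml` sketch for this (the model entry is illustrative; the relevant piece is the `success_callback` setting):

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
litellm_settings:
  success_callback: ["datadog"]
```

Then start the proxy with this file, e.g. `litellm --config /path/to/config.yaml`.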

@@ -45,6 +45,7 @@ _async_failure_callback: List[Callable] = (
) # internal variable - async custom callbacks are routed here.
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
## end of callbacks #############

email: Optional[str] = (

@@ -161,40 +161,54 @@ async def make_async_calls():
    return total_time


# def test_langfuse_logging_async_text_completion():
#     try:
#         pre_langfuse_setup()
#         litellm.set_verbose = False
#         litellm.success_callback = ["langfuse"]
@pytest.mark.asyncio
@pytest.mark.parametrize("stream", [False, True])
async def test_langfuse_logging_without_request_response(stream):
    try:
        import uuid

        # async def _test_langfuse():
        #     response = await litellm.atext_completion(
        #         model="gpt-3.5-turbo-instruct",
        #         prompt="this is a test",
        #         max_tokens=5,
        #         temperature=0.7,
        #         timeout=5,
        #         user="test_user",
        #         stream=True
        #     )
        #     async for chunk in response:
        #         print()
        #         print(chunk)
        #         await asyncio.sleep(1)
        #     return response
        _unique_trace_name = f"litellm-test-{str(uuid.uuid4())}"
        litellm.set_verbose = True
        litellm.turn_off_message_logging = True
        litellm.success_callback = ["langfuse"]
        response = await litellm.acompletion(
            model="gpt-3.5-turbo",
            mock_response="It's simple to use and easy to get started",
            messages=[{"role": "user", "content": "Hi 👋 - i'm claude"}],
            max_tokens=10,
            temperature=0.2,
            stream=stream,
            metadata={"trace_id": _unique_trace_name},
        )
        print(response)
        if stream:
            async for chunk in response:
                print(chunk)

        # response = asyncio.run(_test_langfuse())
        # print(f"response: {response}")
        await asyncio.sleep(3)

        # # # check langfuse.log to see if there was a failed response
        # search_logs("langfuse.log")
        # except litellm.Timeout as e:
        #     pass
        # except Exception as e:
        #     pytest.fail(f"An exception occurred - {e}")
        import langfuse

        langfuse_client = langfuse.Langfuse(
            public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
            secret_key=os.environ["LANGFUSE_SECRET_KEY"],
        )

        # test_langfuse_logging_async_text_completion()
        # get trace with _unique_trace_name
        trace = langfuse_client.get_generations(trace_id=_unique_trace_name)

        print("trace_from_langfuse", trace)

        _trace_data = trace.data

        assert _trace_data[0].input == {"messages": "redacted-by-litellm"}
        assert _trace_data[0].output == {
            "role": "assistant",
            "content": "redacted-by-litellm",
        }

    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")


@pytest.mark.skip(reason="beta test - checking langfuse output")

@@ -1212,7 +1212,6 @@ class Logging:
            print_verbose(
                f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
            )

        # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
        callbacks = litellm.input_callback + self.dynamic_input_callbacks
        for callback in callbacks:

@@ -1229,29 +1228,17 @@ class Logging:
                    litellm_call_id=self.litellm_params["litellm_call_id"],
                    print_verbose=print_verbose,
                )

            elif callback == "lite_debugger":
                print_verbose(
                    f"reaches litedebugger for logging! - model_call_details {self.model_call_details}"
                )
                model = self.model_call_details["model"]
                messages = self.model_call_details["input"]
                print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
                liteDebuggerClient.input_log_event(
                    model=model,
                    messages=messages,
                    end_user=self.model_call_details.get("user", "default"),
                    litellm_call_id=self.litellm_params["litellm_call_id"],
                    litellm_params=self.model_call_details["litellm_params"],
                    optional_params=self.model_call_details["optional_params"],
                    print_verbose=print_verbose,
                    call_type=self.call_type,
                )
            elif callback == "sentry" and add_breadcrumb:
                print_verbose("reaches sentry breadcrumbing")
                details_to_log = copy.deepcopy(self.model_call_details)
                if litellm.turn_off_message_logging:
                    # make a copy of the _model_Call_details and log it
                    details_to_log.pop("messages", None)
                    details_to_log.pop("input", None)
                    details_to_log.pop("prompt", None)

                add_breadcrumb(
                    category="litellm.llm_call",
-                   message=f"Model Call Details pre-call: {self.model_call_details}",
+                   message=f"Model Call Details pre-call: {details_to_log}",
                    level="info",
                )
            elif isinstance(callback, CustomLogger): # custom logger class

@@ -1315,7 +1302,7 @@ class Logging:
            print_verbose(
                f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
            )

        self.redact_message_input_output_from_logging(result=original_response)
        # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made

        callbacks = litellm.input_callback + self.dynamic_input_callbacks

@@ -1333,9 +1320,17 @@ class Logging:
                )
            elif callback == "sentry" and add_breadcrumb:
                print_verbose("reaches sentry breadcrumbing")

                details_to_log = copy.deepcopy(self.model_call_details)
                if litellm.turn_off_message_logging:
                    # make a copy of the _model_Call_details and log it
                    details_to_log.pop("messages", None)
                    details_to_log.pop("input", None)
                    details_to_log.pop("prompt", None)

                add_breadcrumb(
                    category="litellm.llm_call",
-                   message=f"Model Call Details post-call: {self.model_call_details}",
+                   message=f"Model Call Details post-call: {details_to_log}",
                    level="info",
                )
            elif isinstance(callback, CustomLogger): # custom logger class

@@ -1527,6 +1522,8 @@ class Logging:
        else:
            callbacks = litellm.success_callback

        self.redact_message_input_output_from_logging(result=result)

        for callback in callbacks:
            try:
                litellm_params = self.model_call_details.get("litellm_params", {})

@@ -2071,6 +2068,9 @@ class Logging:
                    callbacks.append(callback)
        else:
            callbacks = litellm._async_success_callback

        self.redact_message_input_output_from_logging(result=result)

        print_verbose(f"Async success callbacks: {callbacks}")
        for callback in callbacks:
            # check if callback can run for this request

@@ -2232,7 +2232,10 @@ class Logging:
            start_time=start_time,
            end_time=end_time,
        )

        result = None # result sent to all loggers, init this to None incase it's not created

        self.redact_message_input_output_from_logging(result=result)
        for callback in litellm.failure_callback:
            try:
                if callback == "lite_debugger":

@@ -2417,6 +2420,33 @@ class Logging:
                f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
            )

    def redact_message_input_output_from_logging(self, result):
        """
        Removes messages, prompts, input, response from logging. This modifies the data in-place
        only redacts when litellm.turn_off_message_logging == True
        """
        # check if user opted out of logging message/response to callbacks
        if litellm.turn_off_message_logging == True:
            # remove messages, prompts, input, response from logging
            self.model_call_details["messages"] = "redacted-by-litellm"
            self.model_call_details["prompt"] = ""
            self.model_call_details["input"] = ""

            # response cleaning
            # ChatCompletion Responses
            if self.stream and "complete_streaming_response" in self.model_call_details:
                _streaming_response = self.model_call_details[
                    "complete_streaming_response"
                ]
                for choice in _streaming_response.choices:
                    choice.message.content = "redacted-by-litellm"
            else:
                if result is not None:
                    if isinstance(result, litellm.ModelResponse):
                        if hasattr(result, "choices"):
                            for choice in result.choices:
                                choice.message.content = "redacted-by-litellm"


def exception_logging(
    additional_args={},

@@ -2598,9 +2628,15 @@ def function_setup(
        dynamic_success_callbacks = kwargs.pop("success_callback")

    if add_breadcrumb:
        details_to_log = copy.deepcopy(kwargs)
        if litellm.turn_off_message_logging:
            # make a copy of the _model_Call_details and log it
            details_to_log.pop("messages", None)
            details_to_log.pop("input", None)
            details_to_log.pop("prompt", None)
        add_breadcrumb(
            category="litellm.llm_call",
-           message=f"Positional Args: {args}, Keyword Args: {kwargs}",
+           message=f"Positional Args: {args}, Keyword Args: {details_to_log}",
            level="info",
        )
    if "logger_fn" in kwargs: