Merge pull request #5603 from BerriAI/litellm_allow_turning_off_message_logging_for_callbacks

[Feat-Proxy] allow turning off message logging for OTEL (callback specific)
Ishaan Jaff 2024-09-09 22:00:09 -07:00 committed by GitHub
commit 43cd657ac5
9 changed files with 138 additions and 42 deletions

View file

@@ -744,6 +744,20 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
**🎉 Expect to see this trace logged in your OTEL collector**
### Redacting Messages, Response Content from OTEL Logging
Set `message_logging=False` for `otel`; no messages or responses will be logged.
```yaml
litellm_settings:
callbacks: ["otel"]
## 👇 Key Change
callback_settings:
otel:
message_logging: False
```
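For example, a request sent through the proxy still returns the full completion to the client; only what the `otel` callback exports is affected. A minimal sketch, assuming a local proxy at `http://0.0.0.0:4000` with the placeholder key `sk-1234`:

```python
import openai

# placeholder proxy URL and key for this sketch
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this text should not reach the OTEL collector"}],
)

# The client still receives the real response; with message_logging=False the
# OTEL callback either skips the raw request/response span or records
# "redacted-by-litellm" in place of message content.
print(response.choices[0].message.content)
```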
### Context propagation across Services `Traceparent HTTP Header`
❓ Use this when you want to **pass information about the incoming request in a distributed tracing system**
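A minimal sketch of forwarding a W3C `traceparent` header through the proxy so the resulting spans can join an existing distributed trace (the header value, proxy URL, and key below are illustrative placeholders):

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    # W3C trace context format: version-trace_id-parent_span_id-trace_flags
    extra_headers={"traceparent": "00-80e1afed08e019fc1110464cfa66635c-7a085853722dc6d2-01"},
)
```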

View file

@@ -15,7 +15,8 @@ from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse
class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
# Class variables or attributes
def __init__(self) -> None:
def __init__(self, message_logging: bool = True) -> None:
self.message_logging = message_logging
pass
def log_pre_api_call(self, model, messages, kwargs):
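With this change, any `CustomLogger` subclass can opt out of message logging by forwarding the new parameter to the base class. A hypothetical sketch (the `MyLogger` class and its print statement are illustrative, not part of the PR):

```python
from litellm.integrations.custom_logger import CustomLogger


class MyLogger(CustomLogger):
    def __init__(self):
        # Opt this callback out of message logging; litellm is then expected to
        # redact message/response content before invoking the hooks below.
        super().__init__(message_logging=False)

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # kwargs["messages"] should arrive as
        # [{"role": "user", "content": "redacted-by-litellm"}]
        print("logged call for model:", kwargs.get("model"))
```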

View file

@@ -71,7 +71,10 @@ class OpenTelemetryConfig:
class OpenTelemetry(CustomLogger):
def __init__(
self, config=OpenTelemetryConfig.from_env(), callback_name: Optional[str] = None
self,
config=OpenTelemetryConfig.from_env(),
callback_name: Optional[str] = None,
**kwargs,
):
from opentelemetry import trace
from opentelemetry.sdk.resources import Resource
@@ -101,6 +104,9 @@ class OpenTelemetry(CustomLogger):
otel_exporter_logger = logging.getLogger("opentelemetry.sdk.trace.export")
otel_exporter_logger.setLevel(logging.DEBUG)
# init CustomLogger params
super().__init__(**kwargs)
def log_success_event(self, kwargs, response_obj, start_time, end_time):
self._handle_sucess(kwargs, response_obj, start_time, end_time)
@@ -261,6 +267,8 @@ class OpenTelemetry(CustomLogger):
if litellm.turn_off_message_logging is True:
pass
elif self.message_logging is not True:
pass
else:
# Span 2: Raw Request / Response to LLM
raw_request_span = self.tracer.start_span(
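Outside the proxy, the same behaviour can be enabled when constructing the logger directly, mirroring the test added in this PR (console exporter shown purely for illustration):

```python
import litellm
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig

# message_logging=False is forwarded to CustomLogger via **kwargs, so this
# callback skips the raw request/response span shown above.
otel_logger = OpenTelemetry(
    config=OpenTelemetryConfig(exporter="console"),
    message_logging=False,
)
litellm.callbacks = [otel_logger]
```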

View file

@@ -28,6 +28,7 @@ from litellm.cost_calculator import _select_model_name_for_cost_calc
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.redact_messages import (
redact_message_input_output_from_custom_logger,
redact_message_input_output_from_logging,
)
from litellm.rerank_api.types import RerankResponse
@@ -1395,6 +1396,9 @@ class Logging:
call_type=self.call_type,
)
elif isinstance(callback, CustomLogger):
result = redact_message_input_output_from_custom_logger(
result=result, litellm_logging_obj=self, custom_logger=callback
)
self.model_call_details, result = await callback.async_logging_hook(
kwargs=self.model_call_details,
result=result,

View file

@@ -11,6 +11,7 @@ import copy
from typing import TYPE_CHECKING, Any
import litellm
from litellm.integrations.custom_logger import CustomLogger
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import (
@@ -22,6 +23,56 @@ else:
LiteLLMLoggingObject = Any
def redact_message_input_output_from_custom_logger(
litellm_logging_obj: LiteLLMLoggingObject, result, custom_logger: CustomLogger
):
if (
hasattr(custom_logger, "message_logging")
and custom_logger.message_logging is not True
):
return perform_redaction(litellm_logging_obj, result)
return result
def perform_redaction(litellm_logging_obj: LiteLLMLoggingObject, result):
"""
Performs the actual redaction on the logging object and result.
"""
# Redact model_call_details
litellm_logging_obj.model_call_details["messages"] = [
{"role": "user", "content": "redacted-by-litellm"}
]
litellm_logging_obj.model_call_details["prompt"] = ""
litellm_logging_obj.model_call_details["input"] = ""
# Redact streaming response
if (
litellm_logging_obj.stream is True
and "complete_streaming_response" in litellm_logging_obj.model_call_details
):
_streaming_response = litellm_logging_obj.model_call_details[
"complete_streaming_response"
]
for choice in _streaming_response.choices:
if isinstance(choice, litellm.Choices):
choice.message.content = "redacted-by-litellm"
elif isinstance(choice, litellm.utils.StreamingChoices):
choice.delta.content = "redacted-by-litellm"
# Redact result
if result is not None and isinstance(result, litellm.ModelResponse):
_result = copy.deepcopy(result)
if hasattr(_result, "choices") and _result.choices is not None:
for choice in _result.choices:
if isinstance(choice, litellm.Choices):
choice.message.content = "redacted-by-litellm"
elif isinstance(choice, litellm.utils.StreamingChoices):
choice.delta.content = "redacted-by-litellm"
return _result
return result
def redact_message_input_output_from_logging(
litellm_logging_obj: LiteLLMLoggingObject, result
):
@@ -50,43 +101,7 @@ def redact_message_input_output_from_logging(
):
return result
# remove messages, prompts, input, response from logging
litellm_logging_obj.model_call_details["messages"] = [
{"role": "user", "content": "redacted-by-litellm"}
]
litellm_logging_obj.model_call_details["prompt"] = ""
litellm_logging_obj.model_call_details["input"] = ""
# response cleaning
# ChatCompletion Responses
if (
litellm_logging_obj.stream is True
and "complete_streaming_response" in litellm_logging_obj.model_call_details
):
_streaming_response = litellm_logging_obj.model_call_details[
"complete_streaming_response"
]
for choice in _streaming_response.choices:
if isinstance(choice, litellm.Choices):
choice.message.content = "redacted-by-litellm"
elif isinstance(choice, litellm.utils.StreamingChoices):
choice.delta.content = "redacted-by-litellm"
else:
if result is not None:
if isinstance(result, litellm.ModelResponse):
# only deep copy litellm.ModelResponse
_result = copy.deepcopy(result)
if hasattr(_result, "choices") and _result.choices is not None:
for choice in _result.choices:
if isinstance(choice, litellm.Choices):
choice.message.content = "redacted-by-litellm"
elif isinstance(choice, litellm.utils.StreamingChoices):
choice.delta.content = "redacted-by-litellm"
return _result
# by default return result
return result
return perform_redaction(litellm_logging_obj, result)
def redact_user_api_key_info(metadata: dict) -> dict:
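An illustrative sketch of what `perform_redaction` is expected to do for a non-streaming response. The `SimpleNamespace` stand-in for the logging object and the sample response are assumptions made for this example, not part of the PR:

```python
from types import SimpleNamespace

import litellm
from litellm.litellm_core_utils.redact_messages import perform_redaction

# stand-in carrying only the fields perform_redaction touches
logging_obj = SimpleNamespace(
    model_call_details={"messages": [{"role": "user", "content": "my secret prompt"}]},
    stream=False,
)
response = litellm.ModelResponse(
    choices=[litellm.Choices(message=litellm.Message(content="a secret answer"))]
)

redacted = perform_redaction(logging_obj, response)
print(logging_obj.model_call_details["messages"])  # content replaced with "redacted-by-litellm"
print(redacted.choices[0].message.content)         # "redacted-by-litellm"
```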

View file

@@ -17,7 +17,7 @@ def initialize_callbacks_on_proxy(
litellm_settings: dict,
callback_specific_params: dict = {},
):
from litellm.proxy.proxy_server import prisma_client
from litellm.proxy.proxy_server import callback_settings, prisma_client
verbose_proxy_logger.debug(
f"{blue_color_code}initializing callbacks={value} on proxy{reset_color_code}"
@@ -34,7 +34,11 @@
from litellm.integrations.opentelemetry import OpenTelemetry
from litellm.proxy import proxy_server
open_telemetry_logger = OpenTelemetry()
_otel_settings = {}
if isinstance(callback_settings, dict) and "otel" in callback_settings:
_otel_settings = callback_settings["otel"]
open_telemetry_logger = OpenTelemetry(**_otel_settings)
# Add Otel as a service callback
if "otel" not in litellm.service_callback:

View file

@@ -15,6 +15,13 @@ model_list:
litellm_settings:
callbacks: ["otel"]
callback_settings:
otel:
message_logging: False
router_settings:
enable_tag_filtering: True # 👈 Key Change

View file

@@ -478,6 +478,7 @@ experimental = False
llm_router: Optional[litellm.Router] = None
llm_model_list: Optional[list] = None
general_settings: dict = {}
callback_settings: dict = {}
log_file = "api_log.json"
worker_config = None
master_key = None
@@ -1491,7 +1492,7 @@ class ProxyConfig:
"""
Load config values into proxy global state
"""
global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, proxy_budget_rescheduler_max_time, proxy_budget_rescheduler_min_time, ui_access_mode, litellm_master_key_hash, proxy_batch_write_at, disable_spend_logs, prompt_injection_detection_obj, redis_usage_cache, store_model_in_db, premium_user, open_telemetry_logger, health_check_details
global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, proxy_budget_rescheduler_max_time, proxy_budget_rescheduler_min_time, ui_access_mode, litellm_master_key_hash, proxy_batch_write_at, disable_spend_logs, prompt_injection_detection_obj, redis_usage_cache, store_model_in_db, premium_user, open_telemetry_logger, health_check_details, callback_settings
# Load existing config
if os.environ.get("LITELLM_CONFIG_BUCKET_NAME") is not None:
@@ -1533,6 +1534,9 @@
_license_check.license_str = os.getenv("LITELLM_LICENSE", None)
premium_user = _license_check.is_premium()
## Callback settings
callback_settings = config.get("callback_settings", None)
## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
litellm_settings = config.get("litellm_settings", None)
if litellm_settings is None:

View file

@@ -12,6 +12,45 @@ from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
verbose_logger.setLevel(logging.DEBUG)
class TestOpenTelemetry(OpenTelemetry):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.kwargs = None
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
print("in async_log_success_event for TestOpenTelemetry kwargs=", kwargs)
self.kwargs = kwargs
await super().async_log_success_event(
kwargs, response_obj, start_time, end_time
)
@pytest.mark.asyncio
async def test_awesome_otel_with_message_logging_off():
litellm.set_verbose = True
otel_logger = TestOpenTelemetry(
message_logging=False, config=OpenTelemetryConfig(exporter="console")
)
litellm.callbacks = [otel_logger]
litellm.success_callback = []
litellm.failure_callback = []
response = await litellm.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "hi"}],
mock_response="hi",
)
print("response", response)
await asyncio.sleep(5)
assert otel_logger.kwargs["messages"] == [
{"role": "user", "content": "redacted-by-litellm"}
]
@pytest.mark.asyncio
@pytest.mark.skip(reason="Local only test. WIP.")
async def test_async_otel_callback():