Litellm staging (#8270)

* fix(opik.py): cleanup

* docs(opik_integration.md): cleanup opik integration docs

* fix(redact_messages.py): fix redact messages check header logic

ensures a stringified bool value in the header is still asserted to true

allows dynamic message redaction

* feat(redact_messages.py): support `x-litellm-enable-message-redaction` request header

allows dynamic message redaction
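For illustration, a minimal sketch of the header check described above. The header names and the `bool(...)` coercion mirror the `redact_messages.py` diff further down; the standalone helper function and the example call are illustrative only.

```python
# Sketch of the dynamic redaction header check, assuming request headers are
# available as a plain dict of header name -> value strings.
possible_request_headers = [
    "litellm-enable-message-redaction",    # old header, kept for backwards compatibility
    "x-litellm-enable-message-redaction",  # new header
]

def is_redaction_enabled_via_header(request_headers: dict) -> bool:
    for header in possible_request_headers:
        # bool("true") is True, so a stringified value set by a proxy client
        # still switches redaction on for that request.
        if bool(request_headers.get(header, False)):
            return True
    return False

# e.g. a per-request header sent to the proxy
print(is_redaction_enabled_via_header({"x-litellm-enable-message-redaction": "true"}))  # True
```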
Krish Dholakia 2025-02-04 22:35:48 -08:00 committed by GitHub
parent 3c813b3a87
commit 8d3a942fbd
10 changed files with 258 additions and 66 deletions


@@ -1,3 +1,5 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
 import Image from '@theme/IdealImage';
 # Comet Opik - Logging + Evals
@@ -21,17 +23,16 @@ Use just 4 lines of code, to instantly log your responses **across all providers
 Get your Opik API Key by signing up [here](https://www.comet.com/signup?utm_source=litelllm&utm_medium=docs&utm_content=api_key_cell)!
 ```python
-from litellm.integrations.opik.opik import OpikLogger
 import litellm
-opik_logger = OpikLogger()
-litellm.callbacks = [opik_logger]
+litellm.callbacks = ["opik"]
 ```
 Full examples:
+<Tabs>
+<TabItem value="sdk" label="SDK">
 ```python
-from litellm.integrations.opik.opik import OpikLogger
 import litellm
 import os
@@ -43,8 +44,7 @@ os.environ["OPIK_WORKSPACE"] = ""
 os.environ["OPENAI_API_KEY"] = ""
 # set "opik" as a callback, litellm will send the data to an Opik server (such as comet.com)
-opik_logger = OpikLogger()
-litellm.callbacks = [opik_logger]
+litellm.callbacks = ["opik"]
 # openai call
 response = litellm.completion(
@@ -55,18 +55,16 @@ response = litellm.completion(
 )
 ```
-If you are liteLLM within a function tracked using Opik's `@track` decorator,
+If you are using liteLLM within a function tracked using Opik's `@track` decorator,
 you will need provide the `current_span_data` field in the metadata attribute
 so that the LLM call is assigned to the correct trace:
 ```python
 from opik import track
 from opik.opik_context import get_current_span_data
-from litellm.integrations.opik.opik import OpikLogger
 import litellm
-opik_logger = OpikLogger()
-litellm.callbacks = [opik_logger]
+litellm.callbacks = ["opik"]
 @track()
 def streaming_function(input):
@@ -87,6 +85,126 @@ response = streaming_function("Why is tracking and evaluation of LLMs important?
 chunks = list(response)
 ```
+</TabItem>
+<TabItem value="proxy" label="Proxy">
+1. Setup config.yaml
+```yaml
+model_list:
+- model_name: gpt-3.5-turbo-testing
+litellm_params:
+model: gpt-3.5-turbo
+api_key: os.environ/OPENAI_API_KEY
+litellm_settings:
+callbacks: ["opik"]
+environment_variables:
+OPIK_API_KEY: ""
+OPIK_WORKSPACE: ""
+```
+2. Run proxy
+```bash
+litellm --config config.yaml
+```
+3. Test it!
+```bash
+curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+"model": "gpt-3.5-turbo-testing",
+"messages": [
+{
+"role": "user",
+"content": "What's the weather like in Boston today?"
+}
+]
+}'
+```
+</TabItem>
+</Tabs>
+## Opik-Specific Parameters
+These can be passed inside metadata with the `opik` key.
+### Fields
+- `project_name` - Name of the Opik project to send data to.
+- `current_span_data` - The current span data to be used for tracing.
+- `tags` - Tags to be used for tracing.
+### Usage
+<Tabs>
+<TabItem value="sdk" label="SDK">
+```python
+from opik import track
+from opik.opik_context import get_current_span_data
+import litellm
+litellm.callbacks = ["opik"]
+messages = [{"role": "user", "content": input}]
+response = litellm.completion(
+model="gpt-3.5-turbo",
+messages=messages,
+metadata = {
+"opik": {
+"current_span_data": get_current_span_data(),
+"tags": ["streaming-test"],
+},
+}
+)
+return response
+```
+</TabItem>
+<TabItem value="proxy" label="Proxy">
+```bash
+curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+"model": "gpt-3.5-turbo-testing",
+"messages": [
+{
+"role": "user",
+"content": "What's the weather like in Boston today?"
+}
+],
+"metadata": {
+"opik": {
+"current_span_data": "...",
+"tags": ["streaming-test"],
+},
+}
+}'
+```
+</TabItem>
+</Tabs>
 ## Support & Talk to Founders
 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)


@@ -1,3 +1,7 @@
+import Image from '@theme/IdealImage';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
 # Logging
 Log Proxy input, output, and exceptions using:
@@ -13,9 +17,7 @@ Log Proxy input, output, and exceptions using:
 - DynamoDB
 - etc.
-import Image from '@theme/IdealImage';
-import Tabs from '@theme/Tabs';
-import TabItem from '@theme/TabItem';
 ## Getting the LiteLLM Call ID
@@ -77,10 +79,13 @@ litellm_settings:
 ### Redact Messages, Response Content
-Set `litellm.turn_off_message_logging=True` This will prevent the messages and responses from being logged to your logging provider, but request metadata will still be logged.
-Example config.yaml
+Set `litellm.turn_off_message_logging=True` This will prevent the messages and responses from being logged to your logging provider, but request metadata - e.g. spend, will still be tracked.
+<Tabs>
+<TabItem value="global" label="Global">
+**1. Setup config.yaml **
 ```yaml
 model_list:
 - model_name: gpt-3.5-turbo
@@ -91,9 +96,87 @@ litellm_settings:
 turn_off_message_logging: True # 👈 Key Change
 ```
-If you have this feature turned on, you can override it for specific requests by
+**2. Send request**
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--data '{
+"model": "gpt-3.5-turbo",
+"messages": [
+{
+"role": "user",
+"content": "what llm are you"
+}
+]
+}'
+```
+</TabItem>
+<TabItem value="dynamic" label="Per Request">
+:::info
+Dynamic request message redaction is in BETA.
+:::
+Pass in a request header to enable message redaction for a request.
+```
+x-litellm-enable-message-redaction: true
+```
+Example config.yaml
+**1. Setup config.yaml **
+```yaml
+model_list:
+- model_name: gpt-3.5-turbo
+litellm_params:
+model: gpt-3.5-turbo
+```
+**2. Setup per request header**
+```shell
+curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-zV5HlSIm8ihj1F9C_ZbB1g' \
+-H 'x-litellm-enable-message-redaction: true' \
+-d '{
+"model": "gpt-3.5-turbo-testing",
+"messages": [
+{
+"role": "user",
+"content": "Hey, how'\''s it going 1234?"
+}
+]
+}'
+```
+</TabItem>
+</Tabs>
+**3. Check Logging Tool + Spend Logs**
+**Logging Tool**
+<Image img={require('../../img/message_redaction_logging.png')}/>
+**Spend Logs**
+<Image img={require('../../img/message_redaction_spend_logs.png')} />
+### Disable Message Redaction
+If you have `litellm.turn_on_message_logging` turned on, you can override it for specific requests by
 setting a request header `LiteLLM-Disable-Message-Redaction: true`.
 ```shell
 curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
@@ -109,8 +192,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 }'
 ```
-Removes any field with `user_api_key_*` from metadata.
 ### Turn off all tracking/logging


@@ -6,6 +6,8 @@ Special headers that are supported by LiteLLM.
 `x-litellm-timeout` Optional[float]: The timeout for the request in seconds.
+`x-litellm-enable-message-redaction`: Optional[bool]: Don't log the message content to logging integrations. Just track spend. [Learn More](./logging#redact-messages-response-content)
 ## Anthropic Headers
 `anthropic-version` Optional[str]: The version of the Anthropic API to use.
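The `x-litellm-enable-message-redaction` header added above can also be sent from Python when the OpenAI SDK is pointed at the LiteLLM proxy; a sketch, where the base URL, key, and model name are placeholders and `extra_headers` is the standard OpenAI SDK pass-through for extra request headers:

```python
from openai import OpenAI

# Point the OpenAI SDK at the LiteLLM proxy (placeholder URL and key).
client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

response = client.chat.completions.create(
    model="gpt-3.5-turbo-testing",
    messages=[{"role": "user", "content": "What's the weather like in Boston today?"}],
    # Ask the proxy to redact message content from logging integrations for this request.
    extra_headers={"x-litellm-enable-message-redaction": "true"},
)
print(response.choices[0].message.content)
```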

Binary file not shown (image added, 259 KiB).

Binary file not shown (image added, 220 KiB).


@@ -147,13 +147,11 @@ class OpikLogger(CustomBatchLogger):
 f"OpikLogger - Error: {response.status_code} - {response.text}"
 )
 else:
-verbose_logger.debug(
+verbose_logger.info(
 f"OpikLogger - {len(self.log_queue)} Opik events submitted"
 )
 except Exception as e:
-verbose_logger.exception(
-f"OpikLogger failed to send batch - {str(e)}\n{traceback.format_exc()}"
-)
+verbose_logger.exception(f"OpikLogger failed to send batch - {str(e)}")
 def _create_opik_headers(self):
 headers = {}
@@ -165,7 +163,7 @@ class OpikLogger(CustomBatchLogger):
 return headers
 async def async_send_batch(self):
-verbose_logger.exception("Calling async_send_batch")
+verbose_logger.info("Calling async_send_batch")
 if not self.log_queue:
 return
@@ -177,10 +175,12 @@ class OpikLogger(CustomBatchLogger):
 await self._submit_batch(
 url=self.trace_url, headers=self.headers, batch={"traces": traces}
 )
+verbose_logger.info(f"Sent {len(traces)} traces")
 if len(spans) > 0:
 await self._submit_batch(
 url=self.span_url, headers=self.headers, batch={"spans": spans}
 )
+verbose_logger.info(f"Sent {len(spans)} spans")
 def _create_opik_payload( # noqa: PLR0915
 self, kwargs, response_obj, start_time, end_time


@@ -73,12 +73,9 @@ def perform_redaction(model_call_details: dict, result):
 return {"text": "redacted-by-litellm"}
-def redact_message_input_output_from_logging(
-model_call_details: dict, result, input: Optional[Any] = None
-):
+def should_redact_message_logging(model_call_details: dict) -> bool:
 """
-Removes messages, prompts, input, response from logging. This modifies the data in-place
-only redacts when litellm.turn_off_message_logging == True
+Determine if message logging should be redacted.
 """
 _request_headers = (
 model_call_details.get("litellm_params", {}).get("metadata", {}) or {}
@@ -86,25 +83,48 @@ def redact_message_input_output_from_logging(
 request_headers = _request_headers.get("headers", {})
+possible_request_headers = [
+"litellm-enable-message-redaction", # old header. maintain backwards compatibility
+"x-litellm-enable-message-redaction", # new header
+]
+is_redaction_enabled_via_header = False
+for header in possible_request_headers:
+if bool(request_headers.get(header, False)):
+is_redaction_enabled_via_header = True
+break
 # check if user opted out of logging message/response to callbacks
 if (
 litellm.turn_off_message_logging is not True
-and request_headers.get("litellm-enable-message-redaction", False) is not True
+and is_redaction_enabled_via_header is not True
 and _get_turn_off_message_logging_from_dynamic_params(model_call_details)
 is not True
 ):
-return result
+return False
-if request_headers and request_headers.get(
-"litellm-disable-message-redaction", False
+if request_headers and bool(
+request_headers.get("litellm-disable-message-redaction", False)
 ):
-return result
+return False
 # user has OPTED OUT of message redaction
 if _get_turn_off_message_logging_from_dynamic_params(model_call_details) is False:
-return result
+return False
-return perform_redaction(model_call_details, result)
+return True
+def redact_message_input_output_from_logging(
+model_call_details: dict, result, input: Optional[Any] = None
+) -> Any:
+"""
+Removes messages, prompts, input, response from logging. This modifies the data in-place
+only redacts when litellm.turn_off_message_logging == True
+"""
+if should_redact_message_logging(model_call_details):
+return perform_redaction(model_call_details, result)
+return result
 def _get_turn_off_message_logging_from_dynamic_params(
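For orientation, a self-contained sketch of where the per-request headers live in `model_call_details` and how the header-based decision plays out. The dict structure and key names are taken from the diff above, the values are made up, and the sketch deliberately omits the global `litellm.turn_off_message_logging` flag and the dynamic-params checks that the real helper also consults.

```python
# Simplified walk of the lookup performed by the new helper: the per-request
# headers sit under litellm_params -> metadata -> headers.
model_call_details = {
    "litellm_params": {
        "metadata": {
            "headers": {"x-litellm-enable-message-redaction": "true"},
        }
    }
}

_request_headers = (
    model_call_details.get("litellm_params", {}).get("metadata", {}) or {}
)
request_headers = _request_headers.get("headers", {})

# Either the old or the new enable header switches redaction on for the request.
enable = any(
    bool(request_headers.get(h, False))
    for h in ("litellm-enable-message-redaction", "x-litellm-enable-message-redaction")
)
# An explicit disable header takes precedence over redaction being enabled.
disable = bool(request_headers.get("litellm-disable-message-redaction", False))

print(enable and not disable)  # True: this request would be redacted
```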


@@ -8904,16 +8904,5 @@
 "supports_function_calling": true,
 "mode": "chat",
 "supports_tool_choice": true
-},
-"hyperbolic/deepseek-v3": {
-"max_tokens": 20480,
-"max_input_tokens": 131072,
-"max_output_tokens": 20480,
-"litellm_provider": "openai",
-"input_cost_per_token": 0.00000025,
-"output_cost_per_token": 0.00000025,
-"mode": "chat",
-"supports_function_calling": true,
-"supports_response_schema": true
-}
+}
 }

File diff suppressed because one or more lines are too long


@@ -29,21 +29,4 @@ model_list:
 litellm_settings:
-callbacks: ["langsmith"]
+callbacks: ["opik"]
-disable_no_log_param: true
-general_settings:
-enable_jwt_auth: True
-litellm_jwtauth:
-object_id_jwt_field: "client_id" # can be either user / team, inferred from the role mapping
-roles_jwt_field: "resource_access.litellm-test-client-id.roles"
-role_mappings:
-- role: litellm.api.consumer
-internal_role: "team"
-enforce_rbac: true
-role_permissions: # default model + endpoint permissions for a role.
-- role: team
-models: ["anthropic-claude"]
-routes: ["openai_routes"]