Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 02:34:29 +00:00)
Litellm staging (#8270)
All checks were successful
Read Version from pyproject.toml / read-version (push) Successful in 15s
* fix(opik.py): cleanup
* docs(opik_integration.md): cleanup opik integration docs
* fix(redact_messages.py): fix redact messages check header logic - ensure a stringified bool value in the header is still asserted to true, allowing dynamic message redaction
* feat(redact_messages.py): support `x-litellm-enable-message-redaction` request header, allowing dynamic message redaction
This commit is contained in:
parent 3c813b3a87
commit 8d3a942fbd
10 changed files with 258 additions and 66 deletions
@@ -1,3 +1,5 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import Image from '@theme/IdealImage';

# Comet Opik - Logging + Evals

@@ -21,17 +23,16 @@ Use just 4 lines of code, to instantly log your responses **across all providers
Get your Opik API Key by signing up [here](https://www.comet.com/signup?utm_source=litelllm&utm_medium=docs&utm_content=api_key_cell)!

```python
from litellm.integrations.opik.opik import OpikLogger
import litellm

opik_logger = OpikLogger()
litellm.callbacks = [opik_logger]
litellm.callbacks = ["opik"]
```

Full examples:

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm.integrations.opik.opik import OpikLogger
import litellm
import os

@@ -43,8 +44,7 @@ os.environ["OPIK_WORKSPACE"] = ""
os.environ["OPENAI_API_KEY"] = ""

# set "opik" as a callback, litellm will send the data to an Opik server (such as comet.com)
opik_logger = OpikLogger()
litellm.callbacks = [opik_logger]
litellm.callbacks = ["opik"]

# openai call
response = litellm.completion(
@@ -55,18 +55,16 @@ response = litellm.completion(
)
```

If you are liteLLM within a function tracked using Opik's `@track` decorator,
If you are using liteLLM within a function tracked using Opik's `@track` decorator,
you will need to provide the `current_span_data` field in the metadata attribute
so that the LLM call is assigned to the correct trace:

```python
from opik import track
from opik.opik_context import get_current_span_data
from litellm.integrations.opik.opik import OpikLogger
import litellm

opik_logger = OpikLogger()
litellm.callbacks = [opik_logger]
litellm.callbacks = ["opik"]

@track()
def streaming_function(input):
@@ -87,6 +85,126 @@ response = streaming_function("Why is tracking and evaluation of LLMs important?
chunks = list(response)
```

</TabItem>
<TabItem value="proxy" label="Proxy">

1. Setup config.yaml

```yaml
model_list:
  - model_name: gpt-3.5-turbo-testing
    litellm_params:
      model: gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY

litellm_settings:
  callbacks: ["opik"]

environment_variables:
  OPIK_API_KEY: ""
  OPIK_WORKSPACE: ""
```

2. Run proxy

```bash
litellm --config config.yaml
```

3. Test it!

```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
  "model": "gpt-3.5-turbo-testing",
  "messages": [
    {
      "role": "user",
      "content": "What'\''s the weather like in Boston today?"
    }
  ]
}'
```

</TabItem>
</Tabs>

## Opik-Specific Parameters

These can be passed inside metadata with the `opik` key.

### Fields

- `project_name` - Name of the Opik project to send data to.
- `current_span_data` - The current span data to be used for tracing.
- `tags` - Tags to be used for tracing.

### Usage

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from opik import track
from opik.opik_context import get_current_span_data
import litellm

litellm.callbacks = ["opik"]

messages = [{"role": "user", "content": input}]
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=messages,
    metadata = {
        "opik": {
            "current_span_data": get_current_span_data(),
            "tags": ["streaming-test"],
        },
    }
)
return response
```
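The `project_name` field from the list above can be passed in the same `opik` metadata block. A minimal sketch (illustrative, not part of this diff; the project name is a placeholder):

```python
import litellm

litellm.callbacks = ["opik"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
    metadata={
        "opik": {
            "project_name": "my-litellm-project",  # placeholder Opik project name
            "tags": ["quickstart"],
        },
    },
)
```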

</TabItem>
<TabItem value="proxy" label="Proxy">

```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
  "model": "gpt-3.5-turbo-testing",
  "messages": [
    {
      "role": "user",
      "content": "What'\''s the weather like in Boston today?"
    }
  ],
  "metadata": {
    "opik": {
      "current_span_data": "...",
      "tags": ["streaming-test"]
    }
  }
}'
```

</TabItem>
</Tabs>

## Support & Talk to Founders

- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)

@@ -1,3 +1,7 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Logging

Log Proxy input, output, and exceptions using:
@@ -13,9 +17,7 @@ Log Proxy input, output, and exceptions using:
- DynamoDB
- etc.

import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

## Getting the LiteLLM Call ID

@@ -77,10 +79,13 @@

### Redact Messages, Response Content

Set `litellm.turn_off_message_logging=True`. This will prevent the messages and responses from being logged to your logging provider, but request metadata will still be logged.
Set `litellm.turn_off_message_logging=True`. This will prevent the messages and responses from being logged to your logging provider, but request metadata, e.g. spend, will still be tracked.

<Tabs>

Example config.yaml
<TabItem value="global" label="Global">

**1. Setup config.yaml**
```yaml
model_list:
  - model_name: gpt-3.5-turbo
@@ -91,9 +96,87 @@
  turn_off_message_logging: True # 👈 Key Change
```

If you have this feature turned on, you can override it for specific requests by
**2. Send request**
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
  "model": "gpt-3.5-turbo",
  "messages": [
    {
      "role": "user",
      "content": "what llm are you"
    }
  ]
}'
```
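When calling LiteLLM directly from the SDK rather than through the proxy, the same redaction comes from setting the flag in Python before making calls. A minimal sketch (illustrative, not part of this diff):

```python
import litellm

# Prevent message/response content from being sent to logging callbacks;
# request metadata (e.g. spend) is still tracked.
litellm.turn_off_message_logging = True

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
)
```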

</TabItem>
<TabItem value="dynamic" label="Per Request">

:::info

Dynamic request message redaction is in BETA.

:::

Pass in a request header to enable message redaction for a request.

```
x-litellm-enable-message-redaction: true
```

Example config.yaml

**1. Setup config.yaml**

```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
```

**2. Setup per request header**

```shell
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-zV5HlSIm8ihj1F9C_ZbB1g' \
-H 'x-litellm-enable-message-redaction: true' \
-d '{
  "model": "gpt-3.5-turbo-testing",
  "messages": [
    {
      "role": "user",
      "content": "Hey, how'\''s it going 1234?"
    }
  ]
}'
```
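The same per-request header can also be sent from Python when calling the proxy through an OpenAI-compatible client. A minimal sketch (illustrative, not part of this diff; the key and base URL are taken from the curl example above):

```python
import openai

client = openai.OpenAI(
    api_key="sk-zV5HlSIm8ihj1F9C_ZbB1g",  # proxy virtual key from the curl example
    base_url="http://0.0.0.0:4000",
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo-testing",
    messages=[{"role": "user", "content": "Hey, how's it going 1234?"}],
    # per-request header that enables message redaction for this call only
    extra_headers={"x-litellm-enable-message-redaction": "true"},
)
```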

</TabItem>
</Tabs>

**3. Check Logging Tool + Spend Logs**

**Logging Tool**

<Image img={require('../../img/message_redaction_logging.png')}/>

**Spend Logs**

<Image img={require('../../img/message_redaction_spend_logs.png')} />

### Disable Message Redaction

If you have `litellm.turn_off_message_logging` turned on, you can override it for specific requests by
setting a request header `LiteLLM-Disable-Message-Redaction: true`.

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
@@ -109,8 +192,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
}'
```

Removes any field with `user_api_key_*` from metadata.

### Turn off all tracking/logging

@@ -6,6 +6,8 @@ Special headers that are supported by LiteLLM.

`x-litellm-timeout` Optional[float]: The timeout for the request in seconds.

`x-litellm-enable-message-redaction`: Optional[bool]: Don't log the message content to logging integrations. Just track spend. [Learn More](./logging#redact-messages-response-content)
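For example, either header can be attached per request from an OpenAI-compatible client pointed at the LiteLLM proxy. A minimal sketch (illustrative; the key and base URL are placeholders):

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "ping"}],
    # header values are strings; the timeout is documented as a float in seconds
    extra_headers={"x-litellm-timeout": "30"},
)
```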

## Anthropic Headers

`anthropic-version` Optional[str]: The version of the Anthropic API to use.

BIN docs/my-website/img/message_redaction_logging.png (new file, 259 KiB, binary file not shown)
BIN docs/my-website/img/message_redaction_spend_logs.png (new file, 220 KiB, binary file not shown)

@@ -147,13 +147,11 @@ class OpikLogger(CustomBatchLogger):
                    f"OpikLogger - Error: {response.status_code} - {response.text}"
                )
            else:
                verbose_logger.debug(
                verbose_logger.info(
                    f"OpikLogger - {len(self.log_queue)} Opik events submitted"
                )
        except Exception as e:
            verbose_logger.exception(
                f"OpikLogger failed to send batch - {str(e)}\n{traceback.format_exc()}"
            )
            verbose_logger.exception(f"OpikLogger failed to send batch - {str(e)}")

    def _create_opik_headers(self):
        headers = {}
@@ -165,7 +163,7 @@ class OpikLogger(CustomBatchLogger):
        return headers

    async def async_send_batch(self):
        verbose_logger.exception("Calling async_send_batch")
        verbose_logger.info("Calling async_send_batch")
        if not self.log_queue:
            return

@@ -177,10 +175,12 @@ class OpikLogger(CustomBatchLogger):
            await self._submit_batch(
                url=self.trace_url, headers=self.headers, batch={"traces": traces}
            )
            verbose_logger.info(f"Sent {len(traces)} traces")
        if len(spans) > 0:
            await self._submit_batch(
                url=self.span_url, headers=self.headers, batch={"spans": spans}
            )
            verbose_logger.info(f"Sent {len(spans)} spans")

    def _create_opik_payload(  # noqa: PLR0915
        self, kwargs, response_obj, start_time, end_time
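For context, the flush logic above follows a common batch pattern: drain the queue, split items by type, and POST each non-empty batch. A simplified standalone sketch (hypothetical URLs and payload shapes, not the actual OpikLogger internals):

```python
import asyncio
import httpx

async def send_batches(log_queue: list) -> None:
    # Split queued items into trace and span payloads.
    traces = [item for item in log_queue if item.get("type") == "trace"]
    spans = [item for item in log_queue if item.get("type") == "span"]

    async with httpx.AsyncClient() as client:
        # Only submit non-empty batches, one POST per payload type.
        if traces:
            await client.post("https://example.com/v1/traces", json={"traces": traces})
        if spans:
            await client.post("https://example.com/v1/spans", json={"spans": spans})

asyncio.run(send_batches([{"type": "trace", "name": "demo"}]))
```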
@@ -73,12 +73,9 @@ def perform_redaction(model_call_details: dict, result):
        return {"text": "redacted-by-litellm"}


def redact_message_input_output_from_logging(
    model_call_details: dict, result, input: Optional[Any] = None
):
def should_redact_message_logging(model_call_details: dict) -> bool:
    """
    Removes messages, prompts, input, response from logging. This modifies the data in-place
    only redacts when litellm.turn_off_message_logging == True
    Determine if message logging should be redacted.
    """
    _request_headers = (
        model_call_details.get("litellm_params", {}).get("metadata", {}) or {}
@@ -86,25 +83,48 @@

    request_headers = _request_headers.get("headers", {})

    possible_request_headers = [
        "litellm-enable-message-redaction",  # old header. maintain backwards compatibility
        "x-litellm-enable-message-redaction",  # new header
    ]

    is_redaction_enabled_via_header = False
    for header in possible_request_headers:
        if bool(request_headers.get(header, False)):
            is_redaction_enabled_via_header = True
            break

    # check if user opted out of logging message/response to callbacks
    if (
        litellm.turn_off_message_logging is not True
        and request_headers.get("litellm-enable-message-redaction", False) is not True
        and is_redaction_enabled_via_header is not True
        and _get_turn_off_message_logging_from_dynamic_params(model_call_details)
        is not True
    ):
        return result
        return False

    if request_headers and request_headers.get(
        "litellm-disable-message-redaction", False
    if request_headers and bool(
        request_headers.get("litellm-disable-message-redaction", False)
    ):
        return result
        return False

    # user has OPTED OUT of message redaction
    if _get_turn_off_message_logging_from_dynamic_params(model_call_details) is False:
        return result
        return False

    return True


def redact_message_input_output_from_logging(
    model_call_details: dict, result, input: Optional[Any] = None
) -> Any:
    """
    Removes messages, prompts, input, response from logging. This modifies the data in-place
    only redacts when litellm.turn_off_message_logging == True
    """
    if should_redact_message_logging(model_call_details):
        return perform_redaction(model_call_details, result)
    return result


def _get_turn_off_message_logging_from_dynamic_params(
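The header check above relies on `bool(request_headers.get(header, False))`, which is what the commit message means by a stringified bool value still asserting to true: header values arrive as strings, and any non-empty string is truthy. A standalone illustration (not part of this diff):

```python
# Headers forwarded by the proxy are strings, not booleans.
request_headers = {"x-litellm-enable-message-redaction": "true"}

# bool() on a non-empty string is True, so the stringified "true" still
# enables redaction (note that any non-empty value would behave the same way).
enabled = bool(request_headers.get("x-litellm-enable-message-redaction", False))
print(enabled)  # True

# With the header absent, the default False is returned and redaction stays off.
print(bool({}.get("x-litellm-enable-message-redaction", False)))  # False
```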
@@ -8904,16 +8904,5 @@
        "supports_function_calling": true,
        "mode": "chat",
        "supports_tool_choice": true
    },
    "hyperbolic/deepseek-v3": {
        "max_tokens": 20480,
        "max_input_tokens": 131072,
        "max_output_tokens": 20480,
        "litellm_provider": "openai",
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.00000025,
        "mode": "chat",
        "supports_function_calling": true,
        "supports_response_schema": true
    }
}
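As a quick sanity check on the per-token prices in the entry above, a one-line sketch (values copied from the JSON; illustrative only):

```python
# $0.00000025 per token works out to $0.25 per million tokens.
input_cost_per_token = 0.00000025
print(f"${input_cost_per_token * 1_000_000:.2f} per 1M input tokens")  # $0.25
```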
File diff suppressed because one or more lines are too long
@@ -29,21 +29,4 @@ model_list:

litellm_settings:
  callbacks: ["langsmith"]
  disable_no_log_param: true

general_settings:
  enable_jwt_auth: True
  litellm_jwtauth:
    object_id_jwt_field: "client_id" # can be either user / team, inferred from the role mapping
    roles_jwt_field: "resource_access.litellm-test-client-id.roles"
    role_mappings:
      - role: litellm.api.consumer
        internal_role: "team"
    enforce_rbac: true
    role_permissions: # default model + endpoint permissions for a role.
      - role: team
        models: ["anthropic-claude"]
        routes: ["openai_routes"]

  callbacks: ["opik"]