mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
(fix) GCS bucket logger - apply truncate_standard_logging_payload_content to standard_logging_payload and ensure GCS flushes its queue on failures (#7519)
* fix async_send_batch for GCS * fix truncation in the GCS logger * add test_truncate_standard_logging_payload
This commit is contained in:
parent
6ffdd5c250
commit
ec3bcf189f
5 changed files with 87 additions and 84 deletions
|
@ -293,3 +293,60 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
|
|||
except Exception:
|
||||
print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
|
||||
pass
|
||||
|
||||
# Useful helpers for custom logger classes
|
||||
|
||||
def truncate_standard_logging_payload_content(
    self,
    standard_logging_object: StandardLoggingPayload,
):
    """
    Shorten oversized string fields on a logging payload, in place.

    Loggers such as DataDog and GCS Bucket cap the size of an accepted
    payload (~1MB), so the error string, messages, and response are each
    truncated when they exceed a fixed character limit.
    """
    max_str_length = 10_000  # per-field character cap

    # Delegate each potentially-large field to the shared truncation helper.
    for target_field in ("error_str", "messages", "response"):
        self._truncate_field(
            standard_logging_object=standard_logging_object,
            field_name=target_field,
            max_length=max_str_length,
        )
|
||||
|
||||
def _truncate_field(
|
||||
self,
|
||||
standard_logging_object: StandardLoggingPayload,
|
||||
field_name: str,
|
||||
max_length: int,
|
||||
) -> None:
|
||||
"""
|
||||
Helper function to truncate a field in the logging payload
|
||||
|
||||
This converts the field to a string and then truncates it if it exceeds the max length.
|
||||
|
||||
Why convert to string ?
|
||||
1. User was sending a poorly formatted list for `messages` field, we could not predict where they would send content
|
||||
- Converting to string and then truncating the logged content catches this
|
||||
2. We want to avoid modifying the original `messages`, `response`, and `error_str` in the logging payload since these are in kwargs and could be returned to the user
|
||||
"""
|
||||
field_value = standard_logging_object.get(field_name) # type: ignore
|
||||
if field_value:
|
||||
str_value = str(field_value)
|
||||
if len(str_value) > max_length:
|
||||
standard_logging_object[field_name] = self._truncate_text( # type: ignore
|
||||
text=str_value, max_length=max_length
|
||||
)
|
||||
|
||||
def _truncate_text(self, text: str, max_length: int) -> str:
|
||||
"""Truncate text if it exceeds max_length"""
|
||||
return (
|
||||
text[:max_length]
|
||||
+ "...truncated by litellm, this logger does not support large content"
|
||||
if len(text) > max_length
|
||||
else text
|
||||
)
|
||||
|
|
|
@ -256,10 +256,6 @@ class DataDogLogger(CustomBatchLogger):
|
|||
"""
|
||||
import json
|
||||
|
||||
from litellm.litellm_core_utils.litellm_logging import (
|
||||
truncate_standard_logging_payload_content,
|
||||
)
|
||||
|
||||
standard_logging_object: Optional[StandardLoggingPayload] = kwargs.get(
|
||||
"standard_logging_object", None
|
||||
)
|
||||
|
@ -271,7 +267,7 @@ class DataDogLogger(CustomBatchLogger):
|
|||
status = DataDogStatus.ERROR
|
||||
|
||||
# Build the initial payload
|
||||
truncate_standard_logging_payload_content(standard_logging_object)
|
||||
self.truncate_standard_logging_payload_content(standard_logging_object)
|
||||
json_payload = json.dumps(standard_logging_object, default=str)
|
||||
|
||||
verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)
|
||||
|
|
|
@ -64,7 +64,7 @@ class GCSBucketLogger(GCSBucketBase):
|
|||
)
|
||||
if logging_payload is None:
|
||||
raise ValueError("standard_logging_object not found in kwargs")
|
||||
|
||||
self.truncate_standard_logging_payload_content(logging_payload)
|
||||
# Add to logging queue - this will be flushed periodically
|
||||
self.log_queue.append(
|
||||
GCSLogQueueItem(
|
||||
|
@ -88,7 +88,7 @@ class GCSBucketLogger(GCSBucketBase):
|
|||
)
|
||||
if logging_payload is None:
|
||||
raise ValueError("standard_logging_object not found in kwargs")
|
||||
|
||||
self.truncate_standard_logging_payload_content(logging_payload)
|
||||
# Add to logging queue - this will be flushed periodically
|
||||
self.log_queue.append(
|
||||
GCSLogQueueItem(
|
||||
|
@ -114,36 +114,38 @@ class GCSBucketLogger(GCSBucketBase):
|
|||
if not self.log_queue:
|
||||
return
|
||||
|
||||
try:
|
||||
for log_item in self.log_queue:
|
||||
logging_payload = log_item["payload"]
|
||||
kwargs = log_item["kwargs"]
|
||||
response_obj = log_item.get("response_obj", None) or {}
|
||||
|
||||
gcs_logging_config: GCSLoggingConfig = (
|
||||
await self.get_gcs_logging_config(kwargs)
|
||||
gcs_logging_config: GCSLoggingConfig = await self.get_gcs_logging_config(
|
||||
kwargs
|
||||
)
|
||||
headers = await self.construct_request_headers(
|
||||
vertex_instance=gcs_logging_config["vertex_instance"],
|
||||
service_account_json=gcs_logging_config["path_service_account"],
|
||||
)
|
||||
bucket_name = gcs_logging_config["bucket_name"]
|
||||
object_name = self._get_object_name(
|
||||
kwargs, logging_payload, response_obj
|
||||
)
|
||||
object_name = self._get_object_name(kwargs, logging_payload, response_obj)
|
||||
|
||||
try:
|
||||
await self._log_json_data_on_gcs(
|
||||
headers=headers,
|
||||
bucket_name=bucket_name,
|
||||
object_name=object_name,
|
||||
logging_payload=logging_payload,
|
||||
)
|
||||
except Exception as e:
|
||||
# don't let one log item fail the entire batch
|
||||
verbose_logger.exception(
|
||||
f"GCS Bucket error logging payload to GCS bucket: {str(e)}"
|
||||
)
|
||||
pass
|
||||
|
||||
# Clear the queue after processing
|
||||
self.log_queue.clear()
|
||||
|
||||
except Exception as e:
|
||||
verbose_logger.exception(f"GCS Bucket batch logging error: {str(e)}")
|
||||
|
||||
def _get_object_name(
|
||||
self, kwargs: Dict, logging_payload: StandardLoggingPayload, response_obj: Any
|
||||
) -> str:
|
||||
|
|
|
@ -3034,60 +3034,6 @@ def get_standard_logging_object_payload(
|
|||
return None
|
||||
|
||||
|
||||
def truncate_standard_logging_payload_content(
    standard_logging_object: StandardLoggingPayload,
):
    """
    Shorten oversized string fields on a logging payload, in place.

    Some loggers (e.g. DataDog) cap the size of an accepted payload (~1MB),
    so the error string, messages, and response are each truncated when they
    exceed a fixed character limit.
    """
    max_str_length = 10_000  # per-field character cap

    # Delegate each potentially-large field to the shared truncation helper.
    for target_field in ("error_str", "messages", "response"):
        _truncate_field(
            standard_logging_object=standard_logging_object,
            field_name=target_field,
            max_length=max_str_length,
        )
|
||||
|
||||
|
||||
def _truncate_text(text: str, max_length: int) -> str:
|
||||
"""Truncate text if it exceeds max_length"""
|
||||
return (
|
||||
text[:max_length]
|
||||
+ "...truncated by litellm, this logger does not support large content"
|
||||
if len(text) > max_length
|
||||
else text
|
||||
)
|
||||
|
||||
|
||||
def _truncate_field(
    standard_logging_object: StandardLoggingPayload, field_name: str, max_length: int
) -> None:
    """
    Truncate a single payload field in place, if present and oversized.

    The value is first rendered with ``str()`` and the stringified form is
    what gets truncated.

    Why convert to string?
    1. Users sometimes send poorly formatted structures (e.g. for
       ``messages``), so we cannot predict where the bulky content lives;
       stringifying before truncating catches it regardless of shape.
    2. Replacing the payload entry with a new string avoids mutating the
       original ``messages`` / ``response`` / ``error_str`` objects, which
       live in kwargs and may be handed back to the user.
    """
    raw_value = standard_logging_object.get(field_name)  # type: ignore
    if not raw_value:
        # Missing or falsy field: nothing to truncate.
        return
    rendered = str(raw_value)
    if len(rendered) > max_length:
        standard_logging_object[field_name] = _truncate_text(  # type: ignore
            text=rendered, max_length=max_length
        )
|
||||
|
||||
|
||||
def get_standard_logging_metadata(
|
||||
metadata: Optional[Dict[str, Any]]
|
||||
) -> StandardLoggingMetadata:
|
||||
|
|
|
@ -28,9 +28,10 @@ from create_mock_standard_logging_payload import (
|
|||
)
|
||||
from litellm.litellm_core_utils.litellm_logging import (
|
||||
StandardLoggingPayloadSetup,
|
||||
truncate_standard_logging_payload_content,
|
||||
)
|
||||
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"response_obj,expected_values",
|
||||
|
@ -332,6 +333,7 @@ def test_truncate_standard_logging_payload():
|
|||
1. original messages, response, and error_str should NOT BE MODIFIED, since these are from kwargs
|
||||
2. the `messages`, `response`, and `error_str` in new standard_logging_payload should be truncated
|
||||
"""
|
||||
_custom_logger = CustomLogger()
|
||||
standard_logging_payload: StandardLoggingPayload = (
|
||||
create_standard_logging_payload_with_long_content()
|
||||
)
|
||||
|
@ -342,7 +344,7 @@ def test_truncate_standard_logging_payload():
|
|||
original_error_str = standard_logging_payload["error_str"]
|
||||
len_original_error_str = len(str(original_error_str))
|
||||
|
||||
truncate_standard_logging_payload_content(standard_logging_payload)
|
||||
_custom_logger.truncate_standard_logging_payload_content(standard_logging_payload)
|
||||
|
||||
# Original messages, response, and error_str should NOT BE MODIFIED
|
||||
assert standard_logging_payload["messages"] != original_messages
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue