Add datadog health check support + fix bedrock converse cost tracking w/ region name specified (#7958)

* fix(bedrock/converse_handler.py): fix bedrock region name on async calls

* fix(utils.py): fix split model handling

Fixes bedrock cost calculation when region name is given

* feat(_health_endpoints.py): support health checking datadog integration

Closes https://github.com/BerriAI/litellm/issues/7921
This commit is contained in:
Krish Dholakia 2025-01-23 22:17:09 -08:00 committed by GitHub
parent a835baacfc
commit c6e9240405
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 254 additions and 33 deletions

View file

@ -15,12 +15,14 @@ For batching specific details see CustomBatchLogger class
import asyncio
import datetime
import json
import os
import traceback
import uuid
from datetime import datetime as datetimeObj
from typing import Any, List, Optional, Union
import httpx
from httpx import Response
import litellm
@ -31,14 +33,20 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.types.integrations.base_health_check import IntegrationHealthCheckStatus
from litellm.types.integrations.datadog import *
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import StandardLoggingPayload
from ..base_health_check import HealthCheckIntegration
DD_MAX_BATCH_SIZE = 1000 # max number of logs DD API can accept
class DataDogLogger(CustomBatchLogger):
class DataDogLogger(
CustomBatchLogger,
HealthCheckIntegration,
):
# Class variables or attributes
def __init__(
self,
@ -235,6 +243,25 @@ class DataDogLogger(CustomBatchLogger):
if len(self.log_queue) >= self.batch_size:
await self.async_send_batch()
def _create_datadog_logging_payload_helper(
    self,
    standard_logging_object: StandardLoggingPayload,
    status: DataDogStatus,
) -> DatadogPayload:
    """
    Serialize a standard logging payload into a `DatadogPayload` record.

    Args:
        standard_logging_object: the litellm standard logging payload to ship.
        status: Datadog log status level to attach to the record.

    Returns:
        DatadogPayload: record ready to be queued for the Datadog logs API.
    """
    # default=str so non-JSON-serializable values (datetimes, enums, ...)
    # are stringified instead of raising.
    serialized = json.dumps(standard_logging_object, default=str)
    verbose_logger.debug("Datadog: Logger - Logging payload = %s", serialized)
    return DatadogPayload(
        ddsource=self._get_datadog_source(),
        ddtags=self._get_datadog_tags(
            standard_logging_object=standard_logging_object
        ),
        hostname=self._get_datadog_hostname(),
        message=serialized,
        service=self._get_datadog_service(),
        status=status,
    )
def create_datadog_logging_payload(
self,
kwargs: Union[dict, Any],
@ -254,7 +281,6 @@ class DataDogLogger(CustomBatchLogger):
Returns:
DatadogPayload: defined in types.py
"""
import json
standard_logging_object: Optional[StandardLoggingPayload] = kwargs.get(
"standard_logging_object", None
@ -268,18 +294,9 @@ class DataDogLogger(CustomBatchLogger):
# Build the initial payload
self.truncate_standard_logging_payload_content(standard_logging_object)
json_payload = json.dumps(standard_logging_object, default=str)
verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)
dd_payload = DatadogPayload(
ddsource=self._get_datadog_source(),
ddtags=self._get_datadog_tags(
standard_logging_object=standard_logging_object
),
hostname=self._get_datadog_hostname(),
message=json_payload,
service=self._get_datadog_service(),
dd_payload = self._create_datadog_logging_payload_helper(
standard_logging_object=standard_logging_object,
status=status,
)
return dd_payload
@ -293,6 +310,7 @@ class DataDogLogger(CustomBatchLogger):
"Datadog recommends sending your logs compressed. Add the Content-Encoding: gzip header to the request when sending"
"""
import gzip
import json
@ -493,3 +511,35 @@ class DataDogLogger(CustomBatchLogger):
@staticmethod
def _get_datadog_pod_name():
    """Return the pod name from the POD_NAME env var, or "unknown" if unset."""
    pod_name = os.environ.get("POD_NAME")
    return pod_name if pod_name is not None else "unknown"
async def async_health_check(self) -> IntegrationHealthCheckStatus:
    """
    Check whether logs can be delivered to the Datadog intake API.

    Builds a single dummy standard-logging payload, sends it via
    `async_send_compressed_data`, and maps the outcome to a health status.

    Returns:
        IntegrationHealthCheckStatus: status="healthy" with no error message
        on a 2xx response; status="unhealthy" with the error detail otherwise.
        Never raises - any failure is reported as an unhealthy status.
    """
    from litellm.litellm_core_utils.litellm_logging import (
        create_dummy_standard_logging_payload,
    )

    standard_logging_object = create_dummy_standard_logging_payload()
    dd_payload = self._create_datadog_logging_payload_helper(
        standard_logging_object=standard_logging_object,
        status=DataDogStatus.INFO,
    )
    log_queue = [dd_payload]
    try:
        # Send INSIDE the try: connection errors / timeouts from httpx must
        # surface as an "unhealthy" result, not as an unhandled exception
        # bubbling out of the health check endpoint.
        response = await self.async_send_compressed_data(log_queue)
        response.raise_for_status()
        return IntegrationHealthCheckStatus(
            status="healthy",
            error_message=None,
        )
    except httpx.HTTPStatusError as e:
        # Non-2xx from the Datadog intake API - include the response body.
        return IntegrationHealthCheckStatus(
            status="unhealthy",
            error_message=e.response.text,
        )
    except Exception as e:
        # Transport-level or unexpected failures (DNS, TLS, timeout, ...).
        return IntegrationHealthCheckStatus(
            status="unhealthy",
            error_message=str(e),
        )