(Bug Fix + Better Observability) - BudgetResetJob: (#8562)

* use class ResetBudgetJob

* refactor reset budget job

* update reset_budget job

* refactor reset budget job

* fix LiteLLM_UserTable

* refactor reset budget job

* add telemetry for reset budget job

* dd - log service success/failure on DD

* add detailed reset budget reset info on DD

* initialize_scheduled_background_jobs

* refactor reset budget job

* trigger service failure hook when fails to reset a budget for team, key, user

* fix resetBudgetJob

* unit testing for ResetBudgetJob

* test_duration_in_seconds_basic

* testing for triggering service logging

* fix logs on test teams fail

* remove unused imports

* fix import duration in s

* duration_in_seconds
This commit is contained in:
Ishaan Jaff 2025-02-15 16:13:08 -08:00 committed by GitHub
parent a8717ea124
commit c8d31a209b
11 changed files with 1107 additions and 87 deletions

View file

@ -35,12 +35,18 @@ from litellm.llms.custom_httpx.http_handler import (
)
from litellm.types.integrations.base_health_check import IntegrationHealthCheckStatus
from litellm.types.integrations.datadog import *
from litellm.types.services import ServiceLoggerPayload
from litellm.types.services import ServiceLoggerPayload, ServiceTypes
from litellm.types.utils import StandardLoggingPayload
from ..additional_logging_utils import AdditionalLoggingUtils
DD_MAX_BATCH_SIZE = 1000 # max number of logs DD API can accept
# max number of logs DD API can accept
DD_MAX_BATCH_SIZE = 1000
# Specifies which ServiceTypes are logged as success events to DD. (We don't want to spam DD traces with a large number of service types.)
DD_LOGGED_SUCCESS_SERVICE_TYPES = [
ServiceTypes.RESET_BUDGET_JOB,
]
class DataDogLogger(
@ -340,18 +346,16 @@ class DataDogLogger(
- example - Redis is failing / erroring, will be logged on DataDog
"""
try:
import json
_payload_dict = payload.model_dump()
_payload_dict.update(event_metadata or {})
_dd_message_str = json.dumps(_payload_dict, default=str)
_dd_payload = DatadogPayload(
ddsource="litellm",
ddtags="",
hostname="",
ddsource=self._get_datadog_source(),
ddtags=self._get_datadog_tags(),
hostname=self._get_datadog_hostname(),
message=_dd_message_str,
service="litellm-server",
service=self._get_datadog_service(),
status=DataDogStatus.WARN,
)
@ -377,7 +381,30 @@ class DataDogLogger(
No user has asked for this so far, and this might be spammy on Datadog. If the need arises, we can implement this.
"""
return
try:
# intentionally done. Don't want to log all service types to DD
if payload.service not in DD_LOGGED_SUCCESS_SERVICE_TYPES:
return
_payload_dict = payload.model_dump()
_payload_dict.update(event_metadata or {})
_dd_message_str = json.dumps(_payload_dict, default=str)
_dd_payload = DatadogPayload(
ddsource=self._get_datadog_source(),
ddtags=self._get_datadog_tags(),
hostname=self._get_datadog_hostname(),
message=_dd_message_str,
service=self._get_datadog_service(),
status=DataDogStatus.INFO,
)
self.log_queue.append(_dd_payload)
except Exception as e:
verbose_logger.exception(
f"Datadog: Logger - Exception in async_service_failure_hook: {e}"
)
def _create_v0_logging_payload(
self,