Compare commits

4 commits

Author       SHA1        Message                                                     Date
Ishaan Jaff  6586718b61  use type for DatadogProxyFailureHookJsonMessage            2024-11-26 17:06:59 -08:00
Ishaan Jaff  86d76dc1d4  fix _handle_logging_authentication_error                   2024-11-26 17:03:46 -08:00
Ishaan Jaff  d7033a3564  v0 log auth exceptions on DD                               2024-11-26 16:28:17 -08:00
Ishaan Jaff  df0b9adc44  use helper to handle logging Auth fails on custom loggers  2024-11-26 16:26:42 -08:00
4 changed files with 103 additions and 21 deletions

@@ -32,12 +32,14 @@ from litellm.llms.custom_httpx.http_handler import (
     get_async_httpx_client,
     httpxSpecialProvider,
 )
+from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.services import ServiceLoggerPayload
-from .types import DD_ERRORS, DatadogPayload, DataDogStatus
+from .types import *
 from .utils import make_json_serializable

 DD_MAX_BATCH_SIZE = 1000  # max number of logs DD API can accept
+DD_SOURCE_NAME = "litellm"


 class DataDogLogger(CustomBatchLogger):
@@ -382,3 +384,54 @@ class DataDogLogger(CustomBatchLogger):
         No user has asked for this so far, this might be spammy on datadog. If need arises we can implement this
         """
         return
+
+    async def async_post_call_failure_hook(
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
+    ):
+        """
+        Async Proxy Post Call Failure Hook
+
+        Logs client-side errors when using LiteLLM Proxy
+
+        Args:
+            request_data (dict): Original request data
+            original_exception (Exception): Exception raised while handling the request
+            user_api_key_dict (UserAPIKeyAuth): Auth metadata for the failing request
+        """
+        import json
+
+        try:
+            verbose_logger.debug(
+                "Datadog: Logging - Enters failure logging function for model %s",
+                request_data,
+            )
+
+            _json_message = DatadogProxyFailureHookJsonMessage(
+                exception=str(original_exception),
+                traceback=traceback.format_exc(),
+                request_data=request_data,
+                user_api_key_dict=user_api_key_dict.model_dump(),
+            )
+
+            dd_payload = DatadogPayload(
+                ddsource=DD_SOURCE_NAME,
+                ddtags="",
+                hostname="",
+                message=json.dumps(_json_message),
+                service="litellm-server",
+                status=DataDogStatus.ERROR,
+            )
+
+            self.log_queue.append(dd_payload)
+            verbose_logger.debug(
+                f"Datadog, failure event added to queue. Will flush in {self.flush_interval} seconds..."
+            )
+        except Exception as e:
+            verbose_logger.exception(
+                f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
+            )
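To make the new hook concrete: below is a minimal, self-contained sketch of what it appends to the batch queue. The type definitions are copied locally from this diff rather than imported from litellm, the DataDogStatus.ERROR value of "error" is an assumption, and build_failure_payload is a hypothetical helper, not litellm code.

import json
import traceback
from enum import Enum
from typing import TypedDict


class DataDogStatus(str, Enum):
    # Assumed value; mirrors the enum referenced in the diff above.
    ERROR = "error"


class DatadogProxyFailureHookJsonMessage(TypedDict, total=False):
    exception: str
    traceback: str
    request_data: dict
    user_api_key_dict: dict


class DatadogPayload(TypedDict, total=False):
    ddsource: str
    ddtags: str
    hostname: str
    message: str
    service: str
    status: str


def build_failure_payload(exc: Exception, request_data: dict) -> DatadogPayload:
    # Hypothetical helper showing the shape the hook appends to self.log_queue:
    # the structured error is JSON-encoded into Datadog's `message` field.
    msg = DatadogProxyFailureHookJsonMessage(
        exception=str(exc),
        traceback=traceback.format_exc(),
        request_data=request_data,
        user_api_key_dict={},
    )
    return DatadogPayload(
        ddsource="litellm",
        ddtags="",
        hostname="",
        message=json.dumps(msg),
        service="litellm-server",
        status=DataDogStatus.ERROR.value,
    )


try:
    raise ValueError("Authentication Error: invalid user key")
except ValueError as e:
    payload = build_failure_payload(e, {"model": "anthropic/fake"})
    print(payload["message"])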

@@ -19,3 +19,10 @@ class DatadogPayload(TypedDict, total=False):

 class DD_ERRORS(Enum):
     DATADOG_413_ERROR = "Datadog API Error - Payload too large (batch is above 5MB uncompressed). If you want this logged either disable request/response logging or set `DD_BATCH_SIZE=50`"
+
+
+class DatadogProxyFailureHookJsonMessage(TypedDict, total=False):
+    exception: str
+    traceback: str
+    request_data: dict
+    user_api_key_dict: dict
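Because the new TypedDict is declared with total=False, every field is optional, so the hook can serialize whatever subset of fields it has. A short illustration of that property (plain Python, nothing litellm-specific):

import json
from typing import TypedDict


class DatadogProxyFailureHookJsonMessage(TypedDict, total=False):
    exception: str
    traceback: str
    request_data: dict
    user_api_key_dict: dict


# total=False: a partial message still type-checks...
partial: DatadogProxyFailureHookJsonMessage = {"exception": "Invalid proxy server token passed"}

# ...and since a TypedDict is a plain dict at runtime, json.dumps works on it directly.
print(json.dumps(partial))  # {"exception": "Invalid proxy server token passed"}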

@@ -74,7 +74,7 @@ from litellm.proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request
 from litellm.proxy.auth.route_checks import RouteChecks
 from litellm.proxy.auth.service_account_checks import service_account_checks
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
-from litellm.proxy.utils import _to_ns
+from litellm.proxy.utils import ProxyLogging, _hash_token_if_needed, _to_ns
 from litellm.types.services import ServiceTypes

 user_api_key_service_logger_obj = ServiceLogging()  # used for tracking latency on OTEL
@@ -220,6 +220,7 @@ async def user_api_key_auth(  # noqa: PLR0915
     )

     parent_otel_span: Optional[Span] = None
+    valid_token: Optional[UserAPIKeyAuth] = None
     start_time = datetime.now()
     try:
         route: str = get_request_route(request=request)
@ -1197,13 +1198,16 @@ async def user_api_key_auth( # noqa: PLR0915
extra={"requester_ip": requester_ip}, extra={"requester_ip": requester_ip},
) )
# Log this exception to OTEL asyncio.create_task(
if open_telemetry_logger is not None: _handle_logging_authentication_error(
await open_telemetry_logger.async_post_call_failure_hook( # type: ignore api_key=api_key,
parent_otel_span=parent_otel_span,
valid_token=valid_token,
original_exception=e, original_exception=e,
request_data={}, request=request,
user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span), proxy_logging_obj=proxy_logging_obj,
) )
)
if isinstance(e, litellm.BudgetExceededError): if isinstance(e, litellm.BudgetExceededError):
raise ProxyException( raise ProxyException(
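Note the behavioral change in this hunk: the old code awaited the OTEL hook inline, while the new code wraps the helper in asyncio.create_task, so the auth failure path no longer blocks on logging backends. A minimal sketch of that fire-and-forget pattern (hypothetical names, not litellm code):

import asyncio


async def log_failure_to_backends(exc: Exception) -> None:
    # Stand-in for proxy_logging_obj.post_call_failure_hook: imagine a slow
    # network call out to Datadog / OTEL.
    await asyncio.sleep(2)
    print(f"logged: {exc}")


async def handle_auth_error(exc: Exception) -> None:
    # Fire-and-forget: schedule logging and return to the client immediately,
    # rather than awaiting the 2-second backend call inline. (In long-lived
    # services, keep a reference to the task so it is not garbage-collected.)
    asyncio.create_task(log_failure_to_backends(exc))


async def main() -> None:
    await handle_auth_error(ValueError("Authentication Error"))
    # Give the scheduled task time to finish before the loop shuts down.
    await asyncio.sleep(3)


asyncio.run(main())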
@@ -1229,6 +1233,35 @@ async def user_api_key_auth(  # noqa: PLR0915
         )


+async def _handle_logging_authentication_error(
+    api_key: str,
+    parent_otel_span: Optional[Span],
+    valid_token: Optional[UserAPIKeyAuth],
+    original_exception: Exception,
+    request: Request,
+    proxy_logging_obj: ProxyLogging,
+):
+    """
+    Handle logging of Authentication Errors on Custom Loggers - OpenTelemetry, Datadog, etc.
+    """
+    request_data = await request.json()
+    if valid_token is None:
+        valid_token = UserAPIKeyAuth(parent_otel_span=parent_otel_span)
+    valid_token.token = _hash_token_if_needed(token=api_key)
+    valid_token.api_key = None
+
+    # Log this exception to OTEL, other custom loggers
+    asyncio.create_task(
+        proxy_logging_obj.post_call_failure_hook(
+            user_api_key_dict=valid_token,
+            original_exception=original_exception,
+            request_data=request_data,
+        )
+    )
+
+
 def _return_user_api_key_auth_obj(
     user_obj: Optional[LiteLLM_UserTable],
     api_key: str,
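One detail worth flagging in the helper: it overwrites valid_token.token with _hash_token_if_needed(token=api_key) and nulls api_key before anything reaches the custom loggers, so the plaintext key is never logged. The real implementation lives in litellm.proxy.utils; the sketch below is a hypothetical stand-in that only illustrates the idea (the "sk-" prefix check and the SHA-256 digest are assumptions):

import hashlib


def hash_token_if_needed(token: str) -> str:
    # Hypothetical stand-in for litellm.proxy.utils._hash_token_if_needed:
    # replace a raw API key with its SHA-256 digest so the plaintext secret
    # never reaches Datadog/OTEL; pass through values that look hashed already.
    if token.startswith("sk-"):
        return hashlib.sha256(token.encode()).hexdigest()
    return token


print(hash_token_if_needed("sk-1234"))  # 64-char hex digest, not the raw key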

@@ -8,18 +8,7 @@ model_list:
       model: anthropic/fake
       api_base: https://exampleanthropicendpoint-production.up.railway.app/

-router_settings:
-  provider_budget_config:
-    openai:
-      budget_limit: 0.3 # float of $ value budget for time period
-      time_period: 1d # can be 1d, 2d, 30d
-    anthropic:
-      budget_limit: 5
-      time_period: 1d
-  redis_host: os.environ/REDIS_HOST
-  redis_port: os.environ/REDIS_PORT
-  redis_password: os.environ/REDIS_PASSWORD
-
 litellm_settings:
-  callbacks: ["prometheus"]
-  success_callback: ["langfuse"]
+  callbacks: ["datadog"] # will log success & failures
+  service_callbacks: ["datadog"] # will log DB fails / exceptions
+  turn_off_message_logging: True # will redact message / response content
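Finally, the swapped-in datadog callbacks only work if Datadog credentials are present in the environment. A hedged sketch of the equivalent SDK-side setup, assuming the litellm package is installed (DD_API_KEY and DD_SITE are the environment variables the Datadog integration documents; the values here are placeholders):

import os

import litellm

# Credentials the Datadog integration reads from the environment.
os.environ["DD_API_KEY"] = "<your-datadog-api-key>"  # placeholder
os.environ["DD_SITE"] = "us5.datadoghq.com"          # your Datadog site

# Equivalent of `callbacks: ["datadog"]` in the proxy config above:
litellm.success_callback = ["datadog"]
litellm.failure_callback = ["datadog"]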