Compare commits

...
Sign in to create a new pull request.

4 commits

Author SHA1 Message Date
Ishaan Jaff
6586718b61 use type for DatadogProxyFailureHookJsonMessage 2024-11-26 17:06:59 -08:00
Ishaan Jaff
86d76dc1d4 fix _handle_logging_authentication_error 2024-11-26 17:03:46 -08:00
Ishaan Jaff
d7033a3564 v0 log auth exceptions on DD 2024-11-26 16:28:17 -08:00
Ishaan Jaff
df0b9adc44 use helper to handle logging Auth fails on custom loggers 2024-11-26 16:26:42 -08:00
4 changed files with 103 additions and 21 deletions

View file

@ -32,12 +32,14 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.services import ServiceLoggerPayload
from .types import DD_ERRORS, DatadogPayload, DataDogStatus
from .types import *
from .utils import make_json_serializable
DD_MAX_BATCH_SIZE = 1000 # max number of logs DD API can accept
DD_SOURCE_NAME = "litellm"
class DataDogLogger(CustomBatchLogger):
@ -382,3 +384,54 @@ class DataDogLogger(CustomBatchLogger):
No user has asked for this so far, this might be spammy on datadog. If need arises we can implement this
"""
return
async def async_post_call_failure_hook(
    self,
    request_data: dict,
    original_exception: Exception,
    user_api_key_dict: UserAPIKeyAuth,
):
    """
    Async Proxy Post Call Failure Hook.

    Logs client-side errors raised while using LiteLLM Proxy: serializes the
    exception, traceback, original request payload and (sanitized) user key
    metadata into a Datadog payload and appends it to the batch queue, to be
    sent on the next periodic flush.

    Args:
        request_data (dict): Original request payload that triggered the failure.
        original_exception (Exception): Exception raised while handling the request.
        user_api_key_dict (UserAPIKeyAuth): Auth metadata for the calling key;
            logged via ``model_dump()``.
    """
    import json

    try:
        verbose_logger.debug(
            "Datadog: Logging - Enters failure logging function for model %s",
            request_data,
        )

        _json_message = DatadogProxyFailureHookJsonMessage(
            exception=str(original_exception),
            traceback=traceback.format_exc(),
            request_data=request_data,
            user_api_key_dict=user_api_key_dict.model_dump(),
        )

        dd_payload = DatadogPayload(
            ddsource=DD_SOURCE_NAME,
            ddtags="",
            hostname="",
            message=json.dumps(_json_message),
            status=DataDogStatus.ERROR,
            service="litellm-server",
        )

        self.log_queue.append(dd_payload)
        verbose_logger.debug(
            f"Datadog, failure event added to queue. Will flush in {self.flush_interval} seconds..."
        )
    except Exception as e:
        # Best-effort telemetry: a Datadog logging failure must never break
        # the proxy's error path, so log and swallow.
        verbose_logger.exception(
            f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
        )

View file

@ -19,3 +19,10 @@ class DatadogPayload(TypedDict, total=False):
class DD_ERRORS(Enum):
    # Known Datadog API error responses, with remediation hints surfaced to users.
    DATADOG_413_ERROR = "Datadog API Error - Payload too large (batch is above 5MB uncompressed). If you want this logged either disable request/response logging or set `DD_BATCH_SIZE=50`"
# Shape of the JSON log message emitted by the proxy's post-call failure hook.
# total=False: every field is optional, so partial failure records are valid.
DatadogProxyFailureHookJsonMessage = TypedDict(
    "DatadogProxyFailureHookJsonMessage",
    {
        "exception": str,
        "traceback": str,
        "request_data": dict,
        "user_api_key_dict": dict,
    },
    total=False,
)

View file

@ -74,7 +74,7 @@ from litellm.proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request
from litellm.proxy.auth.route_checks import RouteChecks
from litellm.proxy.auth.service_account_checks import service_account_checks
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm.proxy.utils import _to_ns
from litellm.proxy.utils import ProxyLogging, _hash_token_if_needed, _to_ns
from litellm.types.services import ServiceTypes
user_api_key_service_logger_obj = ServiceLogging() # used for tracking latency on OTEL
@ -220,6 +220,7 @@ async def user_api_key_auth( # noqa: PLR0915
)
parent_otel_span: Optional[Span] = None
valid_token: Optional[UserAPIKeyAuth] = None
start_time = datetime.now()
try:
route: str = get_request_route(request=request)
@ -1197,13 +1198,16 @@ async def user_api_key_auth( # noqa: PLR0915
extra={"requester_ip": requester_ip},
)
# Log this exception to OTEL
if open_telemetry_logger is not None:
await open_telemetry_logger.async_post_call_failure_hook( # type: ignore
asyncio.create_task(
_handle_logging_authentication_error(
api_key=api_key,
parent_otel_span=parent_otel_span,
valid_token=valid_token,
original_exception=e,
request_data={},
user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
request=request,
proxy_logging_obj=proxy_logging_obj,
)
)
if isinstance(e, litellm.BudgetExceededError):
raise ProxyException(
@ -1229,6 +1233,35 @@ async def user_api_key_auth( # noqa: PLR0915
)
async def _handle_logging_authentication_error(
    api_key: str,
    parent_otel_span: Optional[Span],
    valid_token: Optional[UserAPIKeyAuth],
    original_exception: Exception,
    request: Request,
    proxy_logging_obj: ProxyLogging,
):
    """
    Handle logging of Authentication Errors on Custom Loggers - OpenTelemetry, Datadog, etc.

    Args:
        api_key: Raw API key from the failed request; hashed before logging.
        parent_otel_span: OTEL span for the request, if tracing is enabled.
        valid_token: Partially-resolved auth object, if one was built before
            the failure; a placeholder is created when it is None.
        original_exception: The authentication error being reported.
        request: Incoming request; its JSON body (if any) is attached to the log.
        proxy_logging_obj: Dispatches the failure to all configured loggers.
    """
    try:
        # Auth can fail before any body exists (e.g. GETs, malformed JSON).
        # An unguarded request.json() would raise here and kill this logging
        # task before anything is reported.
        request_data = await request.json()
    except Exception:
        request_data = {}

    if valid_token is None:
        valid_token = UserAPIKeyAuth(parent_otel_span=parent_otel_span)

    # Never log the plaintext key: store its hash and scrub the raw value.
    valid_token.token = _hash_token_if_needed(token=api_key)
    valid_token.api_key = None

    # Log this exception to OTEL, other custom loggers (fire-and-forget so the
    # auth error response is not delayed by logger I/O).
    asyncio.create_task(
        proxy_logging_obj.post_call_failure_hook(
            user_api_key_dict=valid_token,
            original_exception=original_exception,
            request_data=request_data,
        )
    )
def _return_user_api_key_auth_obj(
user_obj: Optional[LiteLLM_UserTable],
api_key: str,

View file

@ -8,18 +8,7 @@ model_list:
model: anthropic/fake
api_base: https://exampleanthropicendpoint-production.up.railway.app/
router_settings:
provider_budget_config:
openai:
budget_limit: 0.3 # float of $ value budget for time period
time_period: 1d # can be 1d, 2d, 30d
anthropic:
budget_limit: 5
time_period: 1d
redis_host: os.environ/REDIS_HOST
redis_port: os.environ/REDIS_PORT
redis_password: os.environ/REDIS_PASSWORD
litellm_settings:
callbacks: ["prometheus"]
success_callback: ["langfuse"]
callbacks: ["datadog"] # will log success & failures
service_callbacks: ["datadog"] # will log DB fails / exceptions
turn_off_message_logging: True # will redact message / response content