From 7d6e45f78e10722e2b505d684eb700bc64b0b5c6 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 26 Nov 2024 17:45:20 -0800 Subject: [PATCH 01/10] add async_log_failure_event for dd --- litellm/integrations/datadog/datadog.py | 39 ++++++++++++++++++------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index 40044ce9f..0085a1ea5 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -33,6 +33,7 @@ from litellm.llms.custom_httpx.http_handler import ( httpxSpecialProvider, ) from litellm.types.services import ServiceLoggerPayload +from litellm.types.utils import StandardLoggingPayload from .types import DD_ERRORS, DatadogPayload, DataDogStatus from .utils import make_json_serializable @@ -106,20 +107,20 @@ class DataDogLogger(CustomBatchLogger): verbose_logger.debug( "Datadog: Logging - Enters logging function for model %s", kwargs ) - dd_payload = self.create_datadog_logging_payload( - kwargs=kwargs, - response_obj=response_obj, - start_time=start_time, - end_time=end_time, - ) + await self._log_async_event(kwargs, response_obj, start_time, end_time) - self.log_queue.append(dd_payload) + except Exception as e: + verbose_logger.exception( + f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}" + ) + pass + + async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): + try: verbose_logger.debug( - f"Datadog, event added to queue. Will flush in {self.flush_interval} seconds..." + "Datadog: Logging - Enters logging function for model %s", kwargs ) - - if len(self.log_queue) >= self.batch_size: - await self.async_send_batch() + await self._log_async_event(kwargs, response_obj, start_time, end_time) except Exception as e: verbose_logger.exception( @@ -215,6 +216,22 @@ class DataDogLogger(CustomBatchLogger): pass pass + async def _log_async_event(self, kwargs, response_obj, start_time, end_time): + dd_payload = self.create_datadog_logging_payload( + kwargs=kwargs, + response_obj=response_obj, + start_time=start_time, + end_time=end_time, + ) + + self.log_queue.append(dd_payload) + verbose_logger.debug( + f"Datadog, event added to queue. Will flush in {self.flush_interval} seconds..." 
+ ) + + if len(self.log_queue) >= self.batch_size: + await self.async_send_batch() + def create_datadog_logging_payload( self, kwargs: Union[dict, Any], From 6d0424881a257f13e3d9b3e1dac21d8b3182feeb Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 26 Nov 2024 17:53:01 -0800 Subject: [PATCH 02/10] use standard logging payload for DD logging --- litellm/integrations/datadog/datadog.py | 72 ++++++++----------------- 1 file changed, 22 insertions(+), 50 deletions(-) diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index 0085a1ea5..83dc34341 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -253,59 +253,31 @@ class DataDogLogger(CustomBatchLogger): """ import json - litellm_params = kwargs.get("litellm_params", {}) - metadata = ( - litellm_params.get("metadata", {}) or {} - ) # if litellm_params['metadata'] == None - messages = kwargs.get("messages") - optional_params = kwargs.get("optional_params", {}) - call_type = kwargs.get("call_type", "litellm.completion") - cache_hit = kwargs.get("cache_hit", False) - usage = response_obj["usage"] - id = response_obj.get("id", str(uuid.uuid4())) - usage = dict(usage) - try: - response_time = (end_time - start_time).total_seconds() * 1000 - except Exception: - response_time = None - - try: - response_obj = dict(response_obj) - except Exception: - response_obj = response_obj - - # Clean Metadata before logging - never log raw metadata - # the raw metadata can contain circular references which leads to infinite recursion - # we clean out all extra litellm metadata params before logging - clean_metadata = {} - if isinstance(metadata, dict): - for key, value in metadata.items(): - # clean litellm metadata before logging - if key in [ - "endpoint", - "caching_groups", - "previous_models", - ]: - continue - else: - clean_metadata[key] = value + standard_logging_object: Optional[StandardLoggingPayload] = kwargs.get( + "standard_logging_object", None + ) + if standard_logging_object is None: + raise ValueError("standard_logging_object not found in kwargs") + _start_time: float = standard_logging_object.get("startTime", 0) + _end_time: float = standard_logging_object.get("endTime", 0) + response_time_seconds: float = _end_time - _start_time # Build the initial payload payload = { - "id": id, - "call_type": call_type, - "cache_hit": cache_hit, - "start_time": start_time, - "end_time": end_time, - "response_time": response_time, - "model": kwargs.get("model", ""), - "user": kwargs.get("user", ""), - "model_parameters": optional_params, - "spend": kwargs.get("response_cost", 0), - "messages": messages, - "response": response_obj, - "usage": usage, - "metadata": clean_metadata, + "id": standard_logging_object.get("id"), + "call_type": standard_logging_object.get("call_type"), + "cache_hit": standard_logging_object.get("cache_hit"), + "start_time": _start_time, + "end_time": _end_time, + "response_time": response_time_seconds, + "model": standard_logging_object.get("model"), + "user": standard_logging_object.get("end_user"), + "model_parameters": standard_logging_object.get("model_parameters"), + "spend": standard_logging_object.get("response_cost"), + "messages": standard_logging_object.get("messages"), + "response": standard_logging_object.get("response"), + "usage": standard_logging_object.get("usage"), + "metadata": standard_logging_object.get("metadata"), } make_json_serializable(payload) From d2b6054f0c694a16c1864f2c2838bb3b4706ad58 Mon Sep 17 00:00:00 2001 
From: Ishaan Jaff Date: Tue, 26 Nov 2024 17:58:46 -0800 Subject: [PATCH 03/10] use standard logging payload for DD --- litellm/integrations/datadog/datadog.py | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index 83dc34341..01283ac2d 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -258,30 +258,10 @@ class DataDogLogger(CustomBatchLogger): ) if standard_logging_object is None: raise ValueError("standard_logging_object not found in kwargs") - _start_time: float = standard_logging_object.get("startTime", 0) - _end_time: float = standard_logging_object.get("endTime", 0) - response_time_seconds: float = _end_time - _start_time # Build the initial payload - payload = { - "id": standard_logging_object.get("id"), - "call_type": standard_logging_object.get("call_type"), - "cache_hit": standard_logging_object.get("cache_hit"), - "start_time": _start_time, - "end_time": _end_time, - "response_time": response_time_seconds, - "model": standard_logging_object.get("model"), - "user": standard_logging_object.get("end_user"), - "model_parameters": standard_logging_object.get("model_parameters"), - "spend": standard_logging_object.get("response_cost"), - "messages": standard_logging_object.get("messages"), - "response": standard_logging_object.get("response"), - "usage": standard_logging_object.get("usage"), - "metadata": standard_logging_object.get("metadata"), - } - - make_json_serializable(payload) - json_payload = json.dumps(payload) + make_json_serializable(standard_logging_object) + json_payload = json.dumps(standard_logging_object) verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload) From 48c4b272f7625005a702c5a9ff1340322716fead Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 26 Nov 2024 18:10:03 -0800 Subject: [PATCH 04/10] fix use SLP status --- litellm/integrations/datadog/datadog.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index 01283ac2d..a100c2863 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -259,6 +259,10 @@ class DataDogLogger(CustomBatchLogger): if standard_logging_object is None: raise ValueError("standard_logging_object not found in kwargs") + status = DataDogStatus.INFO + if standard_logging_object.get("status") == "failure": + status = DataDogStatus.WARN + # Build the initial payload make_json_serializable(standard_logging_object) json_payload = json.dumps(standard_logging_object) @@ -271,7 +275,7 @@ class DataDogLogger(CustomBatchLogger): hostname="", message=json_payload, service="litellm-server", - status=DataDogStatus.INFO, + status=status, ) return dd_payload From 72562ddf2b90ecfa52ad017685b9924b48676ba4 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 26 Nov 2024 18:17:58 -0800 Subject: [PATCH 05/10] allow opting into _create_v0_logging_payload --- litellm/__init__.py | 1 + litellm/integrations/datadog/datadog.py | 105 ++++++++++++++++++++++-- 2 files changed, 100 insertions(+), 6 deletions(-) diff --git a/litellm/__init__.py b/litellm/__init__.py index 65b1b3465..43f91fe58 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -68,6 +68,7 @@ callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = langfuse_default_tags: Optional[List[str]] = None langsmith_batch_size: 
Optional[int] = None
 argilla_batch_size: Optional[int] = None
+datadog_use_v1: Optional[bool] = False  # if True, log the legacy (v0) DataDog payload instead of the StandardLoggingPayload
 argilla_transformation_object: Optional[Dict[str, Any]] = None
 _async_input_callback: List[Callable] = (
     []
diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py
index a100c2863..c5454a273 100644
--- a/litellm/integrations/datadog/datadog.py
+++ b/litellm/integrations/datadog/datadog.py
@@ -182,12 +182,20 @@ class DataDogLogger(CustomBatchLogger):
             verbose_logger.debug(
                 "Datadog: Logging - Enters logging function for model %s", kwargs
             )
-            dd_payload = self.create_datadog_logging_payload(
-                kwargs=kwargs,
-                response_obj=response_obj,
-                start_time=start_time,
-                end_time=end_time,
-            )
+            if litellm.datadog_use_v1 is True:
+                dd_payload = self._create_v0_logging_payload(
+                    kwargs=kwargs,
+                    response_obj=response_obj,
+                    start_time=start_time,
+                    end_time=end_time,
+                )
+            else:
+                dd_payload = self.create_datadog_logging_payload(
+                    kwargs=kwargs,
+                    response_obj=response_obj,
+                    start_time=start_time,
+                    end_time=end_time,
+                )

             response = self.sync_client.post(
                 url=self.intake_url,
@@ -355,3 +363,88 @@ class DataDogLogger(CustomBatchLogger):
         No user has asked for this so far; it might be spammy on Datadog. If the need arises, we can implement it.
         """
         return
+
+    def _create_v0_logging_payload(
+        self,
+        kwargs: Union[dict, Any],
+        response_obj: Any,
+        start_time: datetime.datetime,
+        end_time: datetime.datetime,
+    ) -> DatadogPayload:
+        """
+        Note: This is the legacy (v0) version of the DataDog logging payload,
+        kept for backwards compatibility.
+
+        (Not recommended) To keep logging this payload, set `litellm.datadog_use_v1 = True`
+        """
+        import json
+
+        litellm_params = kwargs.get("litellm_params", {})
+        metadata = (
+            litellm_params.get("metadata", {}) or {}
+        )  # guard against litellm_params["metadata"] being None
+        messages = kwargs.get("messages")
+        optional_params = kwargs.get("optional_params", {})
+        call_type = kwargs.get("call_type", "litellm.completion")
+        cache_hit = kwargs.get("cache_hit", False)
+        usage = response_obj["usage"]
+        id = response_obj.get("id", str(uuid.uuid4()))
+        usage = dict(usage)
+        try:
+            response_time = (end_time - start_time).total_seconds() * 1000
+        except Exception:
+            response_time = None
+
+        try:
+            response_obj = dict(response_obj)
+        except Exception:
+            response_obj = response_obj
+
+        # Clean metadata before logging - never log raw metadata;
+        # raw metadata can contain circular references, which leads to infinite recursion.
+        # We clean out all extra litellm metadata params before logging.
+        clean_metadata = {}
+        if isinstance(metadata, dict):
+            for key, value in metadata.items():
+                # clean litellm metadata before logging
+                if key in [
+                    "endpoint",
+                    "caching_groups",
+                    "previous_models",
+                ]:
+                    continue
+                else:
+                    clean_metadata[key] = value
+
+        # Build the initial payload
+        payload = {
+            "id": id,
+            "call_type": call_type,
+            "cache_hit": cache_hit,
+            "start_time": start_time,
+            "end_time": end_time,
+            "response_time": response_time,
+            "model": kwargs.get("model", ""),
+            "user": kwargs.get("user", ""),
+            "model_parameters": optional_params,
+            "spend": kwargs.get("response_cost", 0),
+            "messages": messages,
+            "response": response_obj,
+            "usage": usage,
+            "metadata": clean_metadata,
+        }
+
+        make_json_serializable(payload)
+        json_payload = json.dumps(payload)
+
+        verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)
+
+        dd_payload = DatadogPayload(
+            ddsource=os.getenv("DD_SOURCE", "litellm"),
+            ddtags="",
+            hostname="",
message=json_payload, + service="litellm-server", + status=DataDogStatus.INFO, + ) + return dd_payload From f9a40e5db3315eca41324aea7bf6d035b7fdbed2 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 26 Nov 2024 18:35:14 -0800 Subject: [PATCH 06/10] add unit tests for DD logging payload --- litellm/integrations/datadog/datadog.py | 2 +- tests/logging_callback_tests/test_datadog.py | 138 +++++++++++++++++++ 2 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 tests/logging_callback_tests/test_datadog.py diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index c5454a273..527b6f87d 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -269,7 +269,7 @@ class DataDogLogger(CustomBatchLogger): status = DataDogStatus.INFO if standard_logging_object.get("status") == "failure": - status = DataDogStatus.WARN + status = DataDogStatus.ERROR # Build the initial payload make_json_serializable(standard_logging_object) diff --git a/tests/logging_callback_tests/test_datadog.py b/tests/logging_callback_tests/test_datadog.py new file mode 100644 index 000000000..8e3ff0de6 --- /dev/null +++ b/tests/logging_callback_tests/test_datadog.py @@ -0,0 +1,138 @@ +import io +import os +import sys + + +sys.path.insert(0, os.path.abspath("../..")) + +import asyncio +import gzip +import json +import logging +import time +from unittest.mock import AsyncMock, patch + +import pytest + +import litellm +from litellm import completion +from litellm._logging import verbose_logger +from litellm.integrations.datadog.datadog import DataDogLogger, DataDogStatus +from datetime import datetime, timedelta +from litellm.types.integrations.datadog_llm_obs import * +from litellm.types.utils import ( + StandardLoggingPayload, + StandardLoggingModelInformation, + StandardLoggingMetadata, + StandardLoggingHiddenParams, +) + +verbose_logger.setLevel(logging.DEBUG) + + +def create_standard_logging_payload() -> StandardLoggingPayload: + return StandardLoggingPayload( + id="test_id", + call_type="completion", + response_cost=0.1, + response_cost_failure_debug_info=None, + status="success", + total_tokens=30, + prompt_tokens=20, + completion_tokens=10, + startTime=1234567890.0, + endTime=1234567891.0, + completionStartTime=1234567890.5, + model_map_information=StandardLoggingModelInformation( + model_map_key="gpt-3.5-turbo", model_map_value=None + ), + model="gpt-3.5-turbo", + model_id="model-123", + model_group="openai-gpt", + api_base="https://api.openai.com", + metadata=StandardLoggingMetadata( + user_api_key_hash="test_hash", + user_api_key_org_id=None, + user_api_key_alias="test_alias", + user_api_key_team_id="test_team", + user_api_key_user_id="test_user", + user_api_key_team_alias="test_team_alias", + spend_logs_metadata=None, + requester_ip_address="127.0.0.1", + requester_metadata=None, + ), + cache_hit=False, + cache_key=None, + saved_cache_cost=0.0, + request_tags=[], + end_user=None, + requester_ip_address="127.0.0.1", + messages=[{"role": "user", "content": "Hello, world!"}], + response={"choices": [{"message": {"content": "Hi there!"}}]}, + error_str=None, + model_parameters={"stream": True}, + hidden_params=StandardLoggingHiddenParams( + model_id="model-123", + cache_key=None, + api_base="https://api.openai.com", + response_cost="0.1", + additional_headers=None, + ), + ) + + +@pytest.mark.asyncio +async def test_create_datadog_logging_payload(): + """Test creating a DataDog logging payload from a standard logging 
object""" + dd_logger = DataDogLogger() + standard_payload = create_standard_logging_payload() + + # Create mock kwargs with the standard logging object + kwargs = {"standard_logging_object": standard_payload} + + # Test payload creation + dd_payload = dd_logger.create_datadog_logging_payload( + kwargs=kwargs, + response_obj=None, + start_time=datetime.now(), + end_time=datetime.now(), + ) + + # Verify payload structure + assert dd_payload["ddsource"] == os.getenv("DD_SOURCE", "litellm") + assert dd_payload["service"] == "litellm-server" + assert dd_payload["status"] == DataDogStatus.INFO + + # verify the message field == standard_payload + dict_payload = json.loads(dd_payload["message"]) + assert dict_payload == standard_payload + + +@pytest.mark.asyncio +async def test_datadog_failure_logging(): + """Test logging a failure event to DataDog""" + dd_logger = DataDogLogger() + standard_payload = create_standard_logging_payload() + standard_payload["status"] = "failure" # Set status to failure + standard_payload["error_str"] = "Test error" + + kwargs = {"standard_logging_object": standard_payload} + + dd_payload = dd_logger.create_datadog_logging_payload( + kwargs=kwargs, + response_obj=None, + start_time=datetime.now(), + end_time=datetime.now(), + ) + + assert ( + dd_payload["status"] == DataDogStatus.ERROR + ) # Verify failure maps to warning status + + # verify the message field == standard_payload + dict_payload = json.loads(dd_payload["message"]) + assert dict_payload == standard_payload + + # verify error_str is in the message field + assert "error_str" in dict_payload + assert dict_payload["error_str"] == "Test error" From f916597303b4e7af71af4e2dd952f41696e6d3c0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 26 Nov 2024 18:59:25 -0800 Subject: [PATCH 07/10] add async_post_call_failure_hook --- litellm/integrations/datadog/datadog.py | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index 527b6f87d..1df037ba4 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -32,6 +32,7 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, httpxSpecialProvider, ) +from litellm.proxy._types import UserAPIKeyAuth from litellm.types.services import ServiceLoggerPayload from litellm.types.utils import StandardLoggingPayload @@ -348,6 +349,37 @@ class DataDogLogger(CustomBatchLogger): ) pass + async def async_post_call_failure_hook( + self, + request_data: dict, + original_exception: Exception, + user_api_key_dict: UserAPIKeyAuth, + ): + import json + + _exception_payload = { + "error_str": str(original_exception), + "error_class": str(original_exception.__class__.__name__), + "status_code": getattr(original_exception, "status_code", None), + "traceback": traceback.format_exc(), + "user_api_key_dict": user_api_key_dict.model_dump(), + } + + json_payload = json.dumps(_exception_payload) + + verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload) + + dd_payload = DatadogPayload( + ddsource=os.getenv("DD_SOURCE", "litellm"), + ddtags="", + hostname="", + message=json_payload, + service="litellm-server", + status=DataDogStatus.ERROR, + ) + + self.log_queue.append(dd_payload) + async def async_service_success_hook( self, payload: ServiceLoggerPayload, From 90168de961a03dba52e646568baccc4e0dd487af Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 26 Nov 2024 19:09:16 -0800 Subject: [PATCH 08/10] 
use correct loc for types --- litellm/integrations/datadog/datadog.py | 2 +- .../datadog/types.py => types/integrations/datadog.py} | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) rename litellm/{integrations/datadog/types.py => types/integrations/datadog.py} (76%) diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index 1df037ba4..9f95780aa 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -33,10 +33,10 @@ from litellm.llms.custom_httpx.http_handler import ( httpxSpecialProvider, ) from litellm.proxy._types import UserAPIKeyAuth +from litellm.types.integrations.datadog import * from litellm.types.services import ServiceLoggerPayload from litellm.types.utils import StandardLoggingPayload -from .types import DD_ERRORS, DatadogPayload, DataDogStatus from .utils import make_json_serializable DD_MAX_BATCH_SIZE = 1000 # max number of logs DD API can accept diff --git a/litellm/integrations/datadog/types.py b/litellm/types/integrations/datadog.py similarity index 76% rename from litellm/integrations/datadog/types.py rename to litellm/types/integrations/datadog.py index 87aa3ce17..4d11daa75 100644 --- a/litellm/integrations/datadog/types.py +++ b/litellm/types/integrations/datadog.py @@ -19,3 +19,10 @@ class DatadogPayload(TypedDict, total=False): class DD_ERRORS(Enum): DATADOG_413_ERROR = "Datadog API Error - Payload too large (batch is above 5MB uncompressed). If you want this logged either disable request/response logging or set `DD_BATCH_SIZE=50`" + + +class DatadogProxyFailureHookJsonMessage(TypedDict, total=False): + exception: str + traceback: str + request_data: dict + user_api_key_dict: dict From 2803661ed87ebcabbbb07cf1487e1b1908eda446 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 26 Nov 2024 19:12:38 -0800 Subject: [PATCH 09/10] add async_post_call_failure_hook --- litellm/integrations/datadog/datadog.py | 16 +++++++--------- litellm/types/integrations/datadog.py | 5 +++-- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index 9f95780aa..6ee1bc5e7 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -357,18 +357,16 @@ class DataDogLogger(CustomBatchLogger): ): import json - _exception_payload = { - "error_str": str(original_exception), - "error_class": str(original_exception.__class__.__name__), - "status_code": getattr(original_exception, "status_code", None), - "traceback": traceback.format_exc(), - "user_api_key_dict": user_api_key_dict.model_dump(), - } + _exception_payload = DatadogProxyFailureHookJsonMessage( + exception=str(original_exception), + error_class=str(original_exception.__class__.__name__), + status_code=getattr(original_exception, "status_code", None), + traceback=traceback.format_exc(), + user_api_key_dict=user_api_key_dict.model_dump(), + ) json_payload = json.dumps(_exception_payload) - verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload) - dd_payload = DatadogPayload( ddsource=os.getenv("DD_SOURCE", "litellm"), ddtags="", diff --git a/litellm/types/integrations/datadog.py b/litellm/types/integrations/datadog.py index 4d11daa75..79d4eded4 100644 --- a/litellm/types/integrations/datadog.py +++ b/litellm/types/integrations/datadog.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import TypedDict +from typing import Optional, TypedDict class DataDogStatus(str, Enum): @@ -23,6 +23,7 
@@ class DD_ERRORS(Enum): class DatadogProxyFailureHookJsonMessage(TypedDict, total=False): exception: str + error_class: str + status_code: Optional[int] traceback: str - request_data: dict user_api_key_dict: dict From cbaaa1feea15ad50b43c6a2604ff1be08892ce85 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 26 Nov 2024 19:24:47 -0800 Subject: [PATCH 10/10] fix async_log_proxy_authentication_errors --- litellm/proxy/auth/user_api_key_auth.py | 12 ++++--- litellm/proxy/proxy_config.yaml | 17 ++------- litellm/proxy/utils.py | 46 +++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 21 deletions(-) diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py index d19215245..d95016cd9 100644 --- a/litellm/proxy/auth/user_api_key_auth.py +++ b/litellm/proxy/auth/user_api_key_auth.py @@ -1197,13 +1197,15 @@ async def user_api_key_auth( # noqa: PLR0915 extra={"requester_ip": requester_ip}, ) - # Log this exception to OTEL - if open_telemetry_logger is not None: - await open_telemetry_logger.async_post_call_failure_hook( # type: ignore + # Log this exception to OTEL, Datadog, All Custom Loggers + asyncio.create_task( + proxy_logging_obj.async_log_proxy_authentication_errors( original_exception=e, - request_data={}, - user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span), + request=request, + parent_otel_span=parent_otel_span, + api_key=api_key, ) + ) if isinstance(e, litellm.BudgetExceededError): raise ProxyException( diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 40cd86c5c..86697c186 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -8,18 +8,7 @@ model_list: model: anthropic/fake api_base: https://exampleanthropicendpoint-production.up.railway.app/ -router_settings: - provider_budget_config: - openai: - budget_limit: 0.3 # float of $ value budget for time period - time_period: 1d # can be 1d, 2d, 30d - anthropic: - budget_limit: 5 - time_period: 1d - redis_host: os.environ/REDIS_HOST - redis_port: os.environ/REDIS_PORT - redis_password: os.environ/REDIS_PASSWORD - litellm_settings: - callbacks: ["prometheus"] - success_callback: ["langfuse"] \ No newline at end of file + callbacks: ["datadog"] # will log success & failures + service_callbacks: ["datadog"] # will log DB fails / exceptions + turn_off_message_logging: True # will redact message / response content \ No newline at end of file diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 2a298af21..8e8cb5446 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -854,6 +854,20 @@ class ProxyLogging: ), ).start() + await self._run_post_call_failure_hook_custom_loggers( + original_exception=original_exception, + request_data=request_data, + user_api_key_dict=user_api_key_dict, + ) + + return + + async def _run_post_call_failure_hook_custom_loggers( + self, + original_exception: Exception, + request_data: dict, + user_api_key_dict: UserAPIKeyAuth, + ): for callback in litellm.callbacks: try: _callback: Optional[CustomLogger] = None @@ -872,7 +886,35 @@ class ProxyLogging: except Exception as e: raise e - return + async def async_log_proxy_authentication_errors( + self, + original_exception: Exception, + request: Request, + parent_otel_span: Optional[Any], + api_key: str, + ): + """ + Handler for Logging Authentication Errors on LiteLLM Proxy + + Why not use post_call_failure_hook? + - `post_call_failure_hook` calls `litellm_logging_obj.async_failure_handler`. 
This led to the exception being logged twice.
+
+        What does this handler do?
+        - Logs authentication errors (e.g., an invalid API key) to CustomLogger-compatible classes
+        - Calls CustomLogger.async_post_call_failure_hook
+        """
+
+        user_api_key_dict = UserAPIKeyAuth(
+            parent_otel_span=parent_otel_span,
+            token=_hash_token_if_needed(token=api_key),
+        )
+        request_data = await request.json()
+        await self._run_post_call_failure_hook_custom_loggers(
+            original_exception=original_exception,
+            request_data=request_data,
+            user_api_key_dict=user_api_key_dict,
+        )
+        pass

     async def post_call_success_hook(
         self,
@@ -986,7 +1028,7 @@ class ProxyLogging:
 ### DB CONNECTOR ###
 # Define the retry decorator with backoff strategy
-# Function to be called whenever a retry is about to happen 
+# Function to be called whenever a retry is about to happen
 def on_backoff(details):
     # The 'tries' key in the details dictionary contains the number of completed tries
     print_verbose(f"Backing off... this was attempt #{details['tries']}")
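
Usage note: with this series applied, DataDog success/failure logs carry the JSON-serialized
`StandardLoggingPayload` (a `"failure"` status maps to `DataDogStatus.ERROR`), and the older
payload shape stays available behind `litellm.datadog_use_v1`. A minimal sketch of exercising
the new path follows; it assumes `DD_API_KEY`, `DD_SITE`, and a provider key such as
`OPENAI_API_KEY` are exported, and the model name and messages are illustrative, not part of
this diff.

    import litellm

    litellm.callbacks = ["datadog"]  # queue both success and failure logs to DataDog
    # litellm.datadog_use_v1 = True  # opt back into the legacy (v0) payload shape (not recommended)

    # Each call enqueues a DatadogPayload whose `message` field is the JSON-serialized
    # StandardLoggingPayload; the batch flushes every `flush_interval` seconds or once
    # `batch_size` entries accumulate.
    response = litellm.completion(
        model="gpt-3.5-turbo",  # illustrative model name
        messages=[{"role": "user", "content": "Hello!"}],
    )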