Compare commits

...
Sign in to create a new pull request.

4 commits

Author SHA1 Message Date
Ishaan Jaff
6586718b61 use type for DatadogProxyFailureHookJsonMessage 2024-11-26 17:06:59 -08:00
Ishaan Jaff
86d76dc1d4 fix _handle_logging_authentication_error 2024-11-26 17:03:46 -08:00
Ishaan Jaff
d7033a3564 v0 log auth exceptions on DD 2024-11-26 16:28:17 -08:00
Ishaan Jaff
df0b9adc44 use helper to handle logging Auth fails on custom loggers 2024-11-26 16:26:42 -08:00
4 changed files with 103 additions and 21 deletions

View file

@ -32,12 +32,14 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
)
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.services import ServiceLoggerPayload
from .types import DD_ERRORS, DatadogPayload, DataDogStatus
from .types import *
from .utils import make_json_serializable
DD_MAX_BATCH_SIZE = 1000 # max number of logs DD API can accept
DD_SOURCE_NAME = "litellm"
class DataDogLogger(CustomBatchLogger):
@ -382,3 +384,54 @@ class DataDogLogger(CustomBatchLogger):
No user has asked for this so far, this might be spammy on datadog. If need arises we can implement this
"""
return
async def async_post_call_failure_hook(
    self,
    request_data: dict,
    original_exception: Exception,
    user_api_key_dict: UserAPIKeyAuth,
):
    """
    Async Proxy Post Call Failure Hook.

    Logs client-side errors raised while using LiteLLM Proxy: serializes the
    exception, traceback, original request payload and (sanitized) user key
    metadata into a Datadog payload and appends it to the batch queue, to be
    sent on the next periodic flush.

    Args:
        request_data (dict): Original request payload that triggered the failure.
        original_exception (Exception): Exception raised while handling the request.
        user_api_key_dict (UserAPIKeyAuth): Auth metadata for the calling key;
            logged via ``model_dump()``.
    """
    import json

    try:
        verbose_logger.debug(
            "Datadog: Logging - Enters failure logging function for model %s",
            request_data,
        )

        _json_message = DatadogProxyFailureHookJsonMessage(
            exception=str(original_exception),
            traceback=traceback.format_exc(),
            request_data=request_data,
            user_api_key_dict=user_api_key_dict.model_dump(),
        )

        dd_payload = DatadogPayload(
            ddsource=DD_SOURCE_NAME,
            ddtags="",
            hostname="",
            message=json.dumps(_json_message),
            status=DataDogStatus.ERROR,
            service="litellm-server",
        )

        self.log_queue.append(dd_payload)
        verbose_logger.debug(
            f"Datadog, failure event added to queue. Will flush in {self.flush_interval} seconds..."
        )
    except Exception as e:
        # Best-effort telemetry: a Datadog logging failure must never break
        # the proxy's error path, so log and swallow.
        verbose_logger.exception(
            f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
        )

View file

@ -19,3 +19,10 @@ class DatadogPayload(TypedDict, total=False):
class DD_ERRORS(Enum):
    # Known Datadog API error responses, with remediation hints surfaced to users.
    DATADOG_413_ERROR = "Datadog API Error - Payload too large (batch is above 5MB uncompressed). If you want this logged either disable request/response logging or set `DD_BATCH_SIZE=50`"
# Shape of the JSON log message emitted by the proxy's post-call failure hook.
# total=False: every field is optional, so partial failure records are valid.
DatadogProxyFailureHookJsonMessage = TypedDict(
    "DatadogProxyFailureHookJsonMessage",
    {
        "exception": str,
        "traceback": str,
        "request_data": dict,
        "user_api_key_dict": dict,
    },
    total=False,
)

View file

@ -74,7 +74,7 @@ from litellm.proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request
from litellm.proxy.auth.route_checks import RouteChecks
from litellm.proxy.auth.service_account_checks import service_account_checks
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm.proxy.utils import _to_ns
from litellm.proxy.utils import ProxyLogging, _hash_token_if_needed, _to_ns
from litellm.types.services import ServiceTypes
user_api_key_service_logger_obj = ServiceLogging() # used for tracking latency on OTEL
@ -220,6 +220,7 @@ async def user_api_key_auth( # noqa: PLR0915
)
parent_otel_span: Optional[Span] = None
valid_token: Optional[UserAPIKeyAuth] = None
start_time = datetime.now()
try:
route: str = get_request_route(request=request)
@ -1197,13 +1198,16 @@ async def user_api_key_auth( # noqa: PLR0915
extra={"requester_ip": requester_ip},
)
# Log this exception to OTEL
if open_telemetry_logger is not None:
await open_telemetry_logger.async_post_call_failure_hook( # type: ignore
asyncio.create_task(
_handle_logging_authentication_error(
api_key=api_key,
parent_otel_span=parent_otel_span,
valid_token=valid_token,
original_exception=e,
request_data={},
user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
request=request,
proxy_logging_obj=proxy_logging_obj,
)
)
if isinstance(e, litellm.BudgetExceededError):
raise ProxyException(
@ -1229,6 +1233,35 @@ async def user_api_key_auth( # noqa: PLR0915
)
async def _handle_logging_authentication_error(
    api_key: str,
    parent_otel_span: Optional[Span],
    valid_token: Optional[UserAPIKeyAuth],
    original_exception: Exception,
    request: Request,
    proxy_logging_obj: ProxyLogging,
):
    """
    Handle logging of Authentication Errors on Custom Loggers - OpenTelemetry, Datadog, etc.

    Args:
        api_key: Raw API key from the failed request; hashed before logging.
        parent_otel_span: OTEL span for the request, if tracing is enabled.
        valid_token: Partially-resolved auth object, if one was built before
            the failure; a placeholder is created when it is None.
        original_exception: The authentication error being reported.
        request: Incoming request; its JSON body (if any) is attached to the log.
        proxy_logging_obj: Dispatches the failure to all configured loggers.
    """
    try:
        # Auth can fail before any body exists (e.g. GETs, malformed JSON).
        # An unguarded request.json() would raise here and kill this logging
        # task before anything is reported.
        request_data = await request.json()
    except Exception:
        request_data = {}

    if valid_token is None:
        valid_token = UserAPIKeyAuth(parent_otel_span=parent_otel_span)

    # Never log the plaintext key: store its hash and scrub the raw value.
    valid_token.token = _hash_token_if_needed(token=api_key)
    valid_token.api_key = None

    # Log this exception to OTEL, other custom loggers (fire-and-forget so the
    # auth error response is not delayed by logger I/O).
    asyncio.create_task(
        proxy_logging_obj.post_call_failure_hook(
            user_api_key_dict=valid_token,
            original_exception=original_exception,
            request_data=request_data,
        )
    )
def _return_user_api_key_auth_obj(
user_obj: Optional[LiteLLM_UserTable],
api_key: str,

View file

@ -8,18 +8,7 @@ model_list:
model: anthropic/fake
api_base: https://exampleanthropicendpoint-production.up.railway.app/
router_settings:
provider_budget_config:
openai:
budget_limit: 0.3 # float of $ value budget for time period
time_period: 1d # can be 1d, 2d, 30d
anthropic:
budget_limit: 5
time_period: 1d
redis_host: os.environ/REDIS_HOST
redis_port: os.environ/REDIS_PORT
redis_password: os.environ/REDIS_PASSWORD
litellm_settings:
callbacks: ["prometheus"]
success_callback: ["langfuse"]
callbacks: ["datadog"] # will log success & failures
service_callbacks: ["datadog"] # will log DB fails / exceptions
turn_off_message_logging: True # will redact message / response content