Compare commits

4 commits

Author       SHA1        Message                                                     Date
Ishaan Jaff  6586718b61  use type for DatadogProxyFailureHookJsonMessage            2024-11-26 17:06:59 -08:00
Ishaan Jaff  86d76dc1d4  fix _handle_logging_authentication_error                   2024-11-26 17:03:46 -08:00
Ishaan Jaff  d7033a3564  v0 log auth exceptions on DD                               2024-11-26 16:28:17 -08:00
Ishaan Jaff  df0b9adc44  use helper to handle logging Auth fails on custom loggers  2024-11-26 16:26:42 -08:00
4 changed files with 103 additions and 21 deletions

@@ -32,12 +32,14 @@ from litellm.llms.custom_httpx.http_handler import (
     get_async_httpx_client,
     httpxSpecialProvider,
 )
+from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.services import ServiceLoggerPayload
-from .types import DD_ERRORS, DatadogPayload, DataDogStatus
+from .types import *
 from .utils import make_json_serializable

 DD_MAX_BATCH_SIZE = 1000  # max number of logs DD API can accept
+DD_SOURCE_NAME = "litellm"


 class DataDogLogger(CustomBatchLogger):
@@ -382,3 +384,54 @@ class DataDogLogger(CustomBatchLogger):
         No user has asked for this so far, this might be spammy on datadog. If need arises we can implement this
         """
         return
+
+    async def async_post_call_failure_hook(
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
+    ):
+        """
+        Async Proxy Post Call Failure Hook
+
+        Logs client-side errors when using LiteLLM Proxy
+
+        Args:
+            request_data (dict): Original request data
+            original_exception (Exception): Exception raised while handling the request
+            user_api_key_dict (UserAPIKeyAuth): Auth metadata for the failing request
+        """
+        import json
+
+        try:
+            verbose_logger.debug(
+                "Datadog: Logging - Enters failure logging function for model %s",
+                request_data,
+            )
+
+            _json_message = DatadogProxyFailureHookJsonMessage(
+                exception=str(original_exception),
+                traceback=traceback.format_exc(),
+                request_data=request_data,
+                user_api_key_dict=user_api_key_dict.model_dump(),
+            )
+
+            dd_payload = DatadogPayload(
+                ddsource=DD_SOURCE_NAME,
+                ddtags="",
+                hostname="",
+                message=json.dumps(_json_message),
+                service="litellm-server",
+                status=DataDogStatus.ERROR,
+            )
+
+            self.log_queue.append(dd_payload)
+            verbose_logger.debug(
+                f"Datadog, failure event added to queue. Will flush in {self.flush_interval} seconds..."
+            )
+        except Exception as e:
+            verbose_logger.exception(
+                f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
+            )
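To make the new hook concrete: below is a minimal, self-contained sketch of what it appends to the batch queue. The type definitions are copied locally from this diff rather than imported from litellm, the DataDogStatus.ERROR value of "error" is an assumption, and build_failure_payload is a hypothetical helper, not litellm code.

import json
import traceback
from enum import Enum
from typing import TypedDict


class DataDogStatus(str, Enum):
    # Assumed value; mirrors the enum referenced in the diff above.
    ERROR = "error"


class DatadogProxyFailureHookJsonMessage(TypedDict, total=False):
    exception: str
    traceback: str
    request_data: dict
    user_api_key_dict: dict


class DatadogPayload(TypedDict, total=False):
    ddsource: str
    ddtags: str
    hostname: str
    message: str
    service: str
    status: str


def build_failure_payload(exc: Exception, request_data: dict) -> DatadogPayload:
    # Hypothetical helper showing the shape the hook appends to self.log_queue:
    # the structured error is JSON-encoded into Datadog's `message` field.
    msg = DatadogProxyFailureHookJsonMessage(
        exception=str(exc),
        traceback=traceback.format_exc(),
        request_data=request_data,
        user_api_key_dict={},
    )
    return DatadogPayload(
        ddsource="litellm",
        ddtags="",
        hostname="",
        message=json.dumps(msg),
        service="litellm-server",
        status=DataDogStatus.ERROR.value,
    )


try:
    raise ValueError("Authentication Error: invalid user key")
except ValueError as e:
    payload = build_failure_payload(e, {"model": "anthropic/fake"})
    print(payload["message"])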

@@ -19,3 +19,10 @@ class DatadogPayload(TypedDict, total=False):

 class DD_ERRORS(Enum):
     DATADOG_413_ERROR = "Datadog API Error - Payload too large (batch is above 5MB uncompressed). If you want this logged either disable request/response logging or set `DD_BATCH_SIZE=50`"
+
+
+class DatadogProxyFailureHookJsonMessage(TypedDict, total=False):
+    exception: str
+    traceback: str
+    request_data: dict
+    user_api_key_dict: dict
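Because the new TypedDict is declared with total=False, every field is optional, so the hook can serialize whatever subset of fields it has. A short illustration of that property (plain Python, nothing litellm-specific):

import json
from typing import TypedDict


class DatadogProxyFailureHookJsonMessage(TypedDict, total=False):
    exception: str
    traceback: str
    request_data: dict
    user_api_key_dict: dict


# total=False: a partial message still type-checks...
partial: DatadogProxyFailureHookJsonMessage = {"exception": "Invalid proxy server token passed"}

# ...and since a TypedDict is a plain dict at runtime, json.dumps works on it directly.
print(json.dumps(partial))  # {"exception": "Invalid proxy server token passed"}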

@@ -74,7 +74,7 @@ from litellm.proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request
 from litellm.proxy.auth.route_checks import RouteChecks
 from litellm.proxy.auth.service_account_checks import service_account_checks
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
-from litellm.proxy.utils import _to_ns
+from litellm.proxy.utils import ProxyLogging, _hash_token_if_needed, _to_ns
 from litellm.types.services import ServiceTypes

 user_api_key_service_logger_obj = ServiceLogging()  # used for tracking latency on OTEL
@@ -220,6 +220,7 @@ async def user_api_key_auth(  # noqa: PLR0915
     )

     parent_otel_span: Optional[Span] = None
+    valid_token: Optional[UserAPIKeyAuth] = None
     start_time = datetime.now()
     try:
         route: str = get_request_route(request=request)
@ -1197,13 +1198,16 @@ async def user_api_key_auth( # noqa: PLR0915
extra={"requester_ip": requester_ip}, extra={"requester_ip": requester_ip},
) )
# Log this exception to OTEL asyncio.create_task(
if open_telemetry_logger is not None: _handle_logging_authentication_error(
await open_telemetry_logger.async_post_call_failure_hook( # type: ignore api_key=api_key,
parent_otel_span=parent_otel_span,
valid_token=valid_token,
original_exception=e, original_exception=e,
request_data={}, request=request,
user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span), proxy_logging_obj=proxy_logging_obj,
) )
)
if isinstance(e, litellm.BudgetExceededError): if isinstance(e, litellm.BudgetExceededError):
raise ProxyException( raise ProxyException(
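Note the behavioral change in this hunk: the old code awaited the OTEL hook inline, while the new code wraps the helper in asyncio.create_task, so the auth failure path no longer blocks on logging backends. A minimal sketch of that fire-and-forget pattern (hypothetical names, not litellm code):

import asyncio


async def log_failure_to_backends(exc: Exception) -> None:
    # Stand-in for proxy_logging_obj.post_call_failure_hook: imagine a slow
    # network call out to Datadog / OTEL.
    await asyncio.sleep(2)
    print(f"logged: {exc}")


async def handle_auth_error(exc: Exception) -> None:
    # Fire-and-forget: schedule logging and return to the client immediately,
    # rather than awaiting the 2-second backend call inline. (In long-lived
    # services, keep a reference to the task so it is not garbage-collected.)
    asyncio.create_task(log_failure_to_backends(exc))


async def main() -> None:
    await handle_auth_error(ValueError("Authentication Error"))
    # Give the scheduled task time to finish before the loop shuts down.
    await asyncio.sleep(3)


asyncio.run(main())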
@@ -1229,6 +1233,35 @@ async def user_api_key_auth(  # noqa: PLR0915
         )


+async def _handle_logging_authentication_error(
+    api_key: str,
+    parent_otel_span: Optional[Span],
+    valid_token: Optional[UserAPIKeyAuth],
+    original_exception: Exception,
+    request: Request,
+    proxy_logging_obj: ProxyLogging,
+):
+    """
+    Handle logging of Authentication Errors on Custom Loggers - OpenTelemetry, Datadog, etc.
+    """
+    request_data = await request.json()
+    if valid_token is None:
+        valid_token = UserAPIKeyAuth(parent_otel_span=parent_otel_span)
+    valid_token.token = _hash_token_if_needed(token=api_key)
+    valid_token.api_key = None
+
+    # Log this exception to OTEL, other custom loggers
+    asyncio.create_task(
+        proxy_logging_obj.post_call_failure_hook(
+            user_api_key_dict=valid_token,
+            original_exception=original_exception,
+            request_data=request_data,
+        )
+    )
+
+
 def _return_user_api_key_auth_obj(
     user_obj: Optional[LiteLLM_UserTable],
     api_key: str,
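One detail worth flagging in the helper: it overwrites valid_token.token with _hash_token_if_needed(token=api_key) and nulls api_key before anything reaches the custom loggers, so the plaintext key is never logged. The real implementation lives in litellm.proxy.utils; the sketch below is a hypothetical stand-in that only illustrates the idea (the "sk-" prefix check and the SHA-256 digest are assumptions):

import hashlib


def hash_token_if_needed(token: str) -> str:
    # Hypothetical stand-in for litellm.proxy.utils._hash_token_if_needed:
    # replace a raw API key with its SHA-256 digest so the plaintext secret
    # never reaches Datadog/OTEL; pass through values that look hashed already.
    if token.startswith("sk-"):
        return hashlib.sha256(token.encode()).hexdigest()
    return token


print(hash_token_if_needed("sk-1234"))  # 64-char hex digest, not the raw key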

@@ -8,18 +8,7 @@ model_list:
       model: anthropic/fake
       api_base: https://exampleanthropicendpoint-production.up.railway.app/

-router_settings:
-  provider_budget_config:
-    openai:
-      budget_limit: 0.3 # float of $ value budget for time period
-      time_period: 1d # can be 1d, 2d, 30d
-    anthropic:
-      budget_limit: 5
-      time_period: 1d
-  redis_host: os.environ/REDIS_HOST
-  redis_port: os.environ/REDIS_PORT
-  redis_password: os.environ/REDIS_PASSWORD
-
 litellm_settings:
-  callbacks: ["prometheus"]
-  success_callback: ["langfuse"]
+  callbacks: ["datadog"] # will log success & failures
+  service_callbacks: ["datadog"] # will log DB fails / exceptions
+  turn_off_message_logging: True # will redact message / response content
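Finally, the swapped-in datadog callbacks only work if Datadog credentials are present in the environment. A hedged sketch of the equivalent SDK-side setup, assuming the litellm package is installed (DD_API_KEY and DD_SITE are the environment variables the Datadog integration documents; the values here are placeholders):

import os

import litellm

# Credentials the Datadog integration reads from the environment.
os.environ["DD_API_KEY"] = "<your-datadog-api-key>"  # placeholder
os.environ["DD_SITE"] = "us5.datadoghq.com"          # your Datadog site

# Equivalent of `callbacks: ["datadog"]` in the proxy config above:
litellm.success_callback = ["datadog"]
litellm.failure_callback = ["datadog"]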