(feat) log proxy auth errors on datadog (#6931)

* add new dd type for auth errors

* add async_log_proxy_authentication_errors

* fix comment

* use async_log_proxy_authentication_errors

* test_datadog_post_call_failure_hook

* test_async_log_proxy_authentication_errors
Ishaan Jaff 2024-11-26 20:26:57 -08:00 committed by GitHub
parent aea68cbeb6
commit 4bc06392db
7 changed files with 241 additions and 9 deletions


@@ -32,10 +32,11 @@ from litellm.llms.custom_httpx.http_handler import (
     get_async_httpx_client,
     httpxSpecialProvider,
 )
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.types.integrations.datadog import *
 from litellm.types.services import ServiceLoggerPayload
 from litellm.types.utils import StandardLoggingPayload
-from .types import DD_ERRORS, DatadogPayload, DataDogStatus
 from .utils import make_json_serializable
 DD_MAX_BATCH_SIZE = 1000  # max number of logs DD API can accept
@@ -364,6 +365,38 @@ class DataDogLogger(CustomBatchLogger):
"""
return
async def async_post_call_failure_hook(
self,
request_data: dict,
original_exception: Exception,
user_api_key_dict: UserAPIKeyAuth,
):
"""
Handles Proxy Errors (not related to the LLM API), e.g. Authentication Errors
"""
import json
_exception_payload = DatadogProxyFailureHookJsonMessage(
exception=str(original_exception),
error_class=str(original_exception.__class__.__name__),
status_code=getattr(original_exception, "status_code", None),
traceback=traceback.format_exc(),
user_api_key_dict=user_api_key_dict.model_dump(),
)
json_payload = json.dumps(_exception_payload)
verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)
dd_payload = DatadogPayload(
ddsource=os.getenv("DD_SOURCE", "litellm"),
ddtags="",
hostname="",
message=json_payload,
service="litellm-server",
status=DataDogStatus.ERROR,
)
self.log_queue.append(dd_payload)
def _create_v0_logging_payload(
self,
kwargs: Union[dict, Any],

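For reference, a minimal sketch of exercising the new DataDogLogger hook in isolation, modeled on the unit test added later in this commit (the DD_SITE / DD_API_KEY values are fakes; nothing is actually sent, the serialized error only lands in the logger's in-memory batch queue):

import asyncio
import os

os.environ["DD_SITE"] = "https://fake.datadoghq.com"  # fake credentials, as in the test below
os.environ["DD_API_KEY"] = "anything"

from litellm.integrations.datadog.datadog import DataDogLogger
from litellm.proxy._types import UserAPIKeyAuth


async def main():
    dd_logger = DataDogLogger()
    try:
        raise Exception("Invalid API key")
    except Exception as e:
        # new in this commit: proxy-side failures (not LLM API failures) can be logged too
        await dd_logger.async_post_call_failure_hook(
            request_data={"model": "gpt-4"},
            original_exception=e,
            user_api_key_dict=UserAPIKeyAuth(api_key="fake_key"),
        )
    # the DatadogPayload now waits in the batch queue until the periodic flush sends it
    print(dd_logger.log_queue[-1]["message"])


asyncio.run(main())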

@@ -2032,7 +2032,6 @@
         "tool_use_system_prompt_tokens": 264,
         "supports_assistant_prefill": true,
         "supports_prompt_caching": true,
-        "supports_pdf_input": true,
         "supports_response_schema": true
     },
     "claude-3-opus-20240229": {
@@ -2098,6 +2097,7 @@
         "supports_vision": true,
         "tool_use_system_prompt_tokens": 159,
         "supports_assistant_prefill": true,
+        "supports_pdf_input": true,
         "supports_prompt_caching": true,
         "supports_response_schema": true
     },


@@ -1197,12 +1197,14 @@ async def user_api_key_auth(  # noqa: PLR0915
             extra={"requester_ip": requester_ip},
         )
-        # Log this exception to OTEL
-        if open_telemetry_logger is not None:
-            await open_telemetry_logger.async_post_call_failure_hook(  # type: ignore
+        # Log this exception to OTEL, Datadog etc
+        asyncio.create_task(
+            proxy_logging_obj.async_log_proxy_authentication_errors(
                 original_exception=e,
-                request_data={},
-                user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
+                request=request,
+                parent_otel_span=parent_otel_span,
+                api_key=api_key,
             )
+        )
         if isinstance(e, litellm.BudgetExceededError):

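One design note on the hunk above: the auth-error logging is scheduled with asyncio.create_task rather than awaited, so a slow or failing logging backend cannot delay the error response returned to the client. A generic, self-contained sketch of that fire-and-forget pattern (illustrative only, not litellm code):

import asyncio


async def log_auth_error(exc: Exception) -> None:
    # stand-in for proxy_logging_obj.async_log_proxy_authentication_errors
    await asyncio.sleep(1)  # pretend this is a slow call to DataDog / OTEL
    print(f"logged in background: {exc}")


async def handle_request() -> None:
    try:
        raise PermissionError("Invalid API key")
    except PermissionError as e:
        asyncio.create_task(log_auth_error(e))  # fire-and-forget
        print("returning 401 immediately")  # the caller is not blocked by logging
        await asyncio.sleep(1.5)  # keep the loop alive so the background task can finish


asyncio.run(handle_request())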

@@ -854,6 +854,20 @@ class ProxyLogging:
),
).start()
await self._run_post_call_failure_hook_custom_loggers(
original_exception=original_exception,
request_data=request_data,
user_api_key_dict=user_api_key_dict,
)
return
async def _run_post_call_failure_hook_custom_loggers(
self,
original_exception: Exception,
request_data: dict,
user_api_key_dict: UserAPIKeyAuth,
):
for callback in litellm.callbacks:
try:
_callback: Optional[CustomLogger] = None
@@ -872,7 +886,34 @@
except Exception as e:
raise e
return
async def async_log_proxy_authentication_errors(
self,
original_exception: Exception,
request: Request,
parent_otel_span: Optional[Any],
api_key: str,
):
"""
Handler for logging Authentication Errors on the LiteLLM Proxy.
Why not use post_call_failure_hook?
- `post_call_failure_hook` calls `litellm_logging_obj.async_failure_handler`, which led to the exception being logged twice.
What does this handler do?
- Logs Authentication Errors (e.g. an invalid API key) to CustomLogger-compatible classes (OTEL, Datadog, etc.)
- Calls CustomLogger.async_post_call_failure_hook
"""
user_api_key_dict = UserAPIKeyAuth(
parent_otel_span=parent_otel_span,
token=_hash_token_if_needed(token=api_key),
)
request_data = await request.json()
await self._run_post_call_failure_hook_custom_loggers(
original_exception=original_exception,
request_data=request_data,
user_api_key_dict=user_api_key_dict,
)
pass
async def post_call_success_hook(
self,

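Because async_log_proxy_authentication_errors fans out to every registered callback, any CustomLogger-compatible integration now sees proxy authentication failures, not just DataDog. A hypothetical callback illustrating that extension point (registration via litellm.callbacks mirrors the unit test at the end of this commit):

import litellm
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth


class AuthErrorPrinter(CustomLogger):
    """Hypothetical logger: print proxy auth failures instead of shipping them anywhere."""

    async def async_post_call_failure_hook(
        self,
        request_data: dict,
        original_exception: Exception,
        user_api_key_dict: UserAPIKeyAuth,
    ):
        # invoked by ProxyLogging._run_post_call_failure_hook_custom_loggers
        print(f"proxy error: {original_exception} (hashed token: {user_api_key_dict.token})")


litellm.callbacks = [AuthErrorPrinter()]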

@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import TypedDict
+from typing import Optional, TypedDict
 class DataDogStatus(str, Enum):
@@ -19,3 +19,11 @@ class DatadogPayload(TypedDict, total=False):
class DD_ERRORS(Enum):
DATADOG_413_ERROR = "Datadog API Error - Payload too large (batch is above 5MB uncompressed). If you want this logged either disable request/response logging or set `DD_BATCH_SIZE=50`"
class DatadogProxyFailureHookJsonMessage(TypedDict, total=False):
exception: str
error_class: str
status_code: Optional[int]
traceback: str
user_api_key_dict: dict

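DatadogProxyFailureHookJsonMessage is declared with total=False, so every field is optional. A small construction sketch (assuming the TypedDict is importable from litellm.types.integrations.datadog, as the star import in datadog.py above suggests):

import json
import traceback

from litellm.types.integrations.datadog import DatadogProxyFailureHookJsonMessage

try:
    raise ValueError("Invalid API key")
except ValueError as e:
    msg = DatadogProxyFailureHookJsonMessage(
        exception=str(e),
        error_class=e.__class__.__name__,
        status_code=getattr(e, "status_code", None),  # None here; 401 for a real auth error
        traceback=traceback.format_exc(),
        user_api_key_dict={"user_id": "test_user"},
    )
    # a TypedDict is a plain dict at runtime, so it serializes directly
    print(json.dumps(msg, indent=2))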

@@ -344,3 +344,81 @@ async def test_datadog_logging():
await asyncio.sleep(5)
except Exception as e:
print(e)
@pytest.mark.asyncio
async def test_datadog_post_call_failure_hook():
"""Test logging proxy failures (e.g., authentication errors) to DataDog"""
try:
from litellm.integrations.datadog.datadog import DataDogLogger
os.environ["DD_SITE"] = "https://fake.datadoghq.com"
os.environ["DD_API_KEY"] = "anything"
dd_logger = DataDogLogger()
# Create a mock for the async_client's post method
mock_post = AsyncMock()
mock_post.return_value.status_code = 202
mock_post.return_value.text = "Accepted"
dd_logger.async_client.post = mock_post
# Create a test exception
class AuthenticationError(Exception):
def __init__(self):
self.status_code = 401
super().__init__("Invalid API key")
test_exception = AuthenticationError()
# Create test request data and user API key dict
request_data = {
"model": "gpt-4",
"messages": [{"role": "user", "content": "Hello"}],
}
user_api_key_dict = UserAPIKeyAuth(
api_key="fake_key", user_id="test_user", team_id="test_team"
)
# Call the failure hook
await dd_logger.async_post_call_failure_hook(
request_data=request_data,
original_exception=test_exception,
user_api_key_dict=user_api_key_dict,
)
# Wait for the periodic flush
await asyncio.sleep(6)
# Assert that the mock was called
assert mock_post.called, "HTTP request was not made"
# Get the arguments of the last call
args, kwargs = mock_post.call_args
# Verify endpoint
assert kwargs["url"].endswith("/api/v2/logs"), "Incorrect DataDog endpoint"
# Decode and verify payload
body = kwargs["data"]
with gzip.open(io.BytesIO(body), "rb") as f:
body = f.read().decode("utf-8")
body = json.loads(body)
assert len(body) == 1, "Expected one log entry"
log_entry = body[0]
assert log_entry["status"] == "error", "Expected error status"
assert log_entry["service"] == "litellm-server"
# Verify message content
message = json.loads(log_entry["message"])
print("logged message", json.dumps(message, indent=2))
assert message["exception"] == "Invalid API key"
assert message["error_class"] == "AuthenticationError"
assert message["status_code"] == 401
assert "traceback" in message
assert message["user_api_key_dict"]["api_key"] == "fake_key"
except Exception as e:
pytest.fail(f"Test failed with exception: {str(e)}")


@@ -2125,3 +2125,73 @@ async def test_proxy_server_prisma_setup_invalid_db():
if _old_db_url:
os.environ["DATABASE_URL"] = _old_db_url
@pytest.mark.asyncio
async def test_async_log_proxy_authentication_errors():
"""
Test if async_log_proxy_authentication_errors correctly logs authentication errors through custom loggers
"""
import json
from fastapi import Request
from litellm.proxy.utils import ProxyLogging
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
# Create a mock custom logger to verify it's called
class MockCustomLogger(CustomLogger):
def __init__(self):
self.called = False
self.exception_logged = None
self.request_data_logged = None
self.user_api_key_dict_logged = None
async def async_post_call_failure_hook(
self,
request_data: dict,
original_exception: Exception,
user_api_key_dict: UserAPIKeyAuth,
):
self.called = True
self.exception_logged = original_exception
self.request_data_logged = request_data
print("logged request_data", request_data)
if isinstance(request_data, AsyncMock):
self.request_data_logged = (
await request_data()
) # get the actual value from AsyncMock
else:
self.request_data_logged = request_data
self.user_api_key_dict_logged = user_api_key_dict
# Create test data
test_data = {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}
# Create a mock request
request = Request(scope={"type": "http", "method": "POST"})
request._json = AsyncMock(return_value=test_data)
# Create a test exception
test_exception = Exception("Invalid API Key")
# Initialize ProxyLogging
mock_logger = MockCustomLogger()
litellm.callbacks = [mock_logger]
proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
# Call the method
await proxy_logging_obj.async_log_proxy_authentication_errors(
original_exception=test_exception,
request=request,
parent_otel_span=None,
api_key="test-key",
)
# Verify the mock logger was called with correct parameters
assert mock_logger.called == True
assert mock_logger.exception_logged == test_exception
assert mock_logger.request_data_logged == test_data
assert mock_logger.user_api_key_dict_logged is not None
assert (
mock_logger.user_api_key_dict_logged.token is not None
) # token should be hashed