forked from phoenix/litellm-mirror
(feat) log proxy auth errors on datadog (#6931)
* add new dd type for auth errors * add async_log_proxy_authentication_errors * fix comment * use async_log_proxy_authentication_errors * test_datadog_post_call_failure_hook * test_async_log_proxy_authentication_errors
This commit is contained in:
parent
aea68cbeb6
commit
4bc06392db
7 changed files with 241 additions and 9 deletions
|
@ -32,10 +32,11 @@ from litellm.llms.custom_httpx.http_handler import (
|
|||
get_async_httpx_client,
|
||||
httpxSpecialProvider,
|
||||
)
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.types.integrations.datadog import *
|
||||
from litellm.types.services import ServiceLoggerPayload
|
||||
from litellm.types.utils import StandardLoggingPayload
|
||||
|
||||
from .types import DD_ERRORS, DatadogPayload, DataDogStatus
|
||||
from .utils import make_json_serializable
|
||||
|
||||
DD_MAX_BATCH_SIZE = 1000 # max number of logs DD API can accept
|
||||
|
@ -364,6 +365,38 @@ class DataDogLogger(CustomBatchLogger):
|
|||
"""
|
||||
return
|
||||
|
||||
async def async_post_call_failure_hook(
    self,
    request_data: dict,
    original_exception: Exception,
    user_api_key_dict: UserAPIKeyAuth,
):
    """
    Handles Proxy Errors (not related to the LLM API), ex: Authentication Errors

    Builds an error-level Datadog log entry describing the exception and
    appends it to `self.log_queue`. This method only enqueues the entry;
    it does not send anything itself.

    Args:
        request_data: Body of the failed request. Not included in the
            logged message.
        original_exception: The exception raised by the proxy. Its string
            form, class name, optional `status_code` attribute and
            traceback are logged.
        user_api_key_dict: Auth context of the caller; its `model_dump()`
            is embedded in the logged message.
    """
    import json

    # Format the traceback from the exception object itself. This hook may
    # run outside of any `except` block (e.g. from a scheduled task), where
    # `traceback.format_exc()` has no active exception to format.
    _traceback_str = "".join(
        traceback.format_exception(
            type(original_exception),
            original_exception,
            original_exception.__traceback__,
        )
    )

    _exception_payload = DatadogProxyFailureHookJsonMessage(
        exception=str(original_exception),
        error_class=str(original_exception.__class__.__name__),
        status_code=getattr(original_exception, "status_code", None),
        traceback=_traceback_str,
        user_api_key_dict=user_api_key_dict.model_dump(),
    )

    # `default=str`: the dumped auth model can hold values that json cannot
    # encode natively (presumably datetimes / span objects - confirm).
    # Stringify those instead of raising TypeError and losing the log entry.
    json_payload = json.dumps(_exception_payload, default=str)
    verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)
    dd_payload = DatadogPayload(
        ddsource=os.getenv("DD_SOURCE", "litellm"),
        ddtags="",
        hostname="",
        message=json_payload,
        service="litellm-server",
        status=DataDogStatus.ERROR,
    )

    self.log_queue.append(dd_payload)
|
||||
|
||||
def _create_v0_logging_payload(
|
||||
self,
|
||||
kwargs: Union[dict, Any],
|
||||
|
|
|
@ -2032,7 +2032,6 @@
|
|||
"tool_use_system_prompt_tokens": 264,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_response_schema": true
|
||||
},
|
||||
"claude-3-opus-20240229": {
|
||||
|
@ -2098,6 +2097,7 @@
|
|||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 159,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true
|
||||
},
|
||||
|
|
|
@ -1197,12 +1197,14 @@ async def user_api_key_auth( # noqa: PLR0915
|
|||
extra={"requester_ip": requester_ip},
|
||||
)
|
||||
|
||||
# Log this exception to OTEL
|
||||
if open_telemetry_logger is not None:
|
||||
await open_telemetry_logger.async_post_call_failure_hook( # type: ignore
|
||||
# Log this exception to OTEL, Datadog etc
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.async_log_proxy_authentication_errors(
|
||||
original_exception=e,
|
||||
request_data={},
|
||||
user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
|
||||
request=request,
|
||||
parent_otel_span=parent_otel_span,
|
||||
api_key=api_key,
|
||||
)
|
||||
)
|
||||
|
||||
if isinstance(e, litellm.BudgetExceededError):
|
||||
|
|
|
@ -854,6 +854,20 @@ class ProxyLogging:
|
|||
),
|
||||
).start()
|
||||
|
||||
await self._run_post_call_failure_hook_custom_loggers(
|
||||
original_exception=original_exception,
|
||||
request_data=request_data,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
)
|
||||
|
||||
return
|
||||
|
||||
async def _run_post_call_failure_hook_custom_loggers(
|
||||
self,
|
||||
original_exception: Exception,
|
||||
request_data: dict,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
):
|
||||
for callback in litellm.callbacks:
|
||||
try:
|
||||
_callback: Optional[CustomLogger] = None
|
||||
|
@ -872,7 +886,34 @@ class ProxyLogging:
|
|||
except Exception as e:
|
||||
raise e
|
||||
|
||||
return
|
||||
async def async_log_proxy_authentication_errors(
    self,
    original_exception: Exception,
    request: Request,
    parent_otel_span: Optional[Any],
    api_key: str,
):
    """
    Handler for Logging Authentication Errors on LiteLLM Proxy

    Why not use post_call_failure_hook?
    - `post_call_failure_hook` calls `litellm_logging_obj.async_failure_handler`. This led to the Exception being logged twice

    What does this handler do?
    - Logs Authentication Errors (like invalid API Key passed) to CustomLogger compatible classes (OTEL, Datadog etc)
    - calls CustomLogger.async_post_call_failure_hook

    Args:
        original_exception: The authentication error that was raised.
        request: The incoming request; its JSON body (when it has one) is
            passed to the loggers as `request_data`.
        parent_otel_span: Span stored on the `UserAPIKeyAuth` handed to the loggers.
        api_key: Key presented by the caller; passed through
            `_hash_token_if_needed` before being stored as `token`.
    """
    user_api_key_dict = UserAPIKeyAuth(
        parent_otel_span=parent_otel_span,
        token=_hash_token_if_needed(token=api_key),
    )

    try:
        request_data = await request.json()
    except ValueError:
        # json.JSONDecodeError / UnicodeDecodeError are ValueError subclasses.
        # Requests with an empty or non-JSON body (e.g. GET requests) can
        # fail authentication too; still log the error, with no request data.
        request_data = {}

    await self._run_post_call_failure_hook_custom_loggers(
        original_exception=original_exception,
        request_data=request_data,
        user_api_key_dict=user_api_key_dict,
    )
|
||||
|
||||
async def post_call_success_hook(
|
||||
self,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from enum import Enum
|
||||
from typing import TypedDict
|
||||
from typing import Optional, TypedDict
|
||||
|
||||
|
||||
class DataDogStatus(str, Enum):
|
||||
|
@ -19,3 +19,11 @@ class DatadogPayload(TypedDict, total=False):
|
|||
|
||||
# Human-readable error messages for Datadog API failures.
class DD_ERRORS(Enum):
|
||||
# Message for a "payload too large" rejection (413, per the member name).
# The text itself tells the operator how to get the batch under the limit.
DATADOG_413_ERROR = "Datadog API Error - Payload too large (batch is above 5MB uncompressed). If you want this logged either disable request/response logging or set `DD_BATCH_SIZE=50`"
|
||||
|
||||
|
||||
# Shape of the JSON message logged to Datadog for a proxy failure hook.
# Declared with total=False, so every key is optional.
DatadogProxyFailureHookJsonMessage = TypedDict(
    "DatadogProxyFailureHookJsonMessage",
    {
        # String form of the exception.
        "exception": str,
        # Name of the exception class.
        "error_class": str,
        # Status code attached to the exception, when there is one.
        "status_code": Optional[int],
        # Formatted traceback text.
        "traceback": str,
        # Auth context of the caller, as a plain dict.
        "user_api_key_dict": dict,
    },
    total=False,
)
|
|
@ -344,3 +344,81 @@ async def test_datadog_logging():
|
|||
await asyncio.sleep(5)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_datadog_post_call_failure_hook():
    """Test logging proxy failures (e.g., authentication errors) to DataDog"""
    # NOTE: deliberately no blanket `try/except Exception -> pytest.fail(str(e))`
    # around the test body. AssertionError is an Exception, so such a wrapper
    # replaces pytest's assertion introspection and the real traceback with a
    # one-line message. Letting errors propagate fails the test just the same.
    from litellm.integrations.datadog.datadog import DataDogLogger

    os.environ["DD_SITE"] = "https://fake.datadoghq.com"
    os.environ["DD_API_KEY"] = "anything"
    dd_logger = DataDogLogger()

    # Create a mock for the async_client's post method
    mock_post = AsyncMock()
    mock_post.return_value.status_code = 202
    mock_post.return_value.text = "Accepted"
    dd_logger.async_client.post = mock_post

    # Create a test exception
    class AuthenticationError(Exception):
        def __init__(self):
            self.status_code = 401
            super().__init__("Invalid API key")

    test_exception = AuthenticationError()

    # Create test request data and user API key dict
    request_data = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "Hello"}],
    }

    user_api_key_dict = UserAPIKeyAuth(
        api_key="fake_key", user_id="test_user", team_id="test_team"
    )

    # Call the failure hook
    await dd_logger.async_post_call_failure_hook(
        request_data=request_data,
        original_exception=test_exception,
        user_api_key_dict=user_api_key_dict,
    )

    # Wait for the periodic flush
    await asyncio.sleep(6)

    # Assert that the mock was called
    assert mock_post.called, "HTTP request was not made"

    # Get the keyword arguments of the last call
    _, kwargs = mock_post.call_args

    # Verify endpoint
    assert kwargs["url"].endswith("/api/v2/logs"), "Incorrect DataDog endpoint"

    # Decode and verify payload (the request body is gzip-compressed JSON)
    with gzip.open(io.BytesIO(kwargs["data"]), "rb") as f:
        body = json.loads(f.read().decode("utf-8"))

    assert len(body) == 1, "Expected one log entry"

    log_entry = body[0]
    assert log_entry["status"] == "error", "Expected error status"
    assert log_entry["service"] == "litellm-server"

    # Verify message content
    message = json.loads(log_entry["message"])
    print("logged message", json.dumps(message, indent=2))
    assert message["exception"] == "Invalid API key"
    assert message["error_class"] == "AuthenticationError"
    assert message["status_code"] == 401
    assert "traceback" in message
    assert message["user_api_key_dict"]["api_key"] == "fake_key"
|
||||
|
|
|
@ -2125,3 +2125,73 @@ async def test_proxy_server_prisma_setup_invalid_db():
|
|||
|
||||
if _old_db_url:
|
||||
os.environ["DATABASE_URL"] = _old_db_url
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_async_log_proxy_authentication_errors():
    """
    Test if async_log_proxy_authentication_errors correctly logs authentication errors through custom loggers
    """
    from fastapi import Request
    from litellm.proxy.utils import ProxyLogging
    from litellm.caching import DualCache
    from litellm.integrations.custom_logger import CustomLogger

    # Create a mock custom logger to verify it's called
    class MockCustomLogger(CustomLogger):
        def __init__(self):
            self.called = False
            self.exception_logged = None
            self.request_data_logged = None
            self.user_api_key_dict_logged = None

        async def async_post_call_failure_hook(
            self,
            request_data: dict,
            original_exception: Exception,
            user_api_key_dict: UserAPIKeyAuth,
        ):
            # Record exactly what the proxy handed to the logger.
            self.called = True
            self.exception_logged = original_exception
            self.request_data_logged = request_data
            print("logged request_data", request_data)
            self.user_api_key_dict_logged = user_api_key_dict

    # Create test data
    test_data = {"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}

    # Create a mock request.
    # `Request.json()` returns the cached `_json` attribute as-is once it is
    # set, so seed it with the parsed body itself. Seeding it with an
    # AsyncMock would make `request.json()` hand the mock object (not the
    # data) to the loggers.
    request = Request(scope={"type": "http", "method": "POST"})
    request._json = test_data

    # Create a test exception
    test_exception = Exception("Invalid API Key")

    # Initialize ProxyLogging
    mock_logger = MockCustomLogger()
    original_callbacks = litellm.callbacks
    litellm.callbacks = [mock_logger]
    try:
        proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())

        # Call the method
        await proxy_logging_obj.async_log_proxy_authentication_errors(
            original_exception=test_exception,
            request=request,
            parent_otel_span=None,
            api_key="test-key",
        )
    finally:
        # Do not leak the mock logger into other tests via the global list.
        litellm.callbacks = original_callbacks

    # Verify the mock logger was called with correct parameters
    assert mock_logger.called
    assert mock_logger.exception_logged == test_exception
    assert mock_logger.request_data_logged == test_data
    assert mock_logger.user_api_key_dict_logged is not None
    assert (
        mock_logger.user_api_key_dict_logged.token is not None
    )  # token should be hashed
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue