forked from phoenix/litellm-mirror
(feat) log error class, function_name on prometheus service failure hook + only log DB related failures on DB service hook (#6650)
* log error on prometheus service failure hook * use a more accurate function name for wrapper that handles logging db metrics * fix log_db_metrics * test_log_db_metrics_failure_error_types * fix linting * fix auth checks
This commit is contained in:
parent
ae385cfcdc
commit
eb47117800
8 changed files with 249 additions and 110 deletions
|
@ -17,23 +17,25 @@ import pytest
|
|||
import litellm
|
||||
from litellm import completion
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.proxy.utils import log_to_opentelemetry, ServiceTypes
|
||||
from litellm.proxy.utils import log_db_metrics, ServiceTypes
|
||||
from datetime import datetime
|
||||
import httpx
|
||||
from prisma.errors import ClientNotConnectedError
|
||||
|
||||
|
||||
# Test async function to decorate
|
||||
@log_to_opentelemetry
|
||||
@log_db_metrics
|
||||
async def sample_db_function(*args, **kwargs):
|
||||
return "success"
|
||||
|
||||
|
||||
@log_to_opentelemetry
|
||||
@log_db_metrics
|
||||
async def sample_proxy_function(*args, **kwargs):
|
||||
return "success"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_log_to_opentelemetry_success():
|
||||
async def test_log_db_metrics_success():
|
||||
# Mock the proxy_logging_obj
|
||||
with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
|
||||
# Setup mock
|
||||
|
@ -61,14 +63,14 @@ async def test_log_to_opentelemetry_success():
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_log_to_opentelemetry_duration():
|
||||
async def test_log_db_metrics_duration():
|
||||
# Mock the proxy_logging_obj
|
||||
with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
|
||||
# Setup mock
|
||||
mock_proxy_logging.service_logging_obj.async_service_success_hook = AsyncMock()
|
||||
|
||||
# Add a delay to the function to test duration
|
||||
@log_to_opentelemetry
|
||||
@log_db_metrics
|
||||
async def delayed_function(**kwargs):
|
||||
await asyncio.sleep(1) # 1 second delay
|
||||
return "success"
|
||||
|
@ -95,23 +97,28 @@ async def test_log_to_opentelemetry_duration():
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_log_to_opentelemetry_failure():
|
||||
async def test_log_db_metrics_failure():
|
||||
"""
|
||||
should log a failure if a prisma error is raised
|
||||
"""
|
||||
# Mock the proxy_logging_obj
|
||||
from prisma.errors import ClientNotConnectedError
|
||||
|
||||
with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
|
||||
# Setup mock
|
||||
mock_proxy_logging.service_logging_obj.async_service_failure_hook = AsyncMock()
|
||||
|
||||
# Create a failing function
|
||||
@log_to_opentelemetry
|
||||
@log_db_metrics
|
||||
async def failing_function(**kwargs):
|
||||
raise ValueError("Test error")
|
||||
raise ClientNotConnectedError()
|
||||
|
||||
# Call the decorated function and expect it to raise
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
with pytest.raises(ClientNotConnectedError) as exc_info:
|
||||
await failing_function(parent_otel_span="test_span")
|
||||
|
||||
# Assertions
|
||||
assert str(exc_info.value) == "Test error"
|
||||
assert "Client is not connected to the query engine" in str(exc_info.value)
|
||||
mock_proxy_logging.service_logging_obj.async_service_failure_hook.assert_called_once()
|
||||
call_args = (
|
||||
mock_proxy_logging.service_logging_obj.async_service_failure_hook.call_args[
|
||||
|
@ -125,4 +132,56 @@ async def test_log_to_opentelemetry_failure():
|
|||
assert isinstance(call_args["duration"], float)
|
||||
assert isinstance(call_args["start_time"], datetime)
|
||||
assert isinstance(call_args["end_time"], datetime)
|
||||
assert isinstance(call_args["error"], ValueError)
|
||||
assert isinstance(call_args["error"], ClientNotConnectedError)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"exception,should_log",
|
||||
[
|
||||
(ValueError("Generic error"), False),
|
||||
(KeyError("Missing key"), False),
|
||||
(TypeError("Type error"), False),
|
||||
(httpx.ConnectError("Failed to connect"), True),
|
||||
(httpx.TimeoutException("Request timed out"), True),
|
||||
(ClientNotConnectedError(), True), # Prisma error
|
||||
],
|
||||
)
|
||||
async def test_log_db_metrics_failure_error_types(exception, should_log):
|
||||
"""
|
||||
Why Test?
|
||||
Users were seeing that non-DB errors were being logged as DB Service Failures
|
||||
Example a failure to read a value from cache was being logged as a DB Service Failure
|
||||
|
||||
|
||||
Parameterized test to verify:
|
||||
- DB-related errors (Prisma, httpx) are logged as service failures
|
||||
- Non-DB errors (ValueError, KeyError, etc.) are not logged
|
||||
"""
|
||||
with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
|
||||
mock_proxy_logging.service_logging_obj.async_service_failure_hook = AsyncMock()
|
||||
|
||||
@log_db_metrics
|
||||
async def failing_function(**kwargs):
|
||||
raise exception
|
||||
|
||||
# Call the function and expect it to raise the exception
|
||||
with pytest.raises(type(exception)):
|
||||
await failing_function(parent_otel_span="test_span")
|
||||
|
||||
if should_log:
|
||||
# Assert failure was logged for DB-related errors
|
||||
mock_proxy_logging.service_logging_obj.async_service_failure_hook.assert_called_once()
|
||||
call_args = mock_proxy_logging.service_logging_obj.async_service_failure_hook.call_args[
|
||||
1
|
||||
]
|
||||
assert call_args["service"] == ServiceTypes.DB
|
||||
assert call_args["call_type"] == "failing_function"
|
||||
assert call_args["parent_otel_span"] == "test_span"
|
||||
assert isinstance(call_args["duration"], float)
|
||||
assert isinstance(call_args["start_time"], datetime)
|
||||
assert isinstance(call_args["end_time"], datetime)
|
||||
assert isinstance(call_args["error"], type(exception))
|
||||
else:
|
||||
# Assert failure was NOT logged for non-DB errors
|
||||
mock_proxy_logging.service_logging_obj.async_service_failure_hook.assert_not_called()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue