forked from phoenix/litellm-mirror
(fix) Prometheus - Log Postgres DB latency, status on prometheus (#6484)
* fix logging DB fails on prometheus * unit testing log to otel wrapper * unit testing for service logger + prometheus * use LATENCY buckets for service logging * fix service logging
This commit is contained in:
parent
4f8a3fd4cf
commit
69b1bc1f1e
5 changed files with 229 additions and 49 deletions
|
@ -11,6 +11,8 @@ import pytest
|
|||
from litellm import acompletion, Cache
|
||||
from litellm._service_logger import ServiceLogging
|
||||
from litellm.integrations.prometheus_services import PrometheusServicesLogger
|
||||
from litellm.proxy.utils import ServiceTypes
|
||||
from unittest.mock import patch, AsyncMock
|
||||
import litellm
|
||||
|
||||
"""
|
||||
|
@ -139,3 +141,72 @@ async def test_router_with_caching():
|
|||
|
||||
except Exception as e:
|
||||
pytest.fail(f"An exception occured - {str(e)}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_service_logger_db_monitoring():
|
||||
"""
|
||||
Test prometheus monitoring for database operations
|
||||
"""
|
||||
litellm.service_callback = ["prometheus_system"]
|
||||
sl = ServiceLogging()
|
||||
|
||||
# Create spy on prometheus logger's async_service_success_hook
|
||||
with patch.object(
|
||||
sl.prometheusServicesLogger,
|
||||
"async_service_success_hook",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_prometheus_success:
|
||||
# Test DB success monitoring
|
||||
await sl.async_service_success_hook(
|
||||
service=ServiceTypes.DB,
|
||||
duration=0.3,
|
||||
call_type="query",
|
||||
event_metadata={"query_type": "SELECT", "table": "api_keys"},
|
||||
)
|
||||
|
||||
# Assert prometheus logger's success hook was called
|
||||
mock_prometheus_success.assert_called_once()
|
||||
# Optionally verify the payload
|
||||
actual_payload = mock_prometheus_success.call_args[1]["payload"]
|
||||
print("actual_payload sent to prometheus: ", actual_payload)
|
||||
assert actual_payload.service == ServiceTypes.DB
|
||||
assert actual_payload.duration == 0.3
|
||||
assert actual_payload.call_type == "query"
|
||||
assert actual_payload.is_error is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_service_logger_db_monitoring_failure():
|
||||
"""
|
||||
Test prometheus monitoring for failed database operations
|
||||
"""
|
||||
litellm.service_callback = ["prometheus_system"]
|
||||
sl = ServiceLogging()
|
||||
|
||||
# Create spy on prometheus logger's async_service_failure_hook
|
||||
with patch.object(
|
||||
sl.prometheusServicesLogger,
|
||||
"async_service_failure_hook",
|
||||
new_callable=AsyncMock,
|
||||
) as mock_prometheus_failure:
|
||||
# Test DB failure monitoring
|
||||
test_error = Exception("Database connection failed")
|
||||
await sl.async_service_failure_hook(
|
||||
service=ServiceTypes.DB,
|
||||
duration=0.3,
|
||||
error=test_error,
|
||||
call_type="query",
|
||||
event_metadata={"query_type": "SELECT", "table": "api_keys"},
|
||||
)
|
||||
|
||||
# Assert prometheus logger's failure hook was called
|
||||
mock_prometheus_failure.assert_called_once()
|
||||
# Verify the payload
|
||||
actual_payload = mock_prometheus_failure.call_args[1]["payload"]
|
||||
print("actual_payload sent to prometheus: ", actual_payload)
|
||||
assert actual_payload.service == ServiceTypes.DB
|
||||
assert actual_payload.duration == 0.3
|
||||
assert actual_payload.call_type == "query"
|
||||
assert actual_payload.is_error is True
|
||||
assert actual_payload.error == "Database connection failed"
|
||||
|
|
128
tests/logging_callback_tests/test_log_db_redis_services.py
Normal file
128
tests/logging_callback_tests/test_log_db_redis_services.py
Normal file
|
@ -0,0 +1,128 @@
|
|||
import io
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../.."))
|
||||
|
||||
import asyncio
|
||||
import gzip
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
import litellm
|
||||
from litellm import completion
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.proxy.utils import log_to_opentelemetry, ServiceTypes
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
# Test async function to decorate
|
||||
@log_to_opentelemetry
|
||||
async def sample_db_function(*args, **kwargs):
|
||||
return "success"
|
||||
|
||||
|
||||
@log_to_opentelemetry
|
||||
async def sample_proxy_function(*args, **kwargs):
|
||||
return "success"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_log_to_opentelemetry_success():
|
||||
# Mock the proxy_logging_obj
|
||||
with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
|
||||
# Setup mock
|
||||
mock_proxy_logging.service_logging_obj.async_service_success_hook = AsyncMock()
|
||||
|
||||
# Call the decorated function
|
||||
result = await sample_db_function(parent_otel_span="test_span")
|
||||
|
||||
# Assertions
|
||||
assert result == "success"
|
||||
mock_proxy_logging.service_logging_obj.async_service_success_hook.assert_called_once()
|
||||
call_args = (
|
||||
mock_proxy_logging.service_logging_obj.async_service_success_hook.call_args[
|
||||
1
|
||||
]
|
||||
)
|
||||
|
||||
assert call_args["service"] == ServiceTypes.DB
|
||||
assert call_args["call_type"] == "sample_db_function"
|
||||
assert call_args["parent_otel_span"] == "test_span"
|
||||
assert isinstance(call_args["duration"], float)
|
||||
assert isinstance(call_args["start_time"], datetime)
|
||||
assert isinstance(call_args["end_time"], datetime)
|
||||
assert "function_name" in call_args["event_metadata"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_log_to_opentelemetry_duration():
|
||||
# Mock the proxy_logging_obj
|
||||
with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
|
||||
# Setup mock
|
||||
mock_proxy_logging.service_logging_obj.async_service_success_hook = AsyncMock()
|
||||
|
||||
# Add a delay to the function to test duration
|
||||
@log_to_opentelemetry
|
||||
async def delayed_function(**kwargs):
|
||||
await asyncio.sleep(1) # 1 second delay
|
||||
return "success"
|
||||
|
||||
# Call the decorated function
|
||||
start = time.time()
|
||||
result = await delayed_function(parent_otel_span="test_span")
|
||||
end = time.time()
|
||||
|
||||
# Get the actual duration
|
||||
actual_duration = end - start
|
||||
|
||||
# Get the logged duration from the mock call
|
||||
call_args = (
|
||||
mock_proxy_logging.service_logging_obj.async_service_success_hook.call_args[
|
||||
1
|
||||
]
|
||||
)
|
||||
logged_duration = call_args["duration"]
|
||||
|
||||
# Assert the logged duration is approximately equal to actual duration (within 0.1 seconds)
|
||||
assert abs(logged_duration - actual_duration) < 0.1
|
||||
assert result == "success"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_log_to_opentelemetry_failure():
|
||||
# Mock the proxy_logging_obj
|
||||
with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
|
||||
# Setup mock
|
||||
mock_proxy_logging.service_logging_obj.async_service_failure_hook = AsyncMock()
|
||||
|
||||
# Create a failing function
|
||||
@log_to_opentelemetry
|
||||
async def failing_function(**kwargs):
|
||||
raise ValueError("Test error")
|
||||
|
||||
# Call the decorated function and expect it to raise
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
await failing_function(parent_otel_span="test_span")
|
||||
|
||||
# Assertions
|
||||
assert str(exc_info.value) == "Test error"
|
||||
mock_proxy_logging.service_logging_obj.async_service_failure_hook.assert_called_once()
|
||||
call_args = (
|
||||
mock_proxy_logging.service_logging_obj.async_service_failure_hook.call_args[
|
||||
1
|
||||
]
|
||||
)
|
||||
|
||||
assert call_args["service"] == ServiceTypes.DB
|
||||
assert call_args["call_type"] == "failing_function"
|
||||
assert call_args["parent_otel_span"] == "test_span"
|
||||
assert isinstance(call_args["duration"], float)
|
||||
assert isinstance(call_args["start_time"], datetime)
|
||||
assert isinstance(call_args["end_time"], datetime)
|
||||
assert isinstance(call_args["error"], ValueError)
|
Loading…
Add table
Add a link
Reference in a new issue