(testing) increase prometheus.py test coverage to 90% (#6466)

* add tests for prometheus failure events

* update set_llm_deployment_failure_metrics

* add test_async_post_call_failure_hook

* add unit tests for all prometheus functions

* fix linting
Ishaan Jaff 2024-10-28 18:08:05 +04:00 committed by GitHub
parent fb9fb3467d
commit 151991c66d
2 changed files with 461 additions and 17 deletions
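
A quick orientation on the pattern used throughout the new tests: each test replaces the prometheus_client metric objects on the PrometheusLogger fixture with MagicMock instances, calls the logger method under test, and then asserts on the chained labels(...).inc() / .set() / .observe() calls. Below is a minimal, self-contained sketch of that pattern; FakeLogger and record_success are hypothetical stand-ins for illustration, not litellm code.

from unittest.mock import MagicMock


class FakeLogger:
    def __init__(self):
        # In the real PrometheusLogger this attribute would be a prometheus_client Counter
        self.request_counter = MagicMock()

    def record_success(self, model: str, team: str) -> None:
        # Same call shape the litellm tests assert on: .labels(...).inc()
        self.request_counter.labels(model=model, team=team).inc()


def test_record_success():
    logger = FakeLogger()
    logger.record_success(model="gpt-3.5-turbo", team="test_team")

    # Assert the label values first; calling labels() again on the next line
    # returns the same MagicMock child, so the inc() assertion still sees
    # exactly one recorded call.
    logger.request_counter.labels.assert_called_once_with(
        model="gpt-3.5-turbo", team="test_team"
    )
    logger.request_counter.labels().inc.assert_called_once()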

litellm/integrations/prometheus.py

@@ -397,7 +397,10 @@ class PrometheusLogger(CustomLogger):
# input, output, total token metrics
self._increment_token_metrics(
standard_logging_payload=standard_logging_payload,
# why type ignore below?
# 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
# 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
standard_logging_payload=standard_logging_payload, # type: ignore
end_user_id=end_user_id,
user_api_key=user_api_key,
user_api_key_alias=user_api_key_alias,
@@ -432,7 +435,10 @@ class PrometheusLogger(CustomLogger):
user_api_key_alias=user_api_key_alias,
user_api_team=user_api_team,
user_api_team_alias=user_api_team_alias,
standard_logging_payload=standard_logging_payload,
# why type ignore below?
# 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
# 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
standard_logging_payload=standard_logging_payload, # type: ignore
)
# set x-ratelimit headers
@@ -757,24 +763,31 @@ class PrometheusLogger(CustomLogger):
pass
def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
"""
Sets Failure metrics when an LLM API call fails
- mark the deployment as partial outage
- increment deployment failure responses metric
- increment deployment total requests metric
Args:
request_kwargs: dict
"""
try:
verbose_logger.debug("setting remaining tokens requests metric")
standard_logging_payload: StandardLoggingPayload = request_kwargs.get(
"standard_logging_object", {}
)
_response_headers = request_kwargs.get("response_headers")
_litellm_params = request_kwargs.get("litellm_params", {}) or {}
_metadata = _litellm_params.get("metadata", {})
litellm_model_name = request_kwargs.get("model", None)
api_base = _metadata.get("api_base", None)
model_group = _metadata.get("model_group", None)
if api_base is None:
api_base = _litellm_params.get("api_base", None)
llm_provider = _litellm_params.get("custom_llm_provider", None)
_model_info = _metadata.get("model_info") or {}
model_id = _model_info.get("id", None)
model_group = standard_logging_payload.get("model_group", None)
api_base = standard_logging_payload.get("api_base", None)
model_id = standard_logging_payload.get("model_id", None)
exception: Exception = request_kwargs.get("exception", None)
llm_provider = _litellm_params.get("custom_llm_provider", None)
"""
log these labels
["litellm_model_name", "model_id", "api_base", "api_provider"]
@@ -1061,8 +1074,8 @@ class PrometheusLogger(CustomLogger):
self,
state: int,
litellm_model_name: str,
model_id: str,
api_base: str,
model_id: Optional[str],
api_base: Optional[str],
api_provider: str,
):
self.litellm_deployment_state.labels(
@@ -1083,8 +1096,8 @@ class PrometheusLogger(CustomLogger):
def set_deployment_partial_outage(
self,
litellm_model_name: str,
model_id: str,
api_base: str,
model_id: Optional[str],
api_base: Optional[str],
api_provider: str,
):
self.set_litellm_deployment_state(
@@ -1094,8 +1107,8 @@ class PrometheusLogger(CustomLogger):
def set_deployment_complete_outage(
self,
litellm_model_name: str,
model_id: str,
api_base: str,
model_id: Optional[str],
api_base: Optional[str],
api_provider: str,
):
self.set_litellm_deployment_state(
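
The prometheus.py hunks above switch set_llm_deployment_failure_metrics from digging through litellm_params["metadata"] to reading model_group, api_base and model_id directly from the standard_logging_object in request_kwargs, which is why the deployment-state helpers now take Optional[str] for model_id and api_base. A rough sketch of the new lookup path, using an illustrative request_kwargs dict (not a real litellm payload):

from typing import Optional

# Illustrative request_kwargs, shaped like the values the failure tests build below
request_kwargs = {
    "model": "gpt-3.5-turbo",
    "litellm_params": {"custom_llm_provider": "openai"},
    "standard_logging_object": {
        "model_group": "openai-gpt",
        "api_base": "https://api.openai.com",
        "model_id": "model-123",
    },
    "exception": Exception("Test error"),
}

# New lookup path: deployment identifiers come straight from the standard logging payload
standard_logging_payload = request_kwargs.get("standard_logging_object", {})
model_group: Optional[str] = standard_logging_payload.get("model_group")
api_base: Optional[str] = standard_logging_payload.get("api_base")
model_id: Optional[str] = standard_logging_payload.get("model_id")
llm_provider = request_kwargs["litellm_params"].get("custom_llm_provider")

# The old path read api_base / model_group / model_id out of
# litellm_params["metadata"], falling back to litellm_params["api_base"];
# any of the new values may be missing, hence the Optional[str] signatures.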

prometheus unit tests (test file)

@@ -26,6 +26,7 @@ import pytest
from unittest.mock import MagicMock, patch
from datetime import datetime, timedelta
from litellm.integrations.prometheus import PrometheusLogger
from litellm.proxy._types import UserAPIKeyAuth
verbose_logger.setLevel(logging.DEBUG)
@@ -67,6 +68,7 @@ def create_standard_logging_payload() -> StandardLoggingPayload:
user_api_key_team_id="test_team",
user_api_key_user_id="test_user",
user_api_key_team_alias="test_team_alias",
user_api_key_org_id=None,
spend_logs_metadata=None,
requester_ip_address="127.0.0.1",
requester_metadata=None,
@@ -342,3 +344,432 @@ def test_increment_top_level_request_and_spend_metrics(prometheus_logger):
"user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
)
prometheus_logger.litellm_spend_metric.labels().inc.assert_called_once_with(0.1)
@pytest.mark.asyncio
async def test_async_log_failure_event(prometheus_logger):
# NOTE: almost all params for this metric are read from standard logging payload
standard_logging_object = create_standard_logging_payload()
kwargs = {
"model": "gpt-3.5-turbo",
"litellm_params": {
"custom_llm_provider": "openai",
},
"start_time": datetime.now(),
"completion_start_time": datetime.now(),
"api_call_start_time": datetime.now(),
"end_time": datetime.now() + timedelta(seconds=1),
"standard_logging_object": standard_logging_object,
"exception": Exception("Test error"),
}
response_obj = MagicMock()
# Mock the metrics
prometheus_logger.litellm_llm_api_failed_requests_metric = MagicMock()
prometheus_logger.litellm_deployment_failure_responses = MagicMock()
prometheus_logger.litellm_deployment_total_requests = MagicMock()
prometheus_logger.set_deployment_partial_outage = MagicMock()
await prometheus_logger.async_log_failure_event(
kwargs, response_obj, kwargs["start_time"], kwargs["end_time"]
)
# litellm_llm_api_failed_requests_metric incremented
"""
Expected metrics
end_user_id,
user_api_key,
user_api_key_alias,
model,
user_api_team,
user_api_team_alias,
user_id,
"""
prometheus_logger.litellm_llm_api_failed_requests_metric.labels.assert_called_once_with(
None,
"test_hash",
"test_alias",
"gpt-3.5-turbo",
"test_team",
"test_team_alias",
"test_user",
)
prometheus_logger.litellm_llm_api_failed_requests_metric.labels().inc.assert_called_once()
# deployment should be marked in partial outage
prometheus_logger.set_deployment_partial_outage.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
)
# deployment failure responses incremented
prometheus_logger.litellm_deployment_failure_responses.labels.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
exception_status="None",
exception_class="Exception",
requested_model="openai-gpt", # passed in standard logging payload
hashed_api_key="test_hash",
api_key_alias="test_alias",
team="test_team",
team_alias="test_team_alias",
)
prometheus_logger.litellm_deployment_failure_responses.labels().inc.assert_called_once()
# deployment total requests incremented
prometheus_logger.litellm_deployment_total_requests.labels.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
requested_model="openai-gpt", # passed in standard logging payload
hashed_api_key="test_hash",
api_key_alias="test_alias",
team="test_team",
team_alias="test_team_alias",
)
prometheus_logger.litellm_deployment_total_requests.labels().inc.assert_called_once()
@pytest.mark.asyncio
async def test_async_post_call_failure_hook(prometheus_logger):
"""
Test for the async_post_call_failure_hook method
it should increment the litellm_proxy_failed_requests_metric and litellm_proxy_total_requests_metric
"""
# Mock the prometheus metrics
prometheus_logger.litellm_proxy_failed_requests_metric = MagicMock()
prometheus_logger.litellm_proxy_total_requests_metric = MagicMock()
# Create test data
request_data = {"model": "gpt-3.5-turbo"}
original_exception = litellm.RateLimitError(
message="Test error", llm_provider="openai", model="gpt-3.5-turbo"
)
user_api_key_dict = UserAPIKeyAuth(
api_key="test_key",
key_alias="test_alias",
team_id="test_team",
team_alias="test_team_alias",
user_id="test_user",
end_user_id="test_end_user",
)
# Call the function
await prometheus_logger.async_post_call_failure_hook(
request_data=request_data,
original_exception=original_exception,
user_api_key_dict=user_api_key_dict,
)
# Assert failed requests metric was incremented with correct labels
prometheus_logger.litellm_proxy_failed_requests_metric.labels.assert_called_once_with(
end_user="test_end_user",
hashed_api_key="test_key",
api_key_alias="test_alias",
requested_model="gpt-3.5-turbo",
team="test_team",
team_alias="test_team_alias",
user="test_user",
exception_status=429,
exception_class="RateLimitError",
)
prometheus_logger.litellm_proxy_failed_requests_metric.labels().inc.assert_called_once()
# Assert total requests metric was incremented with correct labels
prometheus_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
"test_end_user",
"test_key",
"test_alias",
"gpt-3.5-turbo",
"test_team",
"test_team_alias",
"test_user",
)
prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()
@pytest.mark.asyncio
async def test_async_post_call_success_hook(prometheus_logger):
"""
Test for the async_post_call_success_hook method
it should increment the litellm_proxy_total_requests_metric
"""
# Mock the prometheus metric
prometheus_logger.litellm_proxy_total_requests_metric = MagicMock()
# Create test data
data = {"model": "gpt-3.5-turbo"}
user_api_key_dict = UserAPIKeyAuth(
api_key="test_key",
key_alias="test_alias",
team_id="test_team",
team_alias="test_team_alias",
user_id="test_user",
end_user_id="test_end_user",
)
response = {"choices": [{"message": {"content": "test response"}}]}
# Call the function
await prometheus_logger.async_post_call_success_hook(
data=data, user_api_key_dict=user_api_key_dict, response=response
)
# Assert total requests metric was incremented with correct labels
prometheus_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
"test_end_user",
"test_key",
"test_alias",
"gpt-3.5-turbo",
"test_team",
"test_team_alias",
"test_user",
)
prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()
def test_set_llm_deployment_success_metrics(prometheus_logger):
# Mock all the metrics used in the method
prometheus_logger.litellm_remaining_requests_metric = MagicMock()
prometheus_logger.litellm_remaining_tokens_metric = MagicMock()
prometheus_logger.litellm_deployment_success_responses = MagicMock()
prometheus_logger.litellm_deployment_total_requests = MagicMock()
prometheus_logger.litellm_deployment_latency_per_output_token = MagicMock()
prometheus_logger.set_deployment_healthy = MagicMock()
standard_logging_payload = create_standard_logging_payload()
# Create test data
request_kwargs = {
"model": "gpt-3.5-turbo",
"response_headers": {
"x-ratelimit-remaining-requests": 123,
"x-ratelimit-remaining-tokens": 4321,
},
"litellm_params": {
"custom_llm_provider": "openai",
"metadata": {"model_info": {"id": "model-123"}},
},
"standard_logging_object": standard_logging_payload,
}
start_time = datetime.now()
end_time = start_time + timedelta(seconds=1)
output_tokens = 10
# Call the function
prometheus_logger.set_llm_deployment_success_metrics(
request_kwargs=request_kwargs,
start_time=start_time,
end_time=end_time,
output_tokens=output_tokens,
)
# Verify remaining requests metric
prometheus_logger.litellm_remaining_requests_metric.labels.assert_called_once_with(
"openai-gpt", # model_group / requested model from create_standard_logging_payload()
"openai", # llm provider
"https://api.openai.com", # api base
"gpt-3.5-turbo", # actual model used - litellm model name
standard_logging_payload["metadata"]["user_api_key_hash"],
standard_logging_payload["metadata"]["user_api_key_alias"],
)
prometheus_logger.litellm_remaining_requests_metric.labels().set.assert_called_once_with(
123
)
# Verify remaining tokens metric
prometheus_logger.litellm_remaining_tokens_metric.labels.assert_called_once_with(
"openai-gpt", # model_group / requested model from create_standard_logging_payload()
"openai", # llm provider
"https://api.openai.com", # api base
"gpt-3.5-turbo", # actual model used - litellm model name
standard_logging_payload["metadata"]["user_api_key_hash"],
standard_logging_payload["metadata"]["user_api_key_alias"],
)
prometheus_logger.litellm_remaining_tokens_metric.labels().set.assert_called_once_with(
4321
)
# Verify deployment healthy state
prometheus_logger.set_deployment_healthy.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
)
# Verify success responses metric
prometheus_logger.litellm_deployment_success_responses.labels.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
requested_model="openai-gpt", # requested model from create_standard_logging_payload()
hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
team=standard_logging_payload["metadata"]["user_api_key_team_id"],
team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
)
prometheus_logger.litellm_deployment_success_responses.labels().inc.assert_called_once()
# Verify total requests metric
prometheus_logger.litellm_deployment_total_requests.labels.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
requested_model="openai-gpt", # requested model from create_standard_logging_payload()
hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
team=standard_logging_payload["metadata"]["user_api_key_team_id"],
team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
)
prometheus_logger.litellm_deployment_total_requests.labels().inc.assert_called_once()
# Verify latency per output token metric
prometheus_logger.litellm_deployment_latency_per_output_token.labels.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
team=standard_logging_payload["metadata"]["user_api_key_team_id"],
team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
)
# Calculate expected latency per token (1 second / 10 tokens = 0.1 seconds per token)
expected_latency_per_token = 0.1
prometheus_logger.litellm_deployment_latency_per_output_token.labels().observe.assert_called_once_with(
expected_latency_per_token
)
@pytest.mark.asyncio
async def test_log_success_fallback_event(prometheus_logger):
prometheus_logger.litellm_deployment_successful_fallbacks = MagicMock()
original_model_group = "gpt-3.5-turbo"
kwargs = {
"model": "gpt-4",
"metadata": {
"user_api_key_hash": "test_hash",
"user_api_key_alias": "test_alias",
"user_api_key_team_id": "test_team",
"user_api_key_team_alias": "test_team_alias",
},
}
original_exception = litellm.RateLimitError(
message="Test error", llm_provider="openai", model="gpt-3.5-turbo"
)
await prometheus_logger.log_success_fallback_event(
original_model_group=original_model_group,
kwargs=kwargs,
original_exception=original_exception,
)
prometheus_logger.litellm_deployment_successful_fallbacks.labels.assert_called_once_with(
requested_model=original_model_group,
fallback_model="gpt-4",
hashed_api_key="test_hash",
api_key_alias="test_alias",
team="test_team",
team_alias="test_team_alias",
exception_status="429",
exception_class="RateLimitError",
)
prometheus_logger.litellm_deployment_successful_fallbacks.labels().inc.assert_called_once()
@pytest.mark.asyncio
async def test_log_failure_fallback_event(prometheus_logger):
prometheus_logger.litellm_deployment_failed_fallbacks = MagicMock()
original_model_group = "gpt-3.5-turbo"
kwargs = {
"model": "gpt-4",
"metadata": {
"user_api_key_hash": "test_hash",
"user_api_key_alias": "test_alias",
"user_api_key_team_id": "test_team",
"user_api_key_team_alias": "test_team_alias",
},
}
original_exception = litellm.RateLimitError(
message="Test error", llm_provider="openai", model="gpt-3.5-turbo"
)
await prometheus_logger.log_failure_fallback_event(
original_model_group=original_model_group,
kwargs=kwargs,
original_exception=original_exception,
)
prometheus_logger.litellm_deployment_failed_fallbacks.labels.assert_called_once_with(
requested_model=original_model_group,
fallback_model="gpt-4",
hashed_api_key="test_hash",
api_key_alias="test_alias",
team="test_team",
team_alias="test_team_alias",
exception_status="429",
exception_class="RateLimitError",
)
prometheus_logger.litellm_deployment_failed_fallbacks.labels().inc.assert_called_once()
def test_deployment_state_management(prometheus_logger):
prometheus_logger.litellm_deployment_state = MagicMock()
test_params = {
"litellm_model_name": "gpt-3.5-turbo",
"model_id": "model-123",
"api_base": "https://api.openai.com",
"api_provider": "openai",
}
# Test set_deployment_healthy (state=0)
prometheus_logger.set_deployment_healthy(**test_params)
prometheus_logger.litellm_deployment_state.labels.assert_called_with(
test_params["litellm_model_name"],
test_params["model_id"],
test_params["api_base"],
test_params["api_provider"],
)
prometheus_logger.litellm_deployment_state.labels().set.assert_called_with(0)
# Test set_deployment_partial_outage (state=1)
prometheus_logger.set_deployment_partial_outage(**test_params)
prometheus_logger.litellm_deployment_state.labels().set.assert_called_with(1)
# Test set_deployment_complete_outage (state=2)
prometheus_logger.set_deployment_complete_outage(**test_params)
prometheus_logger.litellm_deployment_state.labels().set.assert_called_with(2)
def test_increment_deployment_cooled_down(prometheus_logger):
prometheus_logger.litellm_deployment_cooled_down = MagicMock()
prometheus_logger.increment_deployment_cooled_down(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
exception_status="429",
)
prometheus_logger.litellm_deployment_cooled_down.labels.assert_called_once_with(
"gpt-3.5-turbo", "model-123", "https://api.openai.com", "openai", "429"
)
prometheus_logger.litellm_deployment_cooled_down.labels().inc.assert_called_once()
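
For context on the state values asserted in test_deployment_state_management: the deployment state gauge encodes health as an integer, with 0 = healthy, 1 = partial outage, 2 = complete outage. A minimal sketch of that mapping with a plain prometheus_client Gauge follows; the metric name and registration here are illustrative, not the metric litellm actually registers.

from typing import Optional
from prometheus_client import Gauge

# Illustrative gauge; litellm defines its own metric and label names
deployment_state = Gauge(
    "llm_deployment_state",
    "0 = healthy, 1 = partial outage, 2 = complete outage",
    ["litellm_model_name", "model_id", "api_base", "api_provider"],
)


def set_state(
    state: int,
    litellm_model_name: str,
    model_id: Optional[str],   # Optional[str] mirrors the widened signatures in the diff
    api_base: Optional[str],
    api_provider: str,
) -> None:
    deployment_state.labels(
        litellm_model_name, model_id, api_base, api_provider
    ).set(state)


# Usage mirroring the three helpers exercised in the test above
set_state(0, "gpt-3.5-turbo", "model-123", "https://api.openai.com", "openai")  # healthy
set_state(1, "gpt-3.5-turbo", "model-123", "https://api.openai.com", "openai")  # partial outage
set_state(2, "gpt-3.5-turbo", "model-123", "https://api.openai.com", "openai")  # complete outage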