# litellm-mirror/tests/logging_callback_tests/test_prometheus_unit_tests.py
import io
import os
import sys

sys.path.insert(0, os.path.abspath("../.."))

import asyncio
import logging
import time
import uuid
from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch

import pytest
from prometheus_client import REGISTRY, CollectorRegistry

import litellm
from litellm import completion
from litellm._logging import verbose_logger
from litellm.integrations.prometheus import PrometheusLogger
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.types.utils import (
    StandardLoggingHiddenParams,
    StandardLoggingMetadata,
    StandardLoggingModelInformation,
    StandardLoggingPayload,
)

verbose_logger.setLevel(logging.DEBUG)
litellm.set_verbose = True


@pytest.fixture
def prometheus_logger():
    # Unregister any collectors left over from a previous test so that
    # PrometheusLogger can re-register its metrics without a
    # "Duplicated timeseries" error from prometheus_client.
    collectors = list(REGISTRY._collector_to_names.keys())
    for collector in collectors:
        REGISTRY.unregister(collector)
    return PrometheusLogger()
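

# A hedged sanity check, not part of the original suite: assuming
# PrometheusLogger registers its collectors with the default REGISTRY in
# __init__, constructing a second instance without clearing the registry
# should raise prometheus_client's "Duplicated timeseries" ValueError,
# which is exactly what the fixture above guards against.
def test_duplicate_registration_raises(prometheus_logger):
    with pytest.raises(ValueError):
        PrometheusLogger()  # second registration into the same default REGISTRY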


def create_standard_logging_payload() -> StandardLoggingPayload:
    return StandardLoggingPayload(
        id="test_id",
        call_type="completion",
        response_cost=0.1,
        response_cost_failure_debug_info=None,
        status="success",
        total_tokens=30,
        prompt_tokens=20,
        completion_tokens=10,
        startTime=1234567890.0,
        endTime=1234567891.0,
        completionStartTime=1234567890.5,
        model_map_information=StandardLoggingModelInformation(
            model_map_key="gpt-3.5-turbo", model_map_value=None
        ),
        model="gpt-3.5-turbo",
        model_id="model-123",
        model_group="openai-gpt",
        api_base="https://api.openai.com",
        metadata=StandardLoggingMetadata(
            user_api_key_hash="test_hash",
            user_api_key_alias="test_alias",
            user_api_key_team_id="test_team",
            user_api_key_user_id="test_user",
            user_api_key_team_alias="test_team_alias",
            spend_logs_metadata=None,
            requester_ip_address="127.0.0.1",
            requester_metadata=None,
        ),
        cache_hit=False,
        cache_key=None,
        saved_cache_cost=0.0,
        request_tags=[],
        end_user=None,
        requester_ip_address="127.0.0.1",
        messages=[{"role": "user", "content": "Hello, world!"}],
        response={"choices": [{"message": {"content": "Hi there!"}}]},
        error_str=None,
        model_parameters={"stream": True},
        hidden_params=StandardLoggingHiddenParams(
            model_id="model-123",
            cache_key=None,
            api_base="https://api.openai.com",
            response_cost="0.1",
            additional_headers=None,
        ),
    )


def test_safe_get_remaining_budget(prometheus_logger):
    """_safe_get_remaining_budget returns max_budget - spend, treating a
    missing spend as 0 and a missing max_budget as unlimited (inf)."""
    assert prometheus_logger._safe_get_remaining_budget(100, 30) == 70
    assert prometheus_logger._safe_get_remaining_budget(100, None) == 100
    assert prometheus_logger._safe_get_remaining_budget(None, 30) == float("inf")
    assert prometheus_logger._safe_get_remaining_budget(None, None) == float("inf")
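

# A parametrized restatement of the same cases; a minimal sketch that assumes
# nothing beyond the behavior asserted above (None spend counts as 0, None
# max_budget means an unlimited budget).
@pytest.mark.parametrize(
    "max_budget, spend, expected",
    [
        (100, 30, 70),
        (100, None, 100),
        (None, 30, float("inf")),
        (None, None, float("inf")),
    ],
)
def test_safe_get_remaining_budget_parametrized(
    prometheus_logger, max_budget, spend, expected
):
    assert prometheus_logger._safe_get_remaining_budget(max_budget, spend) == expected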


@pytest.mark.asyncio
async def test_async_log_success_event(prometheus_logger):
    standard_logging_object = create_standard_logging_payload()
    kwargs = {
        "model": "gpt-3.5-turbo",
        "litellm_params": {
            "metadata": {
                "user_api_key": "test_key",
                "user_api_key_user_id": "test_user",
                "user_api_key_team_id": "test_team",
            }
        },
        "start_time": datetime.now(),
        "completion_start_time": datetime.now(),
        "api_call_start_time": datetime.now(),
        "end_time": datetime.now() + timedelta(seconds=1),
        "standard_logging_object": standard_logging_object,
    }
    response_obj = MagicMock()

    # Mock the prometheus client methods
    # High-level metrics - request/spend
    prometheus_logger.litellm_requests_metric = MagicMock()
    prometheus_logger.litellm_spend_metric = MagicMock()

    # Token metrics
    prometheus_logger.litellm_tokens_metric = MagicMock()
    prometheus_logger.litellm_input_tokens_metric = MagicMock()
    prometheus_logger.litellm_output_tokens_metric = MagicMock()

    # Remaining budget metrics
    prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
    prometheus_logger.litellm_remaining_api_key_budget_metric = MagicMock()

    # Virtual key rate-limit metrics
    prometheus_logger.litellm_remaining_api_key_requests_for_model = MagicMock()
    prometheus_logger.litellm_remaining_api_key_tokens_for_model = MagicMock()

    # Latency metrics
    prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
    prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
    prometheus_logger.litellm_request_total_latency_metric = MagicMock()

    await prometheus_logger.async_log_success_event(
        kwargs, response_obj, kwargs["start_time"], kwargs["end_time"]
    )

    # Assert that each metric family was labeled (and therefore updated)
    prometheus_logger.litellm_requests_metric.labels.assert_called()
    prometheus_logger.litellm_spend_metric.labels.assert_called()

    # Token metrics
    prometheus_logger.litellm_tokens_metric.labels.assert_called()
    prometheus_logger.litellm_input_tokens_metric.labels.assert_called()
    prometheus_logger.litellm_output_tokens_metric.labels.assert_called()

    # Remaining budget metrics
    prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called()
    prometheus_logger.litellm_remaining_api_key_budget_metric.labels.assert_called()

    # Virtual key rate-limit metrics
    prometheus_logger.litellm_remaining_api_key_requests_for_model.labels.assert_called()
    prometheus_logger.litellm_remaining_api_key_tokens_for_model.labels.assert_called()

    # Latency metrics
    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_called()
    prometheus_logger.litellm_llm_api_latency_metric.labels.assert_called()
    prometheus_logger.litellm_request_total_latency_metric.labels.assert_called()
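

# Debugging aid (an addition, not in the original tests): MagicMock records
# every invocation, so when a labels() assertion fails you can inspect the
# exact label values the logger used, e.g.
#   print(prometheus_logger.litellm_requests_metric.labels.mock_calls)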


def test_increment_token_metrics(prometheus_logger):
    """
    Test the _increment_token_metrics method.

    The input, output, and total token metrics are incremented by the
    corresponding values in the standard logging payload.
    """
    prometheus_logger.litellm_tokens_metric = MagicMock()
    prometheus_logger.litellm_input_tokens_metric = MagicMock()
    prometheus_logger.litellm_output_tokens_metric = MagicMock()

    standard_logging_payload = create_standard_logging_payload()
    standard_logging_payload["total_tokens"] = 100
    standard_logging_payload["prompt_tokens"] = 50
    standard_logging_payload["completion_tokens"] = 50

    prometheus_logger._increment_token_metrics(
        standard_logging_payload,
        end_user_id="user1",
        user_api_key="key1",
        user_api_key_alias="alias1",
        model="gpt-3.5-turbo",
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        user_id="user1",
    )

    prometheus_logger.litellm_tokens_metric.labels.assert_called_once_with(
        "user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
    )
    prometheus_logger.litellm_tokens_metric.labels().inc.assert_called_once_with(100)

    prometheus_logger.litellm_input_tokens_metric.labels.assert_called_once_with(
        "user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
    )
    prometheus_logger.litellm_input_tokens_metric.labels().inc.assert_called_once_with(
        50
    )

    prometheus_logger.litellm_output_tokens_metric.labels.assert_called_once_with(
        "user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
    )
    prometheus_logger.litellm_output_tokens_metric.labels().inc.assert_called_once_with(
        50
    )
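

# Note on the assertions above (added context): the positional label values
# follow the same order as the helper's keyword arguments, i.e.
# end_user_id, user_api_key, user_api_key_alias, model, user_api_team,
# user_api_team_alias, user_id.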


def test_increment_remaining_budget_metrics(prometheus_logger):
    """
    Test the _increment_remaining_budget_metrics method.

    The team and API key budget metrics are set to the difference between
    the max budget and the spend.
    """
    prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
    prometheus_logger.litellm_remaining_api_key_budget_metric = MagicMock()

    litellm_params = {
        "metadata": {
            "user_api_key_team_spend": 50,
            "user_api_key_team_max_budget": 100,
            "user_api_key_spend": 25,
            "user_api_key_max_budget": 75,
        }
    }

    prometheus_logger._increment_remaining_budget_metrics(
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        user_api_key="key1",
        user_api_key_alias="alias1",
        litellm_params=litellm_params,
    )

    # Remaining team budget: 100 (max) - 50 (spend) = 50
    prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
        "team1", "team_alias1"
    )
    prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
        50
    )

    # Remaining API key budget: 75 (max) - 25 (spend) = 50
    prometheus_logger.litellm_remaining_api_key_budget_metric.labels.assert_called_once_with(
        "key1", "alias1"
    )
    prometheus_logger.litellm_remaining_api_key_budget_metric.labels().set.assert_called_once_with(
        50
    )


def test_set_latency_metrics(prometheus_logger):
    """
    Test the _set_latency_metrics method.

    The time-to-first-token, LLM API latency, and total request latency
    metrics are observed with values derived from the timestamps in kwargs.
    """
    standard_logging_payload = create_standard_logging_payload()
    standard_logging_payload["model_parameters"] = {"stream": True}

    prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
    prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
    prometheus_logger.litellm_request_total_latency_metric = MagicMock()

    now = datetime.now()
    kwargs = {
        "end_time": now,  # when the request ends
        "start_time": now - timedelta(seconds=2),  # when the request starts
        "api_call_start_time": now
        - timedelta(seconds=1.5),  # when the API call starts
        "completion_start_time": now
        - timedelta(seconds=1),  # when the completion starts
    }

    prometheus_logger._set_latency_metrics(
        kwargs=kwargs,
        model="gpt-3.5-turbo",
        user_api_key="key1",
        user_api_key_alias="alias1",
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        standard_logging_payload=standard_logging_payload,
    )

    # Time to first token: completion_start_time - api_call_start_time = 0.5s
    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_called_once_with(
        "gpt-3.5-turbo", "key1", "alias1", "team1", "team_alias1"
    )
    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels().observe.assert_called_once_with(
        0.5
    )

    # LLM API latency: end_time - api_call_start_time = 1.5s
    prometheus_logger.litellm_llm_api_latency_metric.labels.assert_called_once_with(
        "gpt-3.5-turbo", "key1", "alias1", "team1", "team_alias1"
    )
    prometheus_logger.litellm_llm_api_latency_metric.labels().observe.assert_called_once_with(
        1.5
    )

    # Total request latency: end_time - start_time = 2.0s
    prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once_with(
        "gpt-3.5-turbo", "key1", "alias1", "team1", "team_alias1"
    )
    prometheus_logger.litellm_request_total_latency_metric.labels().observe.assert_called_once_with(
        2.0
    )
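

# Added context (an inference from the test setup, not stated in the original):
# model_parameters={"stream": True} matters here because time-to-first-token is
# only a meaningful measurement for streaming responses, where tokens arrive
# before the request completes.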


def test_increment_top_level_request_and_spend_metrics(prometheus_logger):
    """
    Test the _increment_top_level_request_and_spend_metrics method.

    - litellm_requests_metric is incremented by 1
    - litellm_spend_metric is incremented by the response cost in the
      standard logging payload
    """
    prometheus_logger.litellm_requests_metric = MagicMock()
    prometheus_logger.litellm_spend_metric = MagicMock()

    prometheus_logger._increment_top_level_request_and_spend_metrics(
        end_user_id="user1",
        user_api_key="key1",
        user_api_key_alias="alias1",
        model="gpt-3.5-turbo",
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        user_id="user1",
        response_cost=0.1,
    )

    prometheus_logger.litellm_requests_metric.labels.assert_called_once_with(
        "user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
    )
    prometheus_logger.litellm_requests_metric.labels().inc.assert_called_once()

    prometheus_logger.litellm_spend_metric.labels.assert_called_once_with(
        "user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
    )
    prometheus_logger.litellm_spend_metric.labels().inc.assert_called_once_with(0.1)
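

# To run just this module (path from the repository layout shown at the top):
#   pytest tests/logging_callback_tests/test_prometheus_unit_tests.py -v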