Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00.
* refactor(prometheus.py): use a factory method for setting label values, which allows enforcing end-user-id disabling on prometheus end-to-end
* fix: fix linting error
* fix(prometheus.py): ensure label factory drops end-user value if disabled by user
* fix(prometheus.py): specify service_type in end user tracking get
* test: fix test
* test: add unit test for prometheus factory
* test: improve test (cover flag-not-set scenario)
* test(test_prometheus.py): e2e test covering whether 'end_user_id' shows up in metrics when tracking is disabled; scrapes the `/metrics` endpoint and scans the text to check if the id appears in emitted metrics
* fix(prometheus.py): stringify status code before logging it
822 lines · 30 KiB · Python
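The commit message above describes a label factory that can drop the end-user id from emitted metrics; the unit test at the bottom of this file (test_prometheus_factory) pins that behavior down. For orientation, below is a minimal sketch of the behavior that test asserts. It reuses the names that appear in the test (prometheus_label_factory, UserAPIKeyLabelValues, litellm.disable_end_user_cost_tracking_prometheus_only) but is not the actual litellm implementation, and it assumes UserAPIKeyLabelValues is a pydantic model.

from typing import Dict, List, Optional

import litellm
from litellm.types.integrations.prometheus import UserAPIKeyLabelValues


def prometheus_label_factory_sketch(
    supported_enum_labels: List[str],
    enum_values: UserAPIKeyLabelValues,
) -> Dict[str, Optional[str]]:
    # Hypothetical sketch: keep only the labels the given metric supports ...
    label_values = {
        key: value
        for key, value in enum_values.model_dump().items()  # assumes a pydantic model
        if key in supported_enum_labels
    }
    # ... and blank out the end_user label when end-user cost tracking is
    # disabled for Prometheus, so customer ids never reach /metrics.
    if litellm.disable_end_user_cost_tracking_prometheus_only:
        label_values["end_user"] = None
    return label_values

Centralizing label construction this way means the end-user opt-out is enforced in one place for every metric, rather than at each call site.
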
import io
import os
import sys

sys.path.insert(0, os.path.abspath("../.."))

import asyncio
import logging
import time
import uuid
from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch

import pytest
from prometheus_client import REGISTRY, CollectorRegistry

import litellm
from litellm import completion
from litellm._logging import verbose_logger
from litellm.integrations.prometheus import PrometheusLogger
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.utils import (
    StandardLoggingHiddenParams,
    StandardLoggingMetadata,
    StandardLoggingModelInformation,
    StandardLoggingPayload,
)

verbose_logger.setLevel(logging.DEBUG)

litellm.set_verbose = True


@pytest.fixture
def prometheus_logger():
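    # Unregister any collectors left over from earlier tests so that creating a
    # fresh PrometheusLogger() does not raise duplicate-metric errors from
    # prometheus_client's default REGISTRY.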
    collectors = list(REGISTRY._collector_to_names.keys())
    for collector in collectors:
        REGISTRY.unregister(collector)
    return PrometheusLogger()


def create_standard_logging_payload() -> StandardLoggingPayload:
    return StandardLoggingPayload(
        id="test_id",
        call_type="completion",
        stream=False,
        response_cost=0.1,
        response_cost_failure_debug_info=None,
        status="success",
        total_tokens=30,
        prompt_tokens=20,
        completion_tokens=10,
        startTime=1234567890.0,
        endTime=1234567891.0,
        completionStartTime=1234567890.5,
        model_map_information=StandardLoggingModelInformation(
            model_map_key="gpt-3.5-turbo", model_map_value=None
        ),
        model="gpt-3.5-turbo",
        model_id="model-123",
        model_group="openai-gpt",
        custom_llm_provider="openai",
        api_base="https://api.openai.com",
        metadata=StandardLoggingMetadata(
            user_api_key_hash="test_hash",
            user_api_key_alias="test_alias",
            user_api_key_team_id="test_team",
            user_api_key_user_id="test_user",
            user_api_key_team_alias="test_team_alias",
            user_api_key_org_id=None,
            spend_logs_metadata=None,
            requester_ip_address="127.0.0.1",
            requester_metadata=None,
            user_api_key_end_user_id="test_end_user",
        ),
        cache_hit=False,
        cache_key=None,
        saved_cache_cost=0.0,
        request_tags=[],
        end_user=None,
        requester_ip_address="127.0.0.1",
        messages=[{"role": "user", "content": "Hello, world!"}],
        response={"choices": [{"message": {"content": "Hi there!"}}]},
        error_str=None,
        model_parameters={"stream": True},
        hidden_params=StandardLoggingHiddenParams(
            model_id="model-123",
            cache_key=None,
            api_base="https://api.openai.com",
            response_cost="0.1",
            additional_headers=None,
        ),
    )


def test_safe_get_remaining_budget(prometheus_logger):
    assert prometheus_logger._safe_get_remaining_budget(100, 30) == 70
    assert prometheus_logger._safe_get_remaining_budget(100, None) == 100
    assert prometheus_logger._safe_get_remaining_budget(None, 30) == float("inf")
    assert prometheus_logger._safe_get_remaining_budget(None, None) == float("inf")


@pytest.mark.asyncio
async def test_async_log_success_event(prometheus_logger):
    standard_logging_object = create_standard_logging_payload()
    kwargs = {
        "model": "gpt-3.5-turbo",
        "litellm_params": {
            "metadata": {
                "user_api_key": "test_key",
                "user_api_key_user_id": "test_user",
                "user_api_key_team_id": "test_team",
                "user_api_key_end_user_id": "test_end_user",
            }
        },
        "start_time": datetime.now(),
        "completion_start_time": datetime.now(),
        "api_call_start_time": datetime.now(),
        "end_time": datetime.now() + timedelta(seconds=1),
        "standard_logging_object": standard_logging_object,
    }
    response_obj = MagicMock()

    # Mock the prometheus client methods

    # High Level Metrics - request/spend
    prometheus_logger.litellm_requests_metric = MagicMock()
    prometheus_logger.litellm_spend_metric = MagicMock()

    # Token Metrics
    prometheus_logger.litellm_tokens_metric = MagicMock()
    prometheus_logger.litellm_input_tokens_metric = MagicMock()
    prometheus_logger.litellm_output_tokens_metric = MagicMock()

    # Remaining Budget Metrics
    prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
    prometheus_logger.litellm_remaining_api_key_budget_metric = MagicMock()

    # Virtual Key Rate limit Metrics
    prometheus_logger.litellm_remaining_api_key_requests_for_model = MagicMock()
    prometheus_logger.litellm_remaining_api_key_tokens_for_model = MagicMock()

    # Latency Metrics
    prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
    prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
    prometheus_logger.litellm_request_total_latency_metric = MagicMock()

    await prometheus_logger.async_log_success_event(
        kwargs, response_obj, kwargs["start_time"], kwargs["end_time"]
    )

    # Assert that the metrics were incremented
    prometheus_logger.litellm_requests_metric.labels.assert_called()
    prometheus_logger.litellm_spend_metric.labels.assert_called()

    # Token Metrics
    prometheus_logger.litellm_tokens_metric.labels.assert_called()
    prometheus_logger.litellm_input_tokens_metric.labels.assert_called()
    prometheus_logger.litellm_output_tokens_metric.labels.assert_called()

    # Remaining Budget Metrics
    prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called()
    prometheus_logger.litellm_remaining_api_key_budget_metric.labels.assert_called()

    # Virtual Key Rate limit Metrics
    prometheus_logger.litellm_remaining_api_key_requests_for_model.labels.assert_called()
    prometheus_logger.litellm_remaining_api_key_tokens_for_model.labels.assert_called()

    # Latency Metrics
    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_called()
    prometheus_logger.litellm_llm_api_latency_metric.labels.assert_called()
    prometheus_logger.litellm_request_total_latency_metric.labels.assert_called()


def test_increment_token_metrics(prometheus_logger):
    """
    Test the increment_token_metrics method

    input, output, and total tokens metrics are incremented by the values in the standard logging payload
    """
    prometheus_logger.litellm_tokens_metric = MagicMock()
    prometheus_logger.litellm_input_tokens_metric = MagicMock()
    prometheus_logger.litellm_output_tokens_metric = MagicMock()

    standard_logging_payload = create_standard_logging_payload()
    standard_logging_payload["total_tokens"] = 100
    standard_logging_payload["prompt_tokens"] = 50
    standard_logging_payload["completion_tokens"] = 50

    prometheus_logger._increment_token_metrics(
        standard_logging_payload,
        end_user_id="user1",
        user_api_key="key1",
        user_api_key_alias="alias1",
        model="gpt-3.5-turbo",
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        user_id="user1",
    )

    prometheus_logger.litellm_tokens_metric.labels.assert_called_once_with(
        "user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
    )
    prometheus_logger.litellm_tokens_metric.labels().inc.assert_called_once_with(100)

    prometheus_logger.litellm_input_tokens_metric.labels.assert_called_once_with(
        "user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
    )
    prometheus_logger.litellm_input_tokens_metric.labels().inc.assert_called_once_with(
        50
    )

    prometheus_logger.litellm_output_tokens_metric.labels.assert_called_once_with(
        "user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
    )
    prometheus_logger.litellm_output_tokens_metric.labels().inc.assert_called_once_with(
        50
    )


def test_increment_remaining_budget_metrics(prometheus_logger):
    """
    Test the increment_remaining_budget_metrics method

    team and api key budget metrics are set to the difference between max budget and spend
    """
    prometheus_logger.litellm_remaining_team_budget_metric = MagicMock()
    prometheus_logger.litellm_remaining_api_key_budget_metric = MagicMock()

    litellm_params = {
        "metadata": {
            "user_api_key_team_spend": 50,
            "user_api_key_team_max_budget": 100,
            "user_api_key_spend": 25,
            "user_api_key_max_budget": 75,
        }
    }

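    # With these values, _increment_remaining_budget_metrics should report
    # remaining team budget = 100 - 50 = 50 and remaining api key budget = 75 - 25 = 50,
    # which is what the assertions below expect.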
    prometheus_logger._increment_remaining_budget_metrics(
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        user_api_key="key1",
        user_api_key_alias="alias1",
        litellm_params=litellm_params,
    )

    prometheus_logger.litellm_remaining_team_budget_metric.labels.assert_called_once_with(
        "team1", "team_alias1"
    )
    prometheus_logger.litellm_remaining_team_budget_metric.labels().set.assert_called_once_with(
        50
    )

    prometheus_logger.litellm_remaining_api_key_budget_metric.labels.assert_called_once_with(
        "key1", "alias1"
    )
    prometheus_logger.litellm_remaining_api_key_budget_metric.labels().set.assert_called_once_with(
        50
    )


def test_set_latency_metrics(prometheus_logger):
    """
    Test the set_latency_metrics method

    time to first token, llm api latency, and request total latency metrics are set to the values in the standard logging payload
    """
    standard_logging_payload = create_standard_logging_payload()
    standard_logging_payload["model_parameters"] = {"stream": True}
    prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
    prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
    prometheus_logger.litellm_request_total_latency_metric = MagicMock()

    now = datetime.now()
    kwargs = {
        "end_time": now,  # when the request ends
        "start_time": now - timedelta(seconds=2),  # when the request starts
        "api_call_start_time": now - timedelta(seconds=1.5),  # when the api call starts
        "completion_start_time": now
        - timedelta(seconds=1),  # when the completion starts
    }

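    # Timeline implied by the kwargs above (offsets relative to `now`):
    #   start_time            = now - 2.0s
    #   api_call_start_time   = now - 1.5s
    #   completion_start_time = now - 1.0s
    #   end_time              = now
    # so the expected observations are:
    #   time to first token   = completion_start_time - api_call_start_time = 0.5s
    #   llm api latency       = end_time - api_call_start_time              = 1.5s
    #   total request latency = end_time - start_time                       = 2.0s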
    prometheus_logger._set_latency_metrics(
        kwargs=kwargs,
        model="gpt-3.5-turbo",
        user_api_key="key1",
        user_api_key_alias="alias1",
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        standard_logging_payload=standard_logging_payload,
    )

    # completion_start_time - api_call_start_time
    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_called_once_with(
        "gpt-3.5-turbo", "key1", "alias1", "team1", "team_alias1"
    )
    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels().observe.assert_called_once_with(
        0.5
    )

    # end_time - api_call_start_time
    prometheus_logger.litellm_llm_api_latency_metric.labels.assert_called_once_with(
        model="gpt-3.5-turbo",
        hashed_api_key="key1",
        api_key_alias="alias1",
        team="team1",
        team_alias="team_alias1",
        user="test_user",
        end_user="test_end_user",
        requested_model="openai-gpt",
    )
    prometheus_logger.litellm_llm_api_latency_metric.labels().observe.assert_called_once_with(
        1.5
    )

    # total latency for the request
    prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once_with(
        end_user="test_end_user",
        hashed_api_key="key1",
        api_key_alias="alias1",
        requested_model="openai-gpt",
        team="team1",
        team_alias="team_alias1",
        user="test_user",
        model="gpt-3.5-turbo",
    )
    prometheus_logger.litellm_request_total_latency_metric.labels().observe.assert_called_once_with(
        2.0
    )


def test_increment_top_level_request_and_spend_metrics(prometheus_logger):
    """
    Test the increment_top_level_request_and_spend_metrics method

    - litellm_requests_metric is incremented by 1
    - litellm_spend_metric is incremented by the response cost in the standard logging payload
    """
    prometheus_logger.litellm_requests_metric = MagicMock()
    prometheus_logger.litellm_spend_metric = MagicMock()

    prometheus_logger._increment_top_level_request_and_spend_metrics(
        end_user_id="user1",
        user_api_key="key1",
        user_api_key_alias="alias1",
        model="gpt-3.5-turbo",
        user_api_team="team1",
        user_api_team_alias="team_alias1",
        user_id="user1",
        response_cost=0.1,
    )

    prometheus_logger.litellm_requests_metric.labels.assert_called_once_with(
        "user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
    )
    prometheus_logger.litellm_requests_metric.labels().inc.assert_called_once()

    prometheus_logger.litellm_spend_metric.labels.assert_called_once_with(
        "user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
    )
    prometheus_logger.litellm_spend_metric.labels().inc.assert_called_once_with(0.1)


@pytest.mark.asyncio
async def test_async_log_failure_event(prometheus_logger):
    # NOTE: almost all params for this metric are read from standard logging payload
    standard_logging_object = create_standard_logging_payload()
    kwargs = {
        "model": "gpt-3.5-turbo",
        "litellm_params": {
            "custom_llm_provider": "openai",
        },
        "start_time": datetime.now(),
        "completion_start_time": datetime.now(),
        "api_call_start_time": datetime.now(),
        "end_time": datetime.now() + timedelta(seconds=1),
        "standard_logging_object": standard_logging_object,
        "exception": Exception("Test error"),
    }
    response_obj = MagicMock()

    # Mock the metrics
    prometheus_logger.litellm_llm_api_failed_requests_metric = MagicMock()
    prometheus_logger.litellm_deployment_failure_responses = MagicMock()
    prometheus_logger.litellm_deployment_total_requests = MagicMock()
    prometheus_logger.set_deployment_partial_outage = MagicMock()

    await prometheus_logger.async_log_failure_event(
        kwargs, response_obj, kwargs["start_time"], kwargs["end_time"]
    )

    # litellm_llm_api_failed_requests_metric incremented
    """
    Expected metrics
    end_user_id,
    user_api_key,
    user_api_key_alias,
    model,
    user_api_team,
    user_api_team_alias,
    user_id,
    """
    prometheus_logger.litellm_llm_api_failed_requests_metric.labels.assert_called_once_with(
        None,
        "test_hash",
        "test_alias",
        "gpt-3.5-turbo",
        "test_team",
        "test_team_alias",
        "test_user",
    )
    prometheus_logger.litellm_llm_api_failed_requests_metric.labels().inc.assert_called_once()

    # deployment should be marked in partial outage
    prometheus_logger.set_deployment_partial_outage.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
    )

    # deployment failure responses incremented
    prometheus_logger.litellm_deployment_failure_responses.labels.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
        exception_status="None",
        exception_class="Exception",
        requested_model="openai-gpt",  # passed in standard logging payload
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
    )
    prometheus_logger.litellm_deployment_failure_responses.labels().inc.assert_called_once()

    # deployment total requests incremented
    prometheus_logger.litellm_deployment_total_requests.labels.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
        requested_model="openai-gpt",  # passed in standard logging payload
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
    )
    prometheus_logger.litellm_deployment_total_requests.labels().inc.assert_called_once()


@pytest.mark.asyncio
async def test_async_post_call_failure_hook(prometheus_logger):
    """
    Test for the async_post_call_failure_hook method

    it should increment the litellm_proxy_failed_requests_metric and litellm_proxy_total_requests_metric
    """
    # Mock the prometheus metrics
    prometheus_logger.litellm_proxy_failed_requests_metric = MagicMock()
    prometheus_logger.litellm_proxy_total_requests_metric = MagicMock()

    # Create test data
    request_data = {"model": "gpt-3.5-turbo"}

    original_exception = litellm.RateLimitError(
        message="Test error", llm_provider="openai", model="gpt-3.5-turbo"
    )

    user_api_key_dict = UserAPIKeyAuth(
        api_key="test_key",
        key_alias="test_alias",
        team_id="test_team",
        team_alias="test_team_alias",
        user_id="test_user",
        end_user_id="test_end_user",
    )

    # Call the function
    await prometheus_logger.async_post_call_failure_hook(
        request_data=request_data,
        original_exception=original_exception,
        user_api_key_dict=user_api_key_dict,
    )

    # Assert failed requests metric was incremented with correct labels
    prometheus_logger.litellm_proxy_failed_requests_metric.labels.assert_called_once_with(
        end_user="test_end_user",
        hashed_api_key="test_key",
        api_key_alias="test_alias",
        requested_model="gpt-3.5-turbo",
        team="test_team",
        team_alias="test_team_alias",
        user="test_user",
        exception_status=429,
        exception_class="RateLimitError",
    )
    prometheus_logger.litellm_proxy_failed_requests_metric.labels().inc.assert_called_once()

    # Assert total requests metric was incremented with correct labels
    prometheus_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
        end_user="test_end_user",
        hashed_api_key="test_key",
        api_key_alias="test_alias",
        requested_model="gpt-3.5-turbo",
        team="test_team",
        team_alias="test_team_alias",
        user="test_user",
        status_code="429",
    )
    prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()


@pytest.mark.asyncio
async def test_async_post_call_success_hook(prometheus_logger):
    """
    Test for the async_post_call_success_hook method

    it should increment the litellm_proxy_total_requests_metric
    """
    # Mock the prometheus metric
    prometheus_logger.litellm_proxy_total_requests_metric = MagicMock()

    # Create test data
    data = {"model": "gpt-3.5-turbo"}

    user_api_key_dict = UserAPIKeyAuth(
        api_key="test_key",
        key_alias="test_alias",
        team_id="test_team",
        team_alias="test_team_alias",
        user_id="test_user",
        end_user_id="test_end_user",
    )

    response = {"choices": [{"message": {"content": "test response"}}]}

    # Call the function
    await prometheus_logger.async_post_call_success_hook(
        data=data, user_api_key_dict=user_api_key_dict, response=response
    )

    # Assert total requests metric was incremented with correct labels
    prometheus_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
        end_user="test_end_user",
        hashed_api_key="test_key",
        api_key_alias="test_alias",
        requested_model="gpt-3.5-turbo",
        team="test_team",
        team_alias="test_team_alias",
        user="test_user",
        status_code="200",
    )
    prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()


def test_set_llm_deployment_success_metrics(prometheus_logger):
    # Mock all the metrics used in the method
    prometheus_logger.litellm_remaining_requests_metric = MagicMock()
    prometheus_logger.litellm_remaining_tokens_metric = MagicMock()
    prometheus_logger.litellm_deployment_success_responses = MagicMock()
    prometheus_logger.litellm_deployment_total_requests = MagicMock()
    prometheus_logger.litellm_deployment_latency_per_output_token = MagicMock()
    prometheus_logger.set_deployment_healthy = MagicMock()

    standard_logging_payload = create_standard_logging_payload()

    standard_logging_payload["hidden_params"]["additional_headers"] = {
        "x_ratelimit_remaining_requests": 123,
        "x_ratelimit_remaining_tokens": 4321,
    }

    # Create test data
    request_kwargs = {
        "model": "gpt-3.5-turbo",
        "litellm_params": {
            "custom_llm_provider": "openai",
            "metadata": {"model_info": {"id": "model-123"}},
        },
        "standard_logging_object": standard_logging_payload,
    }

    start_time = datetime.now()
    end_time = start_time + timedelta(seconds=1)
    output_tokens = 10

    # Call the function
    prometheus_logger.set_llm_deployment_success_metrics(
        request_kwargs=request_kwargs,
        start_time=start_time,
        end_time=end_time,
        output_tokens=output_tokens,
    )

    # Verify remaining requests metric
    prometheus_logger.litellm_remaining_requests_metric.labels.assert_called_once_with(
        "openai-gpt",  # model_group / requested model from create_standard_logging_payload()
        "openai",  # llm provider
        "https://api.openai.com",  # api base
        "gpt-3.5-turbo",  # actual model used - litellm model name
        standard_logging_payload["metadata"]["user_api_key_hash"],
        standard_logging_payload["metadata"]["user_api_key_alias"],
    )
    prometheus_logger.litellm_remaining_requests_metric.labels().set.assert_called_once_with(
        123
    )

    # Verify remaining tokens metric
    prometheus_logger.litellm_remaining_tokens_metric.labels.assert_called_once_with(
        "openai-gpt",  # model_group / requested model from create_standard_logging_payload()
        "openai",  # llm provider
        "https://api.openai.com",  # api base
        "gpt-3.5-turbo",  # actual model used - litellm model name
        standard_logging_payload["metadata"]["user_api_key_hash"],
        standard_logging_payload["metadata"]["user_api_key_alias"],
    )
    prometheus_logger.litellm_remaining_tokens_metric.labels().set.assert_called_once_with(
        4321
    )

    # Verify deployment healthy state
    prometheus_logger.set_deployment_healthy.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
    )

    # Verify success responses metric
    prometheus_logger.litellm_deployment_success_responses.labels.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
        requested_model="openai-gpt",  # requested model from create_standard_logging_payload()
        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
    )
    prometheus_logger.litellm_deployment_success_responses.labels().inc.assert_called_once()

    # Verify total requests metric
    prometheus_logger.litellm_deployment_total_requests.labels.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
        requested_model="openai-gpt",  # requested model from create_standard_logging_payload()
        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
    )
    prometheus_logger.litellm_deployment_total_requests.labels().inc.assert_called_once()

    # Verify latency per output token metric
    prometheus_logger.litellm_deployment_latency_per_output_token.labels.assert_called_once_with(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
    )
    # Calculate expected latency per token (1 second / 10 tokens = 0.1 seconds per token)
    expected_latency_per_token = 0.1
    prometheus_logger.litellm_deployment_latency_per_output_token.labels().observe.assert_called_once_with(
        expected_latency_per_token
    )


@pytest.mark.asyncio
async def test_log_success_fallback_event(prometheus_logger):
    prometheus_logger.litellm_deployment_successful_fallbacks = MagicMock()

    original_model_group = "gpt-3.5-turbo"
    kwargs = {
        "model": "gpt-4",
        "metadata": {
            "user_api_key_hash": "test_hash",
            "user_api_key_alias": "test_alias",
            "user_api_key_team_id": "test_team",
            "user_api_key_team_alias": "test_team_alias",
        },
    }
    original_exception = litellm.RateLimitError(
        message="Test error", llm_provider="openai", model="gpt-3.5-turbo"
    )

    await prometheus_logger.log_success_fallback_event(
        original_model_group=original_model_group,
        kwargs=kwargs,
        original_exception=original_exception,
    )

    prometheus_logger.litellm_deployment_successful_fallbacks.labels.assert_called_once_with(
        requested_model=original_model_group,
        fallback_model="gpt-4",
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
        exception_status="429",
        exception_class="RateLimitError",
    )
    prometheus_logger.litellm_deployment_successful_fallbacks.labels().inc.assert_called_once()


@pytest.mark.asyncio
async def test_log_failure_fallback_event(prometheus_logger):
    prometheus_logger.litellm_deployment_failed_fallbacks = MagicMock()

    original_model_group = "gpt-3.5-turbo"
    kwargs = {
        "model": "gpt-4",
        "metadata": {
            "user_api_key_hash": "test_hash",
            "user_api_key_alias": "test_alias",
            "user_api_key_team_id": "test_team",
            "user_api_key_team_alias": "test_team_alias",
        },
    }
    original_exception = litellm.RateLimitError(
        message="Test error", llm_provider="openai", model="gpt-3.5-turbo"
    )

    await prometheus_logger.log_failure_fallback_event(
        original_model_group=original_model_group,
        kwargs=kwargs,
        original_exception=original_exception,
    )

    prometheus_logger.litellm_deployment_failed_fallbacks.labels.assert_called_once_with(
        requested_model=original_model_group,
        fallback_model="gpt-4",
        hashed_api_key="test_hash",
        api_key_alias="test_alias",
        team="test_team",
        team_alias="test_team_alias",
        exception_status="429",
        exception_class="RateLimitError",
    )
    prometheus_logger.litellm_deployment_failed_fallbacks.labels().inc.assert_called_once()


def test_deployment_state_management(prometheus_logger):
    prometheus_logger.litellm_deployment_state = MagicMock()

    test_params = {
        "litellm_model_name": "gpt-3.5-turbo",
        "model_id": "model-123",
        "api_base": "https://api.openai.com",
        "api_provider": "openai",
    }

    # Test set_deployment_healthy (state=0)
    prometheus_logger.set_deployment_healthy(**test_params)
    prometheus_logger.litellm_deployment_state.labels.assert_called_with(
        test_params["litellm_model_name"],
        test_params["model_id"],
        test_params["api_base"],
        test_params["api_provider"],
    )
    prometheus_logger.litellm_deployment_state.labels().set.assert_called_with(0)

    # Test set_deployment_partial_outage (state=1)
    prometheus_logger.set_deployment_partial_outage(**test_params)
    prometheus_logger.litellm_deployment_state.labels().set.assert_called_with(1)

    # Test set_deployment_complete_outage (state=2)
    prometheus_logger.set_deployment_complete_outage(**test_params)
    prometheus_logger.litellm_deployment_state.labels().set.assert_called_with(2)


def test_increment_deployment_cooled_down(prometheus_logger):
    prometheus_logger.litellm_deployment_cooled_down = MagicMock()

    prometheus_logger.increment_deployment_cooled_down(
        litellm_model_name="gpt-3.5-turbo",
        model_id="model-123",
        api_base="https://api.openai.com",
        api_provider="openai",
        exception_status="429",
    )

    prometheus_logger.litellm_deployment_cooled_down.labels.assert_called_once_with(
        "gpt-3.5-turbo", "model-123", "https://api.openai.com", "openai", "429"
    )
    prometheus_logger.litellm_deployment_cooled_down.labels().inc.assert_called_once()


@pytest.mark.parametrize("disable_end_user_tracking", [True, False])
def test_prometheus_factory(monkeypatch, disable_end_user_tracking):
    from litellm.integrations.prometheus import prometheus_label_factory
    from litellm.types.integrations.prometheus import UserAPIKeyLabelValues

    monkeypatch.setattr(
        "litellm.disable_end_user_cost_tracking_prometheus_only",
        disable_end_user_tracking,
    )

    enum_values = UserAPIKeyLabelValues(
        end_user="test_end_user",
        api_key_hash="test_hash",
        api_key_alias="test_alias",
    )
    supported_labels = ["end_user", "api_key_hash", "api_key_alias"]
    returned_dict = prometheus_label_factory(
        supported_enum_labels=supported_labels, enum_values=enum_values
    )

    if disable_end_user_tracking:
        assert returned_dict["end_user"] is None
    else:
        assert returned_dict["end_user"] == "test_end_user"