(testing) increase prometheus.py test coverage to 90% (#6466)

* add tests for prometheus failure events

* update set_llm_deployment_failure_metrics

* add test_async_post_call_failure_hook

* add unit tests for all prometheus functions

* fix linting
Ishaan Jaff 2024-10-28 18:08:05 +04:00 committed by GitHub
parent fb9fb3467d
commit 151991c66d
2 changed files with 461 additions and 17 deletions
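
A quick orientation on the pattern used throughout the new tests: each test replaces the prometheus_client metric objects on the PrometheusLogger fixture with MagicMock instances, calls the logger method under test, and then asserts on the chained labels(...).inc() / .set() / .observe() calls. Below is a minimal, self-contained sketch of that pattern; FakeLogger and record_success are hypothetical stand-ins for illustration, not litellm code.

from unittest.mock import MagicMock


class FakeLogger:
    def __init__(self):
        # In the real PrometheusLogger this attribute would be a prometheus_client Counter
        self.request_counter = MagicMock()

    def record_success(self, model: str, team: str) -> None:
        # Same call shape the litellm tests assert on: .labels(...).inc()
        self.request_counter.labels(model=model, team=team).inc()


def test_record_success():
    logger = FakeLogger()
    logger.record_success(model="gpt-3.5-turbo", team="test_team")

    # Assert the label values first; calling labels() again on the next line
    # returns the same MagicMock child, so the inc() assertion still sees
    # exactly one recorded call.
    logger.request_counter.labels.assert_called_once_with(
        model="gpt-3.5-turbo", team="test_team"
    )
    logger.request_counter.labels().inc.assert_called_once()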

litellm/integrations/prometheus.py

@@ -397,7 +397,10 @@ class PrometheusLogger(CustomLogger):
# input, output, total token metrics
self._increment_token_metrics(
standard_logging_payload=standard_logging_payload,
# why type ignore below?
# 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
# 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
standard_logging_payload=standard_logging_payload, # type: ignore
end_user_id=end_user_id,
user_api_key=user_api_key,
user_api_key_alias=user_api_key_alias,
@@ -432,7 +435,10 @@ class PrometheusLogger(CustomLogger):
user_api_key_alias=user_api_key_alias,
user_api_team=user_api_team,
user_api_team_alias=user_api_team_alias,
standard_logging_payload=standard_logging_payload,
# why type ignore below?
# 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
# 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
standard_logging_payload=standard_logging_payload, # type: ignore
)
# set x-ratelimit headers
@@ -757,24 +763,31 @@ class PrometheusLogger(CustomLogger):
pass
def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
"""
Sets Failure metrics when an LLM API call fails
- mark the deployment as partial outage
- increment deployment failure responses metric
- increment deployment total requests metric
Args:
request_kwargs: dict
"""
try:
verbose_logger.debug("setting remaining tokens requests metric")
standard_logging_payload: StandardLoggingPayload = request_kwargs.get(
"standard_logging_object", {}
)
_response_headers = request_kwargs.get("response_headers")
_litellm_params = request_kwargs.get("litellm_params", {}) or {}
_metadata = _litellm_params.get("metadata", {})
litellm_model_name = request_kwargs.get("model", None)
api_base = _metadata.get("api_base", None)
model_group = _metadata.get("model_group", None)
if api_base is None:
api_base = _litellm_params.get("api_base", None)
llm_provider = _litellm_params.get("custom_llm_provider", None)
_model_info = _metadata.get("model_info") or {}
model_id = _model_info.get("id", None)
model_group = standard_logging_payload.get("model_group", None)
api_base = standard_logging_payload.get("api_base", None)
model_id = standard_logging_payload.get("model_id", None)
exception: Exception = request_kwargs.get("exception", None)
llm_provider = _litellm_params.get("custom_llm_provider", None)
"""
log these labels
["litellm_model_name", "model_id", "api_base", "api_provider"]
@@ -1061,8 +1074,8 @@ class PrometheusLogger(CustomLogger):
self,
state: int,
litellm_model_name: str,
model_id: str,
api_base: str,
model_id: Optional[str],
api_base: Optional[str],
api_provider: str,
):
self.litellm_deployment_state.labels(
@@ -1083,8 +1096,8 @@ class PrometheusLogger(CustomLogger):
def set_deployment_partial_outage(
self,
litellm_model_name: str,
model_id: str,
api_base: str,
model_id: Optional[str],
api_base: Optional[str],
api_provider: str,
):
self.set_litellm_deployment_state(
@@ -1094,8 +1107,8 @@ class PrometheusLogger(CustomLogger):
def set_deployment_complete_outage(
self,
litellm_model_name: str,
model_id: str,
api_base: str,
model_id: Optional[str],
api_base: Optional[str],
api_provider: str,
):
self.set_litellm_deployment_state(
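
The prometheus.py hunks above switch set_llm_deployment_failure_metrics from digging through litellm_params["metadata"] to reading model_group, api_base and model_id directly from the standard_logging_object in request_kwargs, which is why the deployment-state helpers now take Optional[str] for model_id and api_base. A rough sketch of the new lookup path, using an illustrative request_kwargs dict (not a real litellm payload):

from typing import Optional

# Illustrative request_kwargs, shaped like the values the failure tests build below
request_kwargs = {
    "model": "gpt-3.5-turbo",
    "litellm_params": {"custom_llm_provider": "openai"},
    "standard_logging_object": {
        "model_group": "openai-gpt",
        "api_base": "https://api.openai.com",
        "model_id": "model-123",
    },
    "exception": Exception("Test error"),
}

# New lookup path: deployment identifiers come straight from the standard logging payload
standard_logging_payload = request_kwargs.get("standard_logging_object", {})
model_group: Optional[str] = standard_logging_payload.get("model_group")
api_base: Optional[str] = standard_logging_payload.get("api_base")
model_id: Optional[str] = standard_logging_payload.get("model_id")
llm_provider = request_kwargs["litellm_params"].get("custom_llm_provider")

# The old path read api_base / model_group / model_id out of
# litellm_params["metadata"], falling back to litellm_params["api_base"];
# any of the new values may be missing, hence the Optional[str] signatures.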

prometheus unit tests (test file)

@@ -26,6 +26,7 @@ import pytest
from unittest.mock import MagicMock, patch
from datetime import datetime, timedelta
from litellm.integrations.prometheus import PrometheusLogger
from litellm.proxy._types import UserAPIKeyAuth
verbose_logger.setLevel(logging.DEBUG)
@@ -67,6 +68,7 @@ def create_standard_logging_payload() -> StandardLoggingPayload:
user_api_key_team_id="test_team",
user_api_key_user_id="test_user",
user_api_key_team_alias="test_team_alias",
user_api_key_org_id=None,
spend_logs_metadata=None,
requester_ip_address="127.0.0.1",
requester_metadata=None,
@@ -342,3 +344,432 @@ def test_increment_top_level_request_and_spend_metrics(prometheus_logger):
"user1", "key1", "alias1", "gpt-3.5-turbo", "team1", "team_alias1", "user1"
)
prometheus_logger.litellm_spend_metric.labels().inc.assert_called_once_with(0.1)
@pytest.mark.asyncio
async def test_async_log_failure_event(prometheus_logger):
# NOTE: almost all params for this metric are read from standard logging payload
standard_logging_object = create_standard_logging_payload()
kwargs = {
"model": "gpt-3.5-turbo",
"litellm_params": {
"custom_llm_provider": "openai",
},
"start_time": datetime.now(),
"completion_start_time": datetime.now(),
"api_call_start_time": datetime.now(),
"end_time": datetime.now() + timedelta(seconds=1),
"standard_logging_object": standard_logging_object,
"exception": Exception("Test error"),
}
response_obj = MagicMock()
# Mock the metrics
prometheus_logger.litellm_llm_api_failed_requests_metric = MagicMock()
prometheus_logger.litellm_deployment_failure_responses = MagicMock()
prometheus_logger.litellm_deployment_total_requests = MagicMock()
prometheus_logger.set_deployment_partial_outage = MagicMock()
await prometheus_logger.async_log_failure_event(
kwargs, response_obj, kwargs["start_time"], kwargs["end_time"]
)
# litellm_llm_api_failed_requests_metric incremented
"""
Expected metrics
end_user_id,
user_api_key,
user_api_key_alias,
model,
user_api_team,
user_api_team_alias,
user_id,
"""
prometheus_logger.litellm_llm_api_failed_requests_metric.labels.assert_called_once_with(
None,
"test_hash",
"test_alias",
"gpt-3.5-turbo",
"test_team",
"test_team_alias",
"test_user",
)
prometheus_logger.litellm_llm_api_failed_requests_metric.labels().inc.assert_called_once()
# deployment should be marked in partial outage
prometheus_logger.set_deployment_partial_outage.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
)
# deployment failure responses incremented
prometheus_logger.litellm_deployment_failure_responses.labels.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
exception_status="None",
exception_class="Exception",
requested_model="openai-gpt", # passed in standard logging payload
hashed_api_key="test_hash",
api_key_alias="test_alias",
team="test_team",
team_alias="test_team_alias",
)
prometheus_logger.litellm_deployment_failure_responses.labels().inc.assert_called_once()
# deployment total requests incremented
prometheus_logger.litellm_deployment_total_requests.labels.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
requested_model="openai-gpt", # passed in standard logging payload
hashed_api_key="test_hash",
api_key_alias="test_alias",
team="test_team",
team_alias="test_team_alias",
)
prometheus_logger.litellm_deployment_total_requests.labels().inc.assert_called_once()
@pytest.mark.asyncio
async def test_async_post_call_failure_hook(prometheus_logger):
"""
Test for the async_post_call_failure_hook method
it should increment the litellm_proxy_failed_requests_metric and litellm_proxy_total_requests_metric
"""
# Mock the prometheus metrics
prometheus_logger.litellm_proxy_failed_requests_metric = MagicMock()
prometheus_logger.litellm_proxy_total_requests_metric = MagicMock()
# Create test data
request_data = {"model": "gpt-3.5-turbo"}
original_exception = litellm.RateLimitError(
message="Test error", llm_provider="openai", model="gpt-3.5-turbo"
)
user_api_key_dict = UserAPIKeyAuth(
api_key="test_key",
key_alias="test_alias",
team_id="test_team",
team_alias="test_team_alias",
user_id="test_user",
end_user_id="test_end_user",
)
# Call the function
await prometheus_logger.async_post_call_failure_hook(
request_data=request_data,
original_exception=original_exception,
user_api_key_dict=user_api_key_dict,
)
# Assert failed requests metric was incremented with correct labels
prometheus_logger.litellm_proxy_failed_requests_metric.labels.assert_called_once_with(
end_user="test_end_user",
hashed_api_key="test_key",
api_key_alias="test_alias",
requested_model="gpt-3.5-turbo",
team="test_team",
team_alias="test_team_alias",
user="test_user",
exception_status=429,
exception_class="RateLimitError",
)
prometheus_logger.litellm_proxy_failed_requests_metric.labels().inc.assert_called_once()
# Assert total requests metric was incremented with correct labels
prometheus_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
"test_end_user",
"test_key",
"test_alias",
"gpt-3.5-turbo",
"test_team",
"test_team_alias",
"test_user",
)
prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()
@pytest.mark.asyncio
async def test_async_post_call_success_hook(prometheus_logger):
"""
Test for the async_post_call_success_hook method
it should increment the litellm_proxy_total_requests_metric
"""
# Mock the prometheus metric
prometheus_logger.litellm_proxy_total_requests_metric = MagicMock()
# Create test data
data = {"model": "gpt-3.5-turbo"}
user_api_key_dict = UserAPIKeyAuth(
api_key="test_key",
key_alias="test_alias",
team_id="test_team",
team_alias="test_team_alias",
user_id="test_user",
end_user_id="test_end_user",
)
response = {"choices": [{"message": {"content": "test response"}}]}
# Call the function
await prometheus_logger.async_post_call_success_hook(
data=data, user_api_key_dict=user_api_key_dict, response=response
)
# Assert total requests metric was incremented with correct labels
prometheus_logger.litellm_proxy_total_requests_metric.labels.assert_called_once_with(
"test_end_user",
"test_key",
"test_alias",
"gpt-3.5-turbo",
"test_team",
"test_team_alias",
"test_user",
)
prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()
def test_set_llm_deployment_success_metrics(prometheus_logger):
# Mock all the metrics used in the method
prometheus_logger.litellm_remaining_requests_metric = MagicMock()
prometheus_logger.litellm_remaining_tokens_metric = MagicMock()
prometheus_logger.litellm_deployment_success_responses = MagicMock()
prometheus_logger.litellm_deployment_total_requests = MagicMock()
prometheus_logger.litellm_deployment_latency_per_output_token = MagicMock()
prometheus_logger.set_deployment_healthy = MagicMock()
standard_logging_payload = create_standard_logging_payload()
# Create test data
request_kwargs = {
"model": "gpt-3.5-turbo",
"response_headers": {
"x-ratelimit-remaining-requests": 123,
"x-ratelimit-remaining-tokens": 4321,
},
"litellm_params": {
"custom_llm_provider": "openai",
"metadata": {"model_info": {"id": "model-123"}},
},
"standard_logging_object": standard_logging_payload,
}
start_time = datetime.now()
end_time = start_time + timedelta(seconds=1)
output_tokens = 10
# Call the function
prometheus_logger.set_llm_deployment_success_metrics(
request_kwargs=request_kwargs,
start_time=start_time,
end_time=end_time,
output_tokens=output_tokens,
)
# Verify remaining requests metric
prometheus_logger.litellm_remaining_requests_metric.labels.assert_called_once_with(
"openai-gpt", # model_group / requested model from create_standard_logging_payload()
"openai", # llm provider
"https://api.openai.com", # api base
"gpt-3.5-turbo", # actual model used - litellm model name
standard_logging_payload["metadata"]["user_api_key_hash"],
standard_logging_payload["metadata"]["user_api_key_alias"],
)
prometheus_logger.litellm_remaining_requests_metric.labels().set.assert_called_once_with(
123
)
# Verify remaining tokens metric
prometheus_logger.litellm_remaining_tokens_metric.labels.assert_called_once_with(
"openai-gpt", # model_group / requested model from create_standard_logging_payload()
"openai", # llm provider
"https://api.openai.com", # api base
"gpt-3.5-turbo", # actual model used - litellm model name
standard_logging_payload["metadata"]["user_api_key_hash"],
standard_logging_payload["metadata"]["user_api_key_alias"],
)
prometheus_logger.litellm_remaining_tokens_metric.labels().set.assert_called_once_with(
4321
)
# Verify deployment healthy state
prometheus_logger.set_deployment_healthy.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
)
# Verify success responses metric
prometheus_logger.litellm_deployment_success_responses.labels.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
requested_model="openai-gpt", # requested model from create_standard_logging_payload()
hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
team=standard_logging_payload["metadata"]["user_api_key_team_id"],
team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
)
prometheus_logger.litellm_deployment_success_responses.labels().inc.assert_called_once()
# Verify total requests metric
prometheus_logger.litellm_deployment_total_requests.labels.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
requested_model="openai-gpt", # requested model from create_standard_logging_payload()
hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
team=standard_logging_payload["metadata"]["user_api_key_team_id"],
team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
)
prometheus_logger.litellm_deployment_total_requests.labels().inc.assert_called_once()
# Verify latency per output token metric
prometheus_logger.litellm_deployment_latency_per_output_token.labels.assert_called_once_with(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
team=standard_logging_payload["metadata"]["user_api_key_team_id"],
team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
)
# Calculate expected latency per token (1 second / 10 tokens = 0.1 seconds per token)
expected_latency_per_token = 0.1
prometheus_logger.litellm_deployment_latency_per_output_token.labels().observe.assert_called_once_with(
expected_latency_per_token
)
@pytest.mark.asyncio
async def test_log_success_fallback_event(prometheus_logger):
prometheus_logger.litellm_deployment_successful_fallbacks = MagicMock()
original_model_group = "gpt-3.5-turbo"
kwargs = {
"model": "gpt-4",
"metadata": {
"user_api_key_hash": "test_hash",
"user_api_key_alias": "test_alias",
"user_api_key_team_id": "test_team",
"user_api_key_team_alias": "test_team_alias",
},
}
original_exception = litellm.RateLimitError(
message="Test error", llm_provider="openai", model="gpt-3.5-turbo"
)
await prometheus_logger.log_success_fallback_event(
original_model_group=original_model_group,
kwargs=kwargs,
original_exception=original_exception,
)
prometheus_logger.litellm_deployment_successful_fallbacks.labels.assert_called_once_with(
requested_model=original_model_group,
fallback_model="gpt-4",
hashed_api_key="test_hash",
api_key_alias="test_alias",
team="test_team",
team_alias="test_team_alias",
exception_status="429",
exception_class="RateLimitError",
)
prometheus_logger.litellm_deployment_successful_fallbacks.labels().inc.assert_called_once()
@pytest.mark.asyncio
async def test_log_failure_fallback_event(prometheus_logger):
prometheus_logger.litellm_deployment_failed_fallbacks = MagicMock()
original_model_group = "gpt-3.5-turbo"
kwargs = {
"model": "gpt-4",
"metadata": {
"user_api_key_hash": "test_hash",
"user_api_key_alias": "test_alias",
"user_api_key_team_id": "test_team",
"user_api_key_team_alias": "test_team_alias",
},
}
original_exception = litellm.RateLimitError(
message="Test error", llm_provider="openai", model="gpt-3.5-turbo"
)
await prometheus_logger.log_failure_fallback_event(
original_model_group=original_model_group,
kwargs=kwargs,
original_exception=original_exception,
)
prometheus_logger.litellm_deployment_failed_fallbacks.labels.assert_called_once_with(
requested_model=original_model_group,
fallback_model="gpt-4",
hashed_api_key="test_hash",
api_key_alias="test_alias",
team="test_team",
team_alias="test_team_alias",
exception_status="429",
exception_class="RateLimitError",
)
prometheus_logger.litellm_deployment_failed_fallbacks.labels().inc.assert_called_once()
def test_deployment_state_management(prometheus_logger):
prometheus_logger.litellm_deployment_state = MagicMock()
test_params = {
"litellm_model_name": "gpt-3.5-turbo",
"model_id": "model-123",
"api_base": "https://api.openai.com",
"api_provider": "openai",
}
# Test set_deployment_healthy (state=0)
prometheus_logger.set_deployment_healthy(**test_params)
prometheus_logger.litellm_deployment_state.labels.assert_called_with(
test_params["litellm_model_name"],
test_params["model_id"],
test_params["api_base"],
test_params["api_provider"],
)
prometheus_logger.litellm_deployment_state.labels().set.assert_called_with(0)
# Test set_deployment_partial_outage (state=1)
prometheus_logger.set_deployment_partial_outage(**test_params)
prometheus_logger.litellm_deployment_state.labels().set.assert_called_with(1)
# Test set_deployment_complete_outage (state=2)
prometheus_logger.set_deployment_complete_outage(**test_params)
prometheus_logger.litellm_deployment_state.labels().set.assert_called_with(2)
def test_increment_deployment_cooled_down(prometheus_logger):
prometheus_logger.litellm_deployment_cooled_down = MagicMock()
prometheus_logger.increment_deployment_cooled_down(
litellm_model_name="gpt-3.5-turbo",
model_id="model-123",
api_base="https://api.openai.com",
api_provider="openai",
exception_status="429",
)
prometheus_logger.litellm_deployment_cooled_down.labels.assert_called_once_with(
"gpt-3.5-turbo", "model-123", "https://api.openai.com", "openai", "429"
)
prometheus_logger.litellm_deployment_cooled_down.labels().inc.assert_called_once()
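
For context on the state values asserted in test_deployment_state_management: the deployment state gauge encodes health as an integer, with 0 = healthy, 1 = partial outage, 2 = complete outage. A minimal sketch of that mapping with a plain prometheus_client Gauge follows; the metric name and registration here are illustrative, not the metric litellm actually registers.

from typing import Optional
from prometheus_client import Gauge

# Illustrative gauge; litellm defines its own metric and label names
deployment_state = Gauge(
    "llm_deployment_state",
    "0 = healthy, 1 = partial outage, 2 = complete outage",
    ["litellm_model_name", "model_id", "api_base", "api_provider"],
)


def set_state(
    state: int,
    litellm_model_name: str,
    model_id: Optional[str],   # Optional[str] mirrors the widened signatures in the diff
    api_base: Optional[str],
    api_provider: str,
) -> None:
    deployment_state.labels(
        litellm_model_name, model_id, api_base, api_provider
    ).set(state)


# Usage mirroring the three helpers exercised in the test above
set_state(0, "gpt-3.5-turbo", "model-123", "https://api.openai.com", "openai")  # healthy
set_state(1, "gpt-3.5-turbo", "model-123", "https://api.openai.com", "openai")  # partial outage
set_state(2, "gpt-3.5-turbo", "model-123", "https://api.openai.com", "openai")  # complete outage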