(feat) add /key/health endpoint to test key based logging (#6073)

* add /key/health endpoint * add /key/health endpoint * fix return from /key/health * update doc string * fix doc string for /key/health * add test for /key/health * fix linting * docs /key/health
2024-10-05 11:56:55 +05:30 · 2024-10-05 11:56:55 +05:30 · c84cfe977e
commit c84cfe977e
parent 4e921bee2b
4 changed files with 297 additions and 0 deletions
--- a/docs/my-website/docs/proxy/team_logging.md
+++ b/docs/my-website/docs/proxy/team_logging.md
@ -301,3 +301,53 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
 Help us improve this feature, by filing a [ticket here](https://github.com/BerriAI/litellm/issues)
 ### Check if key callbacks are configured correctly `/key/health`
 Call `/key/health` with the key to check if the callback settings are configured correctly
 Pass the key in the request header
 ```bash
 curl -X POST "http://localhost:4000/key/health" \
  -H "Authorization: Bearer <your-key>" \
  -H "Content-Type: application/json"
 ```
 <Tabs>
 <TabItem label="Response when key is configured correctly" value="Response when key is configured correctly">
 Response when logging callbacks are set up correctly:
 ```json
 {
  "key": "healthy",
  "logging_callbacks": {
    "callbacks": [
      "gcs_bucket"
    ],
    "status": "healthy",
    "details": "No logger exceptions triggered, system is healthy. Manually check if logs were sent to ['gcs_bucket']"
  }
 }
 ```
 </TabItem>
 <TabItem label="Response when key is configured incorrectly" value="Response when key is configured incorrectly">
 Response when logging callbacks are not set up correctly:
 ```json
 {
  "key": "healthy",
  "logging_callbacks": {
    "callbacks": [
      "gcs_bucket"
    ],
    "status": "unhealthy",
    "details": "Logger exceptions triggered, system is unhealthy: Failed to load vertex credentials. Check to see if credentials containing partial/invalid information."
  }
 }
 ```
 </TabItem>
 </Tabs>
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@ -254,6 +254,7 @@ class LiteLLMRoutes(enum.Enum):
    info_routes = [
        "/key/info",
        "/key/health",
        "/team/info",
        "/team/list",
        "/user/info",
@ -276,6 +277,7 @@ class LiteLLMRoutes(enum.Enum):
        "/key/update",
        "/key/delete",
        "/key/info",
        "/key/health",
        # user
        "/user/new",
        "/user/update",
@ -334,6 +336,7 @@ class LiteLLMRoutes(enum.Enum):
            "/key/generate",
            "/key/update",
            "/key/delete",
            "/key/health",
            "/key/info",
            "/global/spend/tags",
            "/global/spend/keys",
@ -1920,3 +1923,14 @@ class CurrentItemRateLimit(TypedDict):
    current_requests: int
    current_tpm: int
    current_rpm: int
 class LoggingCallbackStatus(TypedDict, total=False):
    """
    Outcome of the test log that `/key/health` sends through a key's logging callbacks.

    Declared with `total=False`, so every field is optional for type checkers.
    """
    # `callback_name` of each logging callback configured on the key
    callbacks: List[str]
    # Overall result; the /key/health logging test sets "healthy", "unhealthy" or "error"
    status: str
    # Human-readable explanation (captured error logs, or the exception message)
    details: Optional[str]
 class KeyHealthResponse(TypedDict, total=False):
    """
    Response body of the `/key/health` endpoint.

    Declared with `total=False`, so every field is optional for type checkers.
    """
    # Health of the key itself; the endpoint sets this to "healthy"
    key: str
    # Result of the key-based logging test; None when the key has no "logging" metadata
    logging_callbacks: Optional[LoggingCallbackStatus]
--- a/litellm/proxy/management_endpoints/key_management_endpoints.py
+++ b/litellm/proxy/management_endpoints/key_management_endpoints.py
@ -1448,3 +1448,167 @@ async def unblock_key(
    )
    return record
@router.post(
    "/key/health",
    tags=["key management"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=KeyHealthResponse,
)
@management_endpoint_wrapper
async def key_health(
    request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Check the health of the key

    Checks:
    - If key based logging is configured correctly - sends a test log

    Usage

    Pass the key in the request header

    ```bash
    curl -X POST "http://localhost:4000/key/health" \\
     -H "Authorization: Bearer sk-1234" \\
     -H "Content-Type: application/json"
    ```

    Response when logging callbacks are set up correctly:

    ```json
    {
      "key": "healthy",
      "logging_callbacks": {
        "callbacks": [
          "gcs_bucket"
        ],
        "status": "healthy",
        "details": "No logger exceptions triggered, system is healthy. Manually check if logs were sent to ['gcs_bucket']"
      }
    }
    ```

    Response when logging callbacks are not set up correctly:

    ```json
    {
      "key": "healthy",
      "logging_callbacks": {
        "callbacks": [
          "gcs_bucket"
        ],
        "status": "unhealthy",
        "details": "Logger exceptions triggered, system is unhealthy: Failed to load vertex credentials. Check to see if credentials containing partial/invalid information."
      }
    }
    ```

    When the key has no "logging" entry in its metadata, `logging_callbacks`
    is returned as null and no test log is sent.

    Raises:
        ProxyException: (HTTP 500) if anything fails while running the check.
    """
    try:
        # Get the key's metadata
        key_metadata = user_api_key_dict.metadata

        health_status: KeyHealthResponse = KeyHealthResponse(
            key="healthy",
            logging_callbacks=None,
        )

        # Only run the logging test when key-based logging is configured
        if key_metadata and "logging" in key_metadata:
            health_status["logging_callbacks"] = await test_key_logging(
                user_api_key_dict=user_api_key_dict,
                request=request,
                key_logging=key_metadata["logging"],
            )

        return health_status
    except Exception as e:
        # Chain explicitly so the original failure stays attached as __cause__
        raise ProxyException(
            message=f"Key health check failed: {str(e)}",
            type=ProxyErrorTypes.internal_server_error,
            param=getattr(e, "param", "None"),
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        ) from e
 async def test_key_logging(
     user_api_key_dict: UserAPIKeyAuth,
     request: Request,
     key_logging: List[Dict[str, Any]],
 ) -> LoggingCallbackStatus:
     """
     Test the key-based logging

     - Test that key logging is correctly formatted and all args are passed correctly
     - Make a mock completion call -> user can check if it's correctly logged
     - Check if any logger.exceptions were triggered -> if they were then returns it to the user client side

     Args:
         user_api_key_dict: Auth object of the key being checked.
         request: Incoming request, forwarded to `add_litellm_data_to_request`.
         key_logging: The key's `metadata["logging"]` list; every entry must
             carry a non-None `callback_name`.

     Returns:
         LoggingCallbackStatus with `status` set to:
         - "error": preparing or running the mock completion raised
         - "unhealthy": something was logged at ERROR level or above while the
           test ran
         - "healthy": nothing was logged at ERROR level or above

     Raises:
         ValueError: if an entry in `key_logging` has no `callback_name`.
     """
     import logging
     from io import StringIO

     from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
     from litellm.proxy.proxy_server import general_settings, proxy_config

     logging_callbacks: List[str] = []
     for callback in key_logging:
         if callback.get("callback_name") is not None:
             logging_callbacks.append(callback["callback_name"])
         else:
             raise ValueError("callback_name is required in key_logging")

     # Temporarily capture ERROR+ records on the root logger.
     # NOTE: the root logger also receives records propagated from other
     # loggers in this process, so errors from concurrent work may be captured.
     log_capture_string = StringIO()
     ch = logging.StreamHandler(log_capture_string)
     ch.setLevel(logging.ERROR)
     logger = logging.getLogger()
     logger.addHandler(ch)

     try:
         try:
             data = {
                 "model": "openai/litellm-key-health-test",
                 "messages": [
                     {
                         "role": "user",
                         "content": "Hello, this is a test from litellm /key/health. No LLM API call was made for this",
                     }
                 ],
                 "mock_response": "test response",
             }
             data = await add_litellm_data_to_request(
                 data=data,
                 user_api_key_dict=user_api_key_dict,
                 proxy_config=proxy_config,
                 general_settings=general_settings,
                 request=request,
             )
             await litellm.acompletion(
                 **data
             )  # make mock completion call to trigger key based callbacks
         except Exception as e:
             return LoggingCallbackStatus(
                 callbacks=logging_callbacks,
                 status="error",
                 details=f"Logging test failed: {str(e)}",
             )

         await asyncio.sleep(1)  # wait for callbacks to run

         # Check if any logger exceptions were triggered
         log_contents = log_capture_string.getvalue()
     finally:
         # Always detach the capture handler, including on the early "error"
         # return above; otherwise every failed check leaks a handler (and its
         # buffer) on the root logger.
         logger.removeHandler(ch)

     if log_contents:
         return LoggingCallbackStatus(
             callbacks=logging_callbacks,
             status="unhealthy",
             details=f"Logger exceptions triggered, system is unhealthy: {log_contents}",
         )
     else:
         return LoggingCallbackStatus(
             callbacks=logging_callbacks,
             status="healthy",
             details=f"No logger exceptions triggered, system is healthy. Manually check if logs were sent to {logging_callbacks} ",
         )
--- a/tests/otel_tests/test_key_logging_callbacks.py
+++ b/tests/otel_tests/test_key_logging_callbacks.py
@ -0,0 +1,69 @@
 """
 Tests for Key based logging callbacks
 """
 import httpx
 import pytest
@pytest.mark.asyncio()
async def test_key_logging_callbacks():
    """
    Generate a virtual key whose metadata carries a GCS logging callback with
    a bad service account, then call /key/health with that key.

    The endpoint should answer 200 and report the logging callbacks as unhealthy.
    """
    # Step 1: create a key with a deliberately misconfigured logging callback
    bad_gcs_callback = {
        "callback_name": "gcs_bucket",
        "callback_type": "success_and_failure",
        "callback_vars": {
            "gcs_bucket_name": "key-logging-project1",
            "gcs_path_service_account": "bad-service-account",
        },
    }
    admin_headers = {
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    }

    async with httpx.AsyncClient() as client:
        generate_response = await client.post(
            "http://0.0.0.0:4000/key/generate",
            headers=admin_headers,
            json={"metadata": {"logging": [bad_gcs_callback]}},
        )

    assert generate_response.status_code == 200
    generate_data = generate_response.json()
    assert "key" in generate_data
    new_key = generate_data["key"]

    # Step 2: ask the proxy for the health of the freshly generated key
    key_headers = {
        "Authorization": f"Bearer {new_key}",
        "Content-Type": "application/json",
    }

    async with httpx.AsyncClient() as client:
        health_response = await client.post(
            "http://localhost:4000/key/health",
            headers=key_headers,
            json={},
        )

    assert health_response.status_code == 200
    health_data = health_response.json()
    print("key_health_data", health_data)

    # Step 3: validate the shape and content of the response
    assert "key" in health_data
    assert "logging_callbacks" in health_data

    callback_report = health_data["logging_callbacks"]
    assert callback_report["callbacks"] == ["gcs_bucket"]
    assert callback_report["status"] == "unhealthy"
    assert "Failed to load vertex credentials" in callback_report["details"]