mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 02:34:29 +00:00
(feat) add /key/health endpoint to test key based logging (#6073)
* add /key/health endpoint * add /key/health endpoint * fix return from /key/health * update doc string * fix doc string for /key/health * add test for /key/health * fix linting * docs /key/health
This commit is contained in:
parent
4e921bee2b
commit
c84cfe977e
4 changed files with 297 additions and 0 deletions
|
@ -301,3 +301,53 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
|
|||
|
||||
Help us improve this feature, by filing a [ticket here](https://github.com/BerriAI/litellm/issues)
|
||||
|
||||
### Check if key callbacks are configured correctly `/key/health`
|
||||
|
||||
Call `/key/health` with the key to check if the callback settings are configured correctly
|
||||
|
||||
Pass the key in the request header
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:4000/key/health" \
|
||||
-H "Authorization: Bearer <your-key>" \
|
||||
-H "Content-Type: application/json"
|
||||
```
|
||||
|
||||
<Tabs>
|
||||
<TabItem label="Response when key is configured correctly" value="Response when key is configured correctly">
|
||||
|
||||
Response when logging callbacks are set up correctly:
|
||||
|
||||
```json
|
||||
{
|
||||
"key": "healthy",
|
||||
"logging_callbacks": {
|
||||
"callbacks": [
|
||||
"gcs_bucket"
|
||||
],
|
||||
"status": "healthy",
|
||||
"details": "No logger exceptions triggered, system is healthy. Manually check if logs were sent to ['gcs_bucket']"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
|
||||
<TabItem label="Response when key is configured incorrectly" value="Response when key is configured incorrectly">
|
||||
|
||||
Response when logging callbacks are not set up correctly:
|
||||
```json
|
||||
{
|
||||
"key": "healthy",
|
||||
"logging_callbacks": {
|
||||
"callbacks": [
|
||||
"gcs_bucket"
|
||||
],
|
||||
"status": "unhealthy",
|
||||
"details": "Logger exceptions triggered, system is unhealthy: Failed to load vertex credentials. Check to see if credentials containing partial/invalid information."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
|
|
@ -254,6 +254,7 @@ class LiteLLMRoutes(enum.Enum):
|
|||
|
||||
info_routes = [
|
||||
"/key/info",
|
||||
"/key/health",
|
||||
"/team/info",
|
||||
"/team/list",
|
||||
"/user/info",
|
||||
|
@ -276,6 +277,7 @@ class LiteLLMRoutes(enum.Enum):
|
|||
"/key/update",
|
||||
"/key/delete",
|
||||
"/key/info",
|
||||
"/key/health",
|
||||
# user
|
||||
"/user/new",
|
||||
"/user/update",
|
||||
|
@ -334,6 +336,7 @@ class LiteLLMRoutes(enum.Enum):
|
|||
"/key/generate",
|
||||
"/key/update",
|
||||
"/key/delete",
|
||||
"/key/health",
|
||||
"/key/info",
|
||||
"/global/spend/tags",
|
||||
"/global/spend/keys",
|
||||
|
@ -1920,3 +1923,14 @@ class CurrentItemRateLimit(TypedDict):
|
|||
current_requests: int
|
||||
current_tpm: int
|
||||
current_rpm: int
|
||||
|
||||
|
||||
class LoggingCallbackStatus(TypedDict, total=False):
    """Outcome of probing the logging callbacks attached to a key.

    Declared with ``total=False``, so every field may be omitted.
    """

    # Names of the callbacks that were exercised.
    callbacks: List[str]
    # Overall result of the probe.
    status: str
    # Optional human-readable explanation accompanying ``status``.
    details: Optional[str]
|
||||
|
||||
|
||||
class KeyHealthResponse(TypedDict, total=False):
    """Response body returned by the ``/key/health`` endpoint.

    Declared with ``total=False``, so every field may be omitted.
    """

    # Health of the key itself.
    key: str
    # Result of the logging-callback probe, or None when none was run.
    logging_callbacks: Optional[LoggingCallbackStatus]
|
||||
|
|
|
@ -1448,3 +1448,167 @@ async def unblock_key(
|
|||
)
|
||||
|
||||
return record
|
||||
|
||||
|
||||
@router.post(
    "/key/health",
    tags=["key management"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=KeyHealthResponse,
)
@management_endpoint_wrapper
async def key_health(
    request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Check the health of the key

    Checks:
    - If key based logging is configured correctly - sends a test log

    Usage

    Pass the key in the request header

    ```bash
    curl -X POST "http://localhost:4000/key/health" \
    -H "Authorization: Bearer sk-1234" \
    -H "Content-Type: application/json"
    ```

    Response when logging callbacks are set up correctly:

    ```json
    {
        "key": "healthy",
        "logging_callbacks": {
            "callbacks": [
                "gcs_bucket"
            ],
            "status": "healthy",
            "details": "No logger exceptions triggered, system is healthy. Manually check if logs were sent to ['gcs_bucket']"
        }
    }
    ```

    Response when logging callbacks are not set up correctly:

    ```json
    {
        "key": "healthy",
        "logging_callbacks": {
            "callbacks": [
                "gcs_bucket"
            ],
            "status": "unhealthy",
            "details": "Logger exceptions triggered, system is unhealthy: Failed to load vertex credentials. Check to see if credentials containing partial/invalid information."
        }
    }
    ```
    """
    try:
        metadata = user_api_key_dict.metadata

        # Start from the baseline report; the callback section is only
        # filled in when the key carries a "logging" entry in its metadata.
        report: KeyHealthResponse = KeyHealthResponse(
            key="healthy",
            logging_callbacks=None,
        )

        # Guard clause: nothing to probe for keys without logging config.
        if not metadata or "logging" not in metadata:
            return KeyHealthResponse(**report)

        report["logging_callbacks"] = await test_key_logging(
            user_api_key_dict=user_api_key_dict,
            request=request,
            key_logging=metadata["logging"],
        )
        return KeyHealthResponse(**report)

    except Exception as e:
        # Any failure in the check is surfaced to the client as a 500.
        raise ProxyException(
            message=f"Key health check failed: {str(e)}",
            type=ProxyErrorTypes.internal_server_error,
            param=getattr(e, "param", "None"),
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
|
||||
|
||||
|
||||
async def test_key_logging(
    user_api_key_dict: UserAPIKeyAuth,
    request: Request,
    key_logging: List[Dict[str, Any]],
) -> LoggingCallbackStatus:
    """
    Test the key-based logging

    - Test that key logging is correctly formatted and all args are passed correctly
    - Make a mock completion call -> user can check if it's correctly logged
    - Check if any logger.exceptions were triggered -> if they were then returns it to the user client side

    Args:
        user_api_key_dict: Auth object for the key under test; forwarded to
            `add_litellm_data_to_request`.
        request: The incoming request; forwarded to
            `add_litellm_data_to_request`.
        key_logging: The key's logging configuration. Every entry must carry
            a non-None "callback_name".

    Returns:
        A LoggingCallbackStatus whose `status` is:
        - "error" if building or running the mock completion raised,
        - "unhealthy" if anything was captured at ERROR level or above,
        - "healthy" otherwise.

    Raises:
        ValueError: If an entry in `key_logging` has no "callback_name".
    """
    import logging
    from io import StringIO

    from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
    from litellm.proxy.proxy_server import general_settings, proxy_config

    # Validate the configuration before touching any global logging state.
    logging_callbacks: List[str] = []
    for callback in key_logging:
        callback_name = callback.get("callback_name")
        if callback_name is None:
            raise ValueError("callback_name is required in key_logging")
        logging_callbacks.append(callback_name)

    # Temporarily capture ERROR-level records on the root logger.
    # NOTE(review): the handler sits on the root logger, so ERROR records
    # emitted by unrelated work during the capture window are included too.
    log_capture_string = StringIO()
    ch = logging.StreamHandler(log_capture_string)
    ch.setLevel(logging.ERROR)
    logger = logging.getLogger()
    logger.addHandler(ch)

    try:
        try:
            data = {
                "model": "openai/litellm-key-health-test",
                "messages": [
                    {
                        "role": "user",
                        "content": "Hello, this is a test from litellm /key/health. No LLM API call was made for this",
                    }
                ],
                "mock_response": "test response",
            }
            data = await add_litellm_data_to_request(
                data=data,
                user_api_key_dict=user_api_key_dict,
                proxy_config=proxy_config,
                general_settings=general_settings,
                request=request,
            )
            await litellm.acompletion(
                **data
            )  # make mock completion call to trigger key based callbacks
        except Exception as e:
            return LoggingCallbackStatus(
                callbacks=logging_callbacks,
                status="error",
                details=f"Logging test failed: {str(e)}",
            )

        await asyncio.sleep(1)  # wait for callbacks to run

        # Check if any logger exceptions were triggered
        log_contents = log_capture_string.getvalue()
    finally:
        # Always detach the temporary handler - including on the early
        # "error" return above and on task cancellation - so handlers do not
        # pile up on the root logger across health checks.
        logger.removeHandler(ch)

    if log_contents:
        return LoggingCallbackStatus(
            callbacks=logging_callbacks,
            status="unhealthy",
            details=f"Logger exceptions triggered, system is unhealthy: {log_contents}",
        )

    return LoggingCallbackStatus(
        callbacks=logging_callbacks,
        status="healthy",
        details=f"No logger exceptions triggered, system is healthy. Manually check if logs were sent to {logging_callbacks} ",
    )
|
||||
|
|
69
tests/otel_tests/test_key_logging_callbacks.py
Normal file
69
tests/otel_tests/test_key_logging_callbacks.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
"""
|
||||
Tests for Key based logging callbacks
|
||||
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.asyncio()
async def test_key_logging_callbacks():
    """
    Create a virtual key that has a logging callback configured on it, then
    call /key/health with that key.

    The callback is given a bad service account, so the health check is
    expected to report the logging callbacks as unhealthy.
    """
    # Callback config with deliberately invalid credentials.
    bad_gcs_callback = {
        "callback_name": "gcs_bucket",
        "callback_type": "success_and_failure",
        "callback_vars": {
            "gcs_bucket_name": "key-logging-project1",
            "gcs_path_service_account": "bad-service-account",
        },
    }

    # Step 1: generate a key carrying the callback in its metadata.
    async with httpx.AsyncClient() as client:
        generate_response = await client.post(
            "http://0.0.0.0:4000/key/generate",
            headers={
                "Authorization": "Bearer sk-1234",
                "Content-Type": "application/json",
            },
            json={"metadata": {"logging": [bad_gcs_callback]}},
        )

    assert generate_response.status_code == 200
    generate_data = generate_response.json()
    assert "key" in generate_data

    new_key = generate_data["key"]

    # Step 2: ask the proxy for the health of the freshly created key.
    async with httpx.AsyncClient() as client:
        health_response = await client.post(
            "http://localhost:4000/key/health",
            headers={
                "Authorization": f"Bearer {new_key}",
                "Content-Type": "application/json",
            },
            json={},
        )

    assert health_response.status_code == 200
    health_data = health_response.json()
    print("key_health_data", health_data)

    # Step 3: verify the response format and content.
    assert "key" in health_data
    assert "logging_callbacks" in health_data

    callback_report = health_data["logging_callbacks"]
    assert callback_report["callbacks"] == ["gcs_bucket"]
    assert callback_report["status"] == "unhealthy"
    assert "Failed to load vertex credentials" in callback_report["details"]
|
Loading…
Add table
Add a link
Reference in a new issue