forked from phoenix/litellm-mirror
(feat) add /key/health endpoint to test key based logging (#6073)
* add /key/health endpoint * add /key/health endpoint * fix return from /key/health * update doc string * fix doc string for /key/health * add test for /key/health * fix linting * docs /key/health
This commit is contained in:
parent
4e921bee2b
commit
c84cfe977e
4 changed files with 297 additions and 0 deletions
|
@ -301,3 +301,53 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
|
||||||
|
|
||||||
Help us improve this feature, by filing a [ticket here](https://github.com/BerriAI/litellm/issues)
|
Help us improve this feature, by filing a [ticket here](https://github.com/BerriAI/litellm/issues)
|
||||||
|
|
||||||
|
### Check if key callbacks are configured correctly `/key/health`
|
||||||
|
|
||||||
|
Call `/key/health` with the key to check if the callback settings are configured correctly
|
||||||
|
|
||||||
|
Pass the key in the request header
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST "http://localhost:4000/key/health" \
|
||||||
|
-H "Authorization: Bearer <your-key>" \
|
||||||
|
-H "Content-Type: application/json"
|
||||||
|
```
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem label="Response when key is configured correctly" value="Response when key is configured correctly">
|
||||||
|
|
||||||
|
Response when logging callbacks are set up correctly:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"key": "healthy",
|
||||||
|
"logging_callbacks": {
|
||||||
|
"callbacks": [
|
||||||
|
"gcs_bucket"
|
||||||
|
],
|
||||||
|
"status": "healthy",
|
||||||
|
"details": "No logger exceptions triggered, system is healthy. Manually check if logs were sent to ['gcs_bucket']"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem label="Response when key is configured incorrectly" value="Response when key is configured incorrectly">
|
||||||
|
|
||||||
|
Response when logging callbacks are not set up correctly:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"key": "healthy",
|
||||||
|
"logging_callbacks": {
|
||||||
|
"callbacks": [
|
||||||
|
"gcs_bucket"
|
||||||
|
],
|
||||||
|
"status": "unhealthy",
|
||||||
|
"details": "Logger exceptions triggered, system is unhealthy: Failed to load vertex credentials. Check to see if credentials containing partial/invalid information."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
|
@ -254,6 +254,7 @@ class LiteLLMRoutes(enum.Enum):
|
||||||
|
|
||||||
info_routes = [
|
info_routes = [
|
||||||
"/key/info",
|
"/key/info",
|
||||||
|
"/key/health",
|
||||||
"/team/info",
|
"/team/info",
|
||||||
"/team/list",
|
"/team/list",
|
||||||
"/user/info",
|
"/user/info",
|
||||||
|
@ -276,6 +277,7 @@ class LiteLLMRoutes(enum.Enum):
|
||||||
"/key/update",
|
"/key/update",
|
||||||
"/key/delete",
|
"/key/delete",
|
||||||
"/key/info",
|
"/key/info",
|
||||||
|
"/key/health",
|
||||||
# user
|
# user
|
||||||
"/user/new",
|
"/user/new",
|
||||||
"/user/update",
|
"/user/update",
|
||||||
|
@ -334,6 +336,7 @@ class LiteLLMRoutes(enum.Enum):
|
||||||
"/key/generate",
|
"/key/generate",
|
||||||
"/key/update",
|
"/key/update",
|
||||||
"/key/delete",
|
"/key/delete",
|
||||||
|
"/key/health",
|
||||||
"/key/info",
|
"/key/info",
|
||||||
"/global/spend/tags",
|
"/global/spend/tags",
|
||||||
"/global/spend/keys",
|
"/global/spend/keys",
|
||||||
|
@ -1920,3 +1923,14 @@ class CurrentItemRateLimit(TypedDict):
|
||||||
current_requests: int
|
current_requests: int
|
||||||
current_tpm: int
|
current_tpm: int
|
||||||
current_rpm: int
|
current_rpm: int
|
||||||
|
|
||||||
|
|
||||||
|
class LoggingCallbackStatus(TypedDict, total=False):
    """Result of testing the logging callbacks configured on a key.

    Declared with ``total=False``, so every field is optional at the type level.
    """

    # Names of the callbacks that were tested (e.g. ["gcs_bucket"]).
    callbacks: List[str]
    # Outcome of the test; the /key/health code in this change sets
    # "healthy", "unhealthy" or "error".
    status: str
    # Human-readable explanation of the outcome, including captured error text.
    details: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
|
class KeyHealthResponse(TypedDict, total=False):
    """Response body of the ``/key/health`` endpoint.

    Declared with ``total=False``, so every field is optional at the type level.
    """

    # Overall key status; the endpoint in this change always reports "healthy".
    key: str
    # Logging-callback test result, or None when the key has no "logging"
    # entry in its metadata.
    logging_callbacks: Optional[LoggingCallbackStatus]
|
||||||
|
|
|
@ -1448,3 +1448,167 @@ async def unblock_key(
|
||||||
)
|
)
|
||||||
|
|
||||||
return record
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
    "/key/health",
    tags=["key management"],
    dependencies=[Depends(user_api_key_auth)],
    response_model=KeyHealthResponse,
)
@management_endpoint_wrapper
async def key_health(
    request: Request,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Report the health of the key used to authenticate this request.

    Checks performed:
    - If the key's metadata contains a `logging` entry, a test log is sent
      through the key-based logging callbacks and the outcome is reported
      under `logging_callbacks`. Otherwise `logging_callbacks` is null.

    Usage

    Pass the key in the request header

    ```bash
    curl -X POST "http://localhost:4000/key/health" \
     -H "Authorization: Bearer sk-1234" \
     -H "Content-Type: application/json"
    ```

    Response when logging callbacks are set up correctly:

    ```json
    {
      "key": "healthy",
      "logging_callbacks": {
        "callbacks": [
          "gcs_bucket"
        ],
        "status": "healthy",
        "details": "No logger exceptions triggered, system is healthy. Manually check if logs were sent to ['gcs_bucket']"
      }
    }
    ```

    Response when logging callbacks are not set up correctly:

    ```json
    {
      "key": "healthy",
      "logging_callbacks": {
        "callbacks": [
          "gcs_bucket"
        ],
        "status": "unhealthy",
        "details": "Logger exceptions triggered, system is unhealthy: Failed to load vertex credentials. Check to see if credentials containing partial/invalid information."
      }
    }
    ```

    Raises:
        ProxyException: (HTTP 500) if anything fails while running the check.
    """
    try:
        metadata = user_api_key_dict.metadata

        # Stays None unless the key carries a key-level logging configuration.
        callback_status = None
        if metadata and "logging" in metadata:
            callback_status = await test_key_logging(
                user_api_key_dict=user_api_key_dict,
                request=request,
                key_logging=metadata["logging"],
            )

        return KeyHealthResponse(
            key="healthy",
            logging_callbacks=callback_status,
        )
    except Exception as e:
        # Surface every failure as a uniform proxy error.
        raise ProxyException(
            message=f"Key health check failed: {str(e)}",
            type=ProxyErrorTypes.internal_server_error,
            param=getattr(e, "param", "None"),
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
|
||||||
|
|
||||||
|
|
||||||
|
async def test_key_logging(
    user_api_key_dict: UserAPIKeyAuth,
    request: Request,
    key_logging: List[Dict[str, Any]],
) -> LoggingCallbackStatus:
    """
    Test the key-based logging configuration of a key.

    - Validates that every entry in ``key_logging`` has a ``callback_name``
    - Makes a mock completion call (``mock_response`` is set in the request
      data) so the key-based callbacks are triggered -> the user can check
      whether the request was logged
    - Captures ERROR-level records emitted on the root logger while the
      callbacks run -> if any were emitted, they are returned to the client

    Args:
        user_api_key_dict: Auth object of the key being checked.
        request: The incoming request, forwarded to ``add_litellm_data_to_request``.
        key_logging: The ``logging`` list taken from the key's metadata.

    Returns:
        LoggingCallbackStatus with ``status`` set to:
        - "error" if building or sending the mock request raised
        - "unhealthy" if any ERROR-level log output was captured
        - "healthy" otherwise

    Raises:
        ValueError: if an entry in ``key_logging`` has no ``callback_name``.
    """
    import logging
    from io import StringIO

    from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
    from litellm.proxy.proxy_server import general_settings, proxy_config

    logging_callbacks: List[str] = []
    for callback in key_logging:
        callback_name = callback.get("callback_name")
        if callback_name is None:
            raise ValueError("callback_name is required in key_logging")
        logging_callbacks.append(callback_name)

    # Temporarily attach a handler to the root logger so that errors logged
    # by the callbacks can be reported back to the caller.
    # NOTE(review): the root logger is process-wide, so ERROR records from
    # unrelated concurrent requests may also be captured during this window.
    log_capture_string = StringIO()
    ch = logging.StreamHandler(log_capture_string)
    ch.setLevel(logging.ERROR)
    logger = logging.getLogger()
    logger.addHandler(ch)

    try:
        try:
            data = {
                "model": "openai/litellm-key-health-test",
                "messages": [
                    {
                        "role": "user",
                        "content": "Hello, this is a test from litellm /key/health. No LLM API call was made for this",
                    }
                ],
                "mock_response": "test response",
            }
            data = await add_litellm_data_to_request(
                data=data,
                user_api_key_dict=user_api_key_dict,
                proxy_config=proxy_config,
                general_settings=general_settings,
                request=request,
            )
            await litellm.acompletion(
                **data
            )  # make mock completion call to trigger key based callbacks
        except Exception as e:
            return LoggingCallbackStatus(
                callbacks=logging_callbacks,
                status="error",
                details=f"Logging test failed: {str(e)}",
            )

        await asyncio.sleep(1)  # wait for callbacks to run

        # Check if any logger exceptions were triggered
        log_contents = log_capture_string.getvalue()
    finally:
        # Always detach the capture handler - including on the early "error"
        # return above and on cancellation - so handlers do not pile up on
        # the root logger across health checks.
        logger.removeHandler(ch)

    if log_contents:
        return LoggingCallbackStatus(
            callbacks=logging_callbacks,
            status="unhealthy",
            details=f"Logger exceptions triggered, system is unhealthy: {log_contents}",
        )
    else:
        return LoggingCallbackStatus(
            callbacks=logging_callbacks,
            status="healthy",
            details=f"No logger exceptions triggered, system is healthy. Manually check if logs were sent to {logging_callbacks} ",
        )
|
||||||
|
|
69
tests/otel_tests/test_key_logging_callbacks.py
Normal file
69
tests/otel_tests/test_key_logging_callbacks.py
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
"""
|
||||||
|
Tests for Key based logging callbacks
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio()
async def test_key_logging_callbacks():
    """
    End-to-end check of /key/health against a running proxy.

    1. Generate a virtual key whose metadata configures a `gcs_bucket`
       logging callback with an invalid service account.
    2. Call /key/health with that key and expect the logging callbacks
       to be reported as unhealthy.
    """
    # Step 1: create a key with a (deliberately broken) logging callback
    bad_gcs_callback = {
        "callback_name": "gcs_bucket",
        "callback_type": "success_and_failure",
        "callback_vars": {
            "gcs_bucket_name": "key-logging-project1",
            "gcs_path_service_account": "bad-service-account",
        },
    }

    async with httpx.AsyncClient() as client:
        key_response = await client.post(
            "http://0.0.0.0:4000/key/generate",
            headers={
                "Authorization": "Bearer sk-1234",
                "Content-Type": "application/json",
            },
            json={"metadata": {"logging": [bad_gcs_callback]}},
        )

    assert key_response.status_code == 200
    key_body = key_response.json()
    assert "key" in key_body
    virtual_key = key_body["key"]

    # Step 2: ask the proxy for the health of the freshly created key
    async with httpx.AsyncClient() as client:
        health_response = await client.post(
            "http://localhost:4000/key/health",
            headers={
                "Authorization": f"Bearer {virtual_key}",
                "Content-Type": "application/json",
            },
            json={},
        )

    assert health_response.status_code == 200
    health_data = health_response.json()
    print("key_health_data", health_data)

    # Validate the response shape and the reported callback status
    assert "key" in health_data
    assert "logging_callbacks" in health_data

    callback_report = health_data["logging_callbacks"]
    assert callback_report["callbacks"] == ["gcs_bucket"]
    assert callback_report["status"] == "unhealthy"
    assert "Failed to load vertex credentials" in callback_report["details"]
|
Loading…
Add table
Add a link
Reference in a new issue