From c5e144af23ec1ae345b9a70cfcce122a4a82c67e Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 19 Jan 2024 08:45:23 -0800 Subject: [PATCH] docs(health.md): add /health/readiness and /health/liveliness to docs --- docs/my-website/docs/proxy/call_hooks.md | 2 +- docs/my-website/docs/proxy/health.md | 59 ++++++++++++++++++-- docs/my-website/docs/proxy/load_balancing.md | 3 +- docs/my-website/sidebars.js | 28 +++++++--- litellm/proxy/proxy_server.py | 2 + 5 files changed, 79 insertions(+), 15 deletions(-) diff --git a/docs/my-website/docs/proxy/call_hooks.md b/docs/my-website/docs/proxy/call_hooks.md index a92b94a86..ee49e395f 100644 --- a/docs/my-website/docs/proxy/call_hooks.md +++ b/docs/my-website/docs/proxy/call_hooks.md @@ -1,4 +1,4 @@ -# Modify Incoming Data +# Modify / Reject Incoming Requests Modify data just before making litellm completion calls call on proxy diff --git a/docs/my-website/docs/proxy/health.md b/docs/my-website/docs/proxy/health.md index d6b7a51dd..1d0b992d6 100644 --- a/docs/my-website/docs/proxy/health.md +++ b/docs/my-website/docs/proxy/health.md @@ -5,8 +5,10 @@ Use this to health check all LLMs defined in your config.yaml The proxy exposes: * a /health endpoint which returns the health of the LLM APIs -* a /test endpoint which makes a ping to the litellm server +* a /health/readiness endpoint for returning if the proxy is ready to accept requests +* a /health/liveliness endpoint for returning if the proxy is alive +## `/health` #### Request Make a GET Request to `/health` on the proxy ```shell @@ -39,7 +41,7 @@ litellm --health } ``` -## Background Health Checks +### Background Health Checks You can enable model health checks being run in the background, to prevent each model from being queried too frequently via `/health`. 
@@ -61,7 +63,7 @@ $ litellm /path/to/config.yaml
 curl --location 'http://0.0.0.0:8000/health'
 ```
 
-## Embedding Models 
+### Embedding Models 
 
 We need some way to know if the model is an embedding model when running checks, if you have this in your config, specifying mode it makes an embedding health check
 
@@ -77,7 +79,7 @@ model_list:
       mode: embedding # 👈 ADD THIS
 ```
 
-## Text Completion Models 
+### Text Completion Models 
 
 We need some way to know if the model is a text completion model when running checks, if you have this in your config, specifying mode it makes an embedding health check
 
@@ -92,3 +94,52 @@ model_list:
     model_info:
       mode: completion # 👈 ADD THIS
 ```
+
+## `/health/readiness`
+
+Unprotected endpoint for checking if proxy is ready to accept requests
+
+Example Request: 
+
+```bash
+curl --location 'http://0.0.0.0:8000/health/readiness'
+```
+
+Example Response: 
+
+*If proxy connected to a database*
+
+```json
+{
+    "status": "healthy",
+    "db": "connected"
+}
+```
+
+*If proxy not connected to a database*
+
+```json
+{
+    "status": "healthy",
+    "db": "Not connected"
+}
+```
+
+## `/health/liveliness`
+
+Unprotected endpoint for checking if proxy is alive
+
+
+Example Request: 
+
+```
+curl -X 'GET' \
+  'http://0.0.0.0:8000/health/liveliness' \
+  -H 'accept: application/json'
+```
+
+Example Response: 
+
+```json
+"I'm alive!"
+```
\ No newline at end of file
diff --git a/docs/my-website/docs/proxy/load_balancing.md b/docs/my-website/docs/proxy/load_balancing.md
index e223c2d5a..bc40ff2c7 100644
--- a/docs/my-website/docs/proxy/load_balancing.md
+++ b/docs/my-website/docs/proxy/load_balancing.md
@@ -1,5 +1,4 @@
-
-# Load Balancing - Multiple Instances of 1 model
+# Multiple Instances of 1 model
 Load balance multiple instances of the same model
 
 The proxy will handle routing requests (using LiteLLM's Router). 
**Set `rpm` in the config if you want maximize throughput** diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 995d7bcb8..900e7bc5f 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -106,29 +106,41 @@ const sidebars = { "proxy/configs", { type: 'link', - label: 'All Endpoints', + label: '📖 All Endpoints', href: 'https://litellm-api.up.railway.app/', }, "proxy/user_keys", - "proxy/load_balancing", "proxy/virtual_keys", "proxy/users", "proxy/ui", "proxy/model_management", - "proxy/reliability", - "proxy/caching", + "proxy/health", { "type": "category", - "label": "Logging, Alerting", + "label": "🔥 Load Balancing", + "items": [ + "proxy/load_balancing", + "proxy/reliability", + ] + }, + { + "type": "category", + "label": "Logging, Alerting, Caching", "items": [ "proxy/logging", "proxy/alerting", "proxy/streaming_logging", + "proxy/caching", + ] + }, + { + "type": "category", + "label": "Admin Controls", + "items": [ + "proxy/call_hooks", + "proxy/rules", ] }, - "proxy/health", - "proxy/call_hooks", - "proxy/rules", "proxy/deploy", "proxy/cli", ] diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index fdc326305..acc48af60 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -2734,6 +2734,8 @@ async def config_yaml_endpoint(config_info: ConfigYAML): @router.get("/test", tags=["health"]) async def test_endpoint(request: Request): """ + [DEPRECATED] use `/health/liveliness` instead. + A test endpoint that pings the proxy server to check if it's healthy. Parameters: