Mirror of https://github.com/BerriAI/litellm.git · synced 2025-04-27 11:43:54 +00:00
feat(health_check.py): set upperbound for api when making health check call (#7865)
All checks were successful
Read Version from pyproject.toml / read-version (push) Successful in 10s
* feat(health_check.py): set upper bound for API call duration when making a health check call, preventing a bad model's health check from hanging and causing pod restarts
* fix(health_check.py): clean up task once completed
* fix(constants.py): bump default health check timeout to 1 min
* docs(health.md): add 'health_check_timeout' to health docs on litellm
* build(proxy_server_config.yaml): add bad model to health check
parent e67f18b153 · commit 3a7b13efa2

9 changed files with 111 additions and 12 deletions
health.md:

@@ -245,6 +245,22 @@ general_settings:
   health_check_details: False
 ```
 
+## Health Check Timeout
+
+The health check timeout is set in `litellm/constants.py` and defaults to 60 seconds.
+
+This can be overridden in the config.yaml by setting `health_check_timeout` in the model_info section.
+
+```yaml
+model_list:
+  - model_name: openai/gpt-4o
+    litellm_params:
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      health_check_timeout: 10 # 👈 OVERRIDE HEALTH CHECK TIMEOUT
+```
+
 ## `/health/readiness`
 
 Unprotected endpoint for checking if proxy is ready to accept requests
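For an end-to-end check of the behavior these docs describe, here is a minimal sketch of querying the proxy's `/health` endpoint from Python. The local URL, the `sk-1234` master key, and the exact response keys are assumptions for a default local setup, not guaranteed by this commit:

```python
import requests

# Assumption: a litellm proxy running locally on port 4000 with master key sk-1234.
resp = requests.get(
    "http://0.0.0.0:4000/health",
    headers={"Authorization": "Bearer sk-1234"},
)
report = resp.json()

# A model whose check exceeds its health_check_timeout should appear among the
# unhealthy endpoints with a timeout error, instead of hanging the endpoint.
print(report.get("healthy_endpoints"))
print(report.get("unhealthy_endpoints"))
```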
litellm/constants.py:

@@ -140,3 +140,5 @@ BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES = [
 
 BATCH_STATUS_POLL_INTERVAL_SECONDS = 3600 # 1 hour
 BATCH_STATUS_POLL_MAX_ATTEMPTS = 24 # for 24 hours
+
+HEALTH_CHECK_TIMEOUT_SECONDS = 60 # 60 seconds
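The global constant is only a fallback: the per-model `health_check_timeout` set under `model_info` wins, via the `model_info.get(...) or HEALTH_CHECK_TIMEOUT_SECONDS` expression added in health_check.py below. A minimal sketch of that precedence rule (the `resolve_health_check_timeout` helper is hypothetical, for illustration only):

```python
from litellm.constants import HEALTH_CHECK_TIMEOUT_SECONDS  # 60 by default


def resolve_health_check_timeout(model_info: dict):
    # Hypothetical helper: the per-model override wins, else the global default.
    return model_info.get("health_check_timeout") or HEALTH_CHECK_TIMEOUT_SECONDS


print(resolve_health_check_timeout({"health_check_timeout": 1}))  # -> 1
print(resolve_health_check_timeout({}))                           # -> 60
```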
File diff suppressed because one or more lines are too long
@@ -6,8 +6,8 @@ model_list:
       api_base: https://exampleopenaiendpoint-production.up.railway.app
   - model_name: openai-o1
     litellm_params:
-      model: openai/o1
+      model: openai/random_sleep
       api_key: sk-1234
-      api_base: https://exampleopenaiendpoint-production.up.railway.app
-      mock_timeout: true
-      timeout: 3
+      api_base: http://0.0.0.0:8090
+    model_info:
+      health_check_timeout: 1
litellm/proxy/health_check.py:

@@ -8,7 +8,7 @@ from typing import List, Optional
 import litellm
 
 logger = logging.getLogger(__name__)
-
+from litellm.constants import HEALTH_CHECK_TIMEOUT_SECONDS
 
 ILLEGAL_DISPLAY_PARAMS = [
     "messages",
@@ -62,10 +62,28 @@ def filter_deployments_by_id(
     return filtered_deployments
 
 
+async def run_with_timeout(task, timeout):
+    try:
+        return await asyncio.wait_for(task, timeout)
+    except asyncio.TimeoutError:
+        task.cancel()
+        # Only cancel child tasks of the current task
+        current_task = asyncio.current_task()
+        for t in asyncio.all_tasks():
+            if t != current_task:
+                t.cancel()
+        try:
+            await asyncio.wait_for(task, 0.1)  # Give 100ms for cleanup
+        except (asyncio.TimeoutError, asyncio.CancelledError, Exception):
+            pass
+        return {"error": "Timeout exceeded"}
+
+
 async def _perform_health_check(model_list: list, details: Optional[bool] = True):
     """
     Perform a health check for each model in the list.
     """
 
     tasks = []
     for model in model_list:
         litellm_params = model["litellm_params"]
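Before the next hunk wires it in, here is a self-contained sketch of the pattern `run_with_timeout` is built on: bound an awaitable with `asyncio.wait_for` and turn a timeout into an error result rather than a hang. The `slow_health_check` coroutine is an illustrative stand-in for `litellm.ahealth_check`, and this simplified wrapper omits the commit's extra task-cleanup logic:

```python
import asyncio


async def slow_health_check():
    # Stand-in for a health check against a bad model that never answers.
    await asyncio.sleep(10)
    return {"status": "healthy"}


async def run_with_timeout(task, timeout):
    # Simplified shape of the helper above: bound the await and convert
    # a timeout into an error dict instead of letting the caller hang.
    try:
        return await asyncio.wait_for(task, timeout)
    except asyncio.TimeoutError:
        return {"error": "Timeout exceeded"}


async def main():
    result = await run_with_timeout(slow_health_check(), 1)
    print(result)  # {'error': 'Timeout exceeded'} after ~1s, not 10s


asyncio.run(main())
```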
@@ -74,16 +92,21 @@ async def _perform_health_check(model_list: list, details: Optional[bool] = True
         litellm_params = _update_litellm_params_for_health_check(
             model_info, litellm_params
         )
-        tasks.append(
+        timeout = model_info.get("health_check_timeout") or HEALTH_CHECK_TIMEOUT_SECONDS
+
+        task = run_with_timeout(
             litellm.ahealth_check(
-                litellm_params,
+                model["litellm_params"],
                 mode=mode,
                 prompt="test from litellm",
                 input=["test from litellm"],
-            )
+            ),
+            timeout,
         )
+        tasks.append(task)
 
-    results = await asyncio.gather(*tasks)
+    results = await asyncio.gather(*tasks, return_exceptions=True)
 
     healthy_endpoints = []
     unhealthy_endpoints = []
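The switch from `asyncio.gather(*tasks)` to `asyncio.gather(*tasks, return_exceptions=True)` is what keeps one failing deployment from aborting the whole report: exceptions are returned in the results list instead of being raised. A minimal, self-contained sketch of that behavior (the `ok`/`boom` coroutines are illustrative stand-ins):

```python
import asyncio


async def ok():
    return {"status": "healthy"}


async def boom():
    raise RuntimeError("connection refused")


async def main():
    # Without return_exceptions=True, the RuntimeError would propagate and the
    # healthy result would be lost; with it, the exception is returned in place.
    results = await asyncio.gather(ok(), boom(), return_exceptions=True)
    print(results)  # [{'status': 'healthy'}, RuntimeError('connection refused')]


asyncio.run(main())
```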
proxy_server_config.yaml:

@@ -81,6 +81,16 @@ model_list:
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
       stream_timeout: 0.001
       rpm: 1000
+  - model_name: bad-model
+    litellm_params:
+      model: openai/bad-model
+      api_key: os.environ/OPENAI_API_KEY
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      mock_timeout: True
+      timeout: 60
+      rpm: 1000
+    model_info:
+      health_check_timeout: 1
   - model_name: "*"
     litellm_params:
       model: openai/*
@@ -297,3 +297,54 @@ async def test_perform_health_check_with_health_check_model():
     assert healthy_endpoints[0]["model"] == "openai/gpt-4o-mini"
     assert len(healthy_endpoints) == 1
     assert len(unhealthy_endpoints) == 0
+
+
+@pytest.mark.asyncio
+async def test_health_check_bad_model():
+    from litellm.proxy.health_check import _perform_health_check
+    import time
+
+    model_list = [
+        {
+            "model_name": "openai-gpt-4o",
+            "litellm_params": {
+                "api_key": "sk-1234",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app",
+                "model": "openai/my-fake-openai-endpoint",
+                "mock_timeout": True,
+                "timeout": 60,
+            },
+            "model_info": {
+                "id": "ca27ca2eeea2f9e38bb274ead831948a26621a3738d06f1797253f0e6c4278c0",
+                "db_model": False,
+                "health_check_timeout": 1,
+            },
+        },
+    ]
+    details = None
+    healthy_endpoints, unhealthy_endpoints = await _perform_health_check(
+        model_list, details
+    )
+    print(f"healthy_endpoints: {healthy_endpoints}")
+    print(f"unhealthy_endpoints: {unhealthy_endpoints}")
+
+    # Track which model is actually used in the health check
+    health_check_calls = []
+
+    async def mock_health_check(litellm_params, **kwargs):
+        health_check_calls.append(litellm_params["model"])
+        await asyncio.sleep(10)
+        return {"status": "healthy"}
+
+    with patch(
+        "litellm.ahealth_check", side_effect=mock_health_check
+    ) as mock_health_check:
+        start_time = time.time()
+        healthy_endpoints, unhealthy_endpoints = await _perform_health_check(model_list)
+        end_time = time.time()
+        print("health check calls: ", health_check_calls)
+        assert len(healthy_endpoints) == 0
+        assert len(unhealthy_endpoints) == 1
+        assert (
+            end_time - start_time < 2
+        ), "Health check took longer than health_check_timeout"
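To run just this test, something like `pytest -k test_health_check_bad_model` will select it. The `end_time - start_time < 2` assertion leaves headroom above the 1-second `health_check_timeout` while still proving the call returned long before the mock's 10-second sleep, i.e. that the timeout, not the model, ended the check.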