Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-12 16:16:09 +00:00)
ci: let pytest run the distro server (#2586)
# What does this PR do?

* Use #2580 functionality to auto-start the server with the tests
* Reduce timeout to 30sec
* Print server logs on errors
* Pytest logs are collected to a file pytest.log

Signed-off-by: Sébastien Han <seb@redhat.com>
parent f4950f4ef0
commit dae1fcd3c2
2 changed files with 56 additions and 42 deletions
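Context for the devnull change in the first hunk: with `stdout=subprocess.PIPE` and no reader, a chatty child process can fill the OS pipe buffer and block forever. A minimal illustration of the hazard (illustrative only, not code from this diff):

```python
import subprocess
import sys

# Illustrative hazard: the child writes more than the pipe buffer holds
# and nobody reads, so the child blocks and wait() never returns.
p = subprocess.Popen(
    [sys.executable, "-c", "print('x' * 10_000_000)"],
    stdout=subprocess.PIPE,
)
# p.wait()       # would deadlock here; the pipe is full
p.communicate()  # safe: drains the pipe while waiting
```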
```diff
@@ -37,26 +37,42 @@ def is_port_available(port: int, host: str = "localhost") -> bool:
 def start_llama_stack_server(config_name: str) -> subprocess.Popen:
     """Start a llama stack server with the given config."""
     cmd = ["llama", "stack", "run", config_name]
 
     # Start server in background
-    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    devnull = open(os.devnull, "w")
+    process = subprocess.Popen(
+        cmd,
+        stdout=devnull,  # redirect stdout to devnull to prevent deadlock
+        stderr=devnull,  # redirect stderr to devnull to prevent deadlock
+        text=True,
+        env={**os.environ, "LLAMA_STACK_LOG_FILE": "server.log"},
+    )
     return process
 
 
-def wait_for_server_ready(base_url: str, timeout: int = 120) -> bool:
+def wait_for_server_ready(base_url: str, timeout: int = 30, process: subprocess.Popen | None = None) -> bool:
     """Wait for the server to be ready by polling the health endpoint."""
     health_url = f"{base_url}/v1/health"
     start_time = time.time()
 
     while time.time() - start_time < timeout:
+        if process and process.poll() is not None:
+            print(f"Server process terminated with return code: {process.returncode}")
+            return False
+
         try:
             response = requests.get(health_url, timeout=5)
             if response.status_code == 200:
                 return True
         except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
             pass
+
+        # Print progress every 5 seconds
+        elapsed = time.time() - start_time
+        if int(elapsed) % 5 == 0 and elapsed > 0:
+            print(f"Waiting for server at {base_url}... ({elapsed:.1f}s elapsed)")
 
         time.sleep(0.5)
 
     print(f"Server failed to respond within {timeout} seconds")
     return False
```
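Taken together, `start_llama_stack_server` and `wait_for_server_ready` are driven from the `llama_stack_client` fixture in the next hunk. As a rough standalone sketch of the intended flow (the config name `ci-tests` and port `8321` are illustrative assumptions, not values taken from this diff):

```python
# Hypothetical standalone driver for the two helpers above.
# "ci-tests" and port 8321 are assumptions for illustration only.
proc = start_llama_stack_server("ci-tests")
try:
    if not wait_for_server_ready("http://localhost:8321", timeout=30, process=proc):
        raise RuntimeError("Server did not come up; see server.log")
    # ... issue test requests against the server here ...
finally:
    proc.terminate()
    proc.wait(timeout=10)
```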
@ -179,11 +195,12 @@ def llama_stack_client(request, provider_data):
|
|||
server_process = start_llama_stack_server(config_name)
|
||||
|
||||
# Wait for server to be ready
|
||||
if not wait_for_server_ready(base_url, timeout=120):
|
||||
if not wait_for_server_ready(base_url, timeout=30, process=server_process):
|
||||
print("Server failed to start within timeout")
|
||||
server_process.terminate()
|
||||
raise RuntimeError(
|
||||
f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid."
|
||||
f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
|
||||
f"See server.log for details."
|
||||
)
|
||||
|
||||
print(f"Server is ready at {base_url}")
|
||||
|
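The autouse cleanup fixture in the next hunk looks the process up under `request.session._llama_stack_server_process`. This diff does not show where that attribute is set; presumably the fixture stashes the handle on the session right after a successful start, along these lines (placement is an assumption):

```python
# Assumption: store the Popen handle on the pytest session so the
# session-scoped cleanup fixture below can terminate it at teardown.
request.session._llama_stack_server_process = server_process
```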
@ -227,3 +244,31 @@ def llama_stack_client(request, provider_data):
|
|||
def openai_client(client_with_models):
|
||||
base_url = f"{client_with_models.base_url}/v1/openai/v1"
|
||||
return OpenAI(base_url=base_url, api_key="fake")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def cleanup_server_process(request):
|
||||
"""Cleanup server process at the end of the test session."""
|
||||
yield # Run tests
|
||||
|
||||
if hasattr(request.session, "_llama_stack_server_process"):
|
||||
server_process = request.session._llama_stack_server_process
|
||||
if server_process:
|
||||
if server_process.poll() is None:
|
||||
print("Terminating llama stack server process...")
|
||||
else:
|
||||
print(f"Server process already terminated with return code: {server_process.returncode}")
|
||||
return
|
||||
try:
|
||||
server_process.terminate()
|
||||
server_process.wait(timeout=10)
|
||||
print("Server process terminated gracefully")
|
||||
except subprocess.TimeoutExpired:
|
||||
print("Server process did not terminate gracefully, killing it")
|
||||
server_process.kill()
|
||||
server_process.wait()
|
||||
print("Server process killed")
|
||||
except Exception as e:
|
||||
print(f"Error during server cleanup: {e}")
|
||||
else:
|
||||
print("Server process not found - won't be able to cleanup")
|
||||
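The terminate-then-kill escalation in `cleanup_server_process` is the usual graceful-shutdown pattern: send SIGTERM, allow a bounded grace period, then fall back to SIGKILL. A minimal self-contained restatement (`stop_process` is a hypothetical name, not part of this diff):

```python
import subprocess

def stop_process(proc: subprocess.Popen, grace: float = 10.0) -> None:
    """Terminate a child process, escalating to kill after a grace period."""
    if proc.poll() is not None:
        return  # already exited, nothing to do
    proc.terminate()  # polite SIGTERM first
    try:
        proc.wait(timeout=grace)
    except subprocess.TimeoutExpired:
        proc.kill()  # SIGKILL if SIGTERM was ignored
        proc.wait()
```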