ci: let pytest run the distro server (#2586)

# What does this PR do?

* Use the functionality from #2580 to auto-start the server with the tests
* Reduce the server startup timeout from 120 to 30 seconds
* Print server logs on error (see the sketch after this list)
* Collect pytest logs in a `pytest.log` file
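For context, "print server logs on error" pairs with the `LLAMA_STACK_LOG_FILE=server.log` redirection in the diff below: the server writes its logs to a file, and a failing CI run can surface the tail of that file. A minimal sketch of how such a step might look; the helper name and tail length are illustrative, not part of this PR:

```python
import os


def dump_server_log(path: str = "server.log", max_lines: int = 100) -> None:
    """Print the tail of the server log so failed CI runs are debuggable."""
    if not os.path.exists(path):
        print(f"No server log found at {path}")
        return
    with open(path) as f:
        lines = f.readlines()
    print(f"--- last {min(max_lines, len(lines))} lines of {path} ---")
    print("".join(lines[-max_lines:]), end="")
```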

Signed-off-by: Sébastien Han <seb@redhat.com>
Sébastien Han 2025-07-03 19:51:46 +02:00, committed by GitHub
commit dae1fcd3c2 (parent f4950f4ef0)
2 changed files with 56 additions and 42 deletions

@@ -37,26 +37,42 @@ def is_port_available(port: int, host: str = "localhost") -> bool:
 def start_llama_stack_server(config_name: str) -> subprocess.Popen:
     """Start a llama stack server with the given config."""
     cmd = ["llama", "stack", "run", config_name]
 
-    # Start server in background
-    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    devnull = open(os.devnull, "w")
+    process = subprocess.Popen(
+        cmd,
+        stdout=devnull,  # redirect stdout to devnull to prevent deadlock
+        stderr=devnull,  # redirect stderr to devnull to prevent deadlock
+        text=True,
+        env={**os.environ, "LLAMA_STACK_LOG_FILE": "server.log"},
+    )
     return process
 
 
-def wait_for_server_ready(base_url: str, timeout: int = 120) -> bool:
+def wait_for_server_ready(base_url: str, timeout: int = 30, process: subprocess.Popen | None = None) -> bool:
     """Wait for the server to be ready by polling the health endpoint."""
     health_url = f"{base_url}/v1/health"
     start_time = time.time()
 
     while time.time() - start_time < timeout:
+        if process and process.poll() is not None:
+            print(f"Server process terminated with return code: {process.returncode}")
+            return False
+
         try:
             response = requests.get(health_url, timeout=5)
             if response.status_code == 200:
                 return True
         except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
             pass
 
+        # Print progress every 5 seconds
+        elapsed = time.time() - start_time
+        if int(elapsed) % 5 == 0 and elapsed > 0:
+            print(f"Waiting for server at {base_url}... ({elapsed:.1f}s elapsed)")
+
         time.sleep(0.5)
 
     print(f"Server failed to respond within {timeout} seconds")
     return False
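A note on the devnull redirection above: with `stdout=subprocess.PIPE` and nobody draining the pipe, a chatty server blocks as soon as the OS pipe buffer (commonly around 64 KiB on Linux) fills up. A minimal, self-contained sketch of that failure mode, independent of this PR:

```python
import subprocess
import sys

# A child that writes far more than an OS pipe buffer can hold.
child_code = "import sys\nfor _ in range(100_000): sys.stdout.write('x' * 80 + '\\n')"

proc = subprocess.Popen(
    [sys.executable, "-c", child_code],
    stdout=subprocess.PIPE,  # nobody ever reads this pipe...
    stderr=subprocess.PIPE,
    text=True,
)

try:
    # ...so the child blocks on write once the buffer fills, and this hangs.
    proc.wait(timeout=5)
except subprocess.TimeoutExpired:
    print("child is wedged writing to a full pipe; draining and killing it")
    proc.kill()
    proc.communicate()  # drain the pipes so the child can be reaped
```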
@@ -179,11 +195,12 @@ def llama_stack_client(request, provider_data):
         server_process = start_llama_stack_server(config_name)
 
         # Wait for server to be ready
-        if not wait_for_server_ready(base_url, timeout=120):
+        if not wait_for_server_ready(base_url, timeout=30, process=server_process):
             print("Server failed to start within timeout")
             server_process.terminate()
             raise RuntimeError(
-                f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid."
+                f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
+                f"See server.log for details."
            )
 
         print(f"Server is ready at {base_url}")
@@ -227,3 +244,31 @@ def llama_stack_client(request, provider_data):
 def openai_client(client_with_models):
     base_url = f"{client_with_models.base_url}/v1/openai/v1"
     return OpenAI(base_url=base_url, api_key="fake")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def cleanup_server_process(request):
+    """Cleanup server process at the end of the test session."""
+    yield  # Run tests
+
+    if hasattr(request.session, "_llama_stack_server_process"):
+        server_process = request.session._llama_stack_server_process
+        if server_process:
+            if server_process.poll() is None:
+                print("Terminating llama stack server process...")
+            else:
+                print(f"Server process already terminated with return code: {server_process.returncode}")
+                return
+            try:
+                server_process.terminate()
+                server_process.wait(timeout=10)
+                print("Server process terminated gracefully")
+            except subprocess.TimeoutExpired:
+                print("Server process did not terminate gracefully, killing it")
+                server_process.kill()
+                server_process.wait()
+                print("Server process killed")
+            except Exception as e:
+                print(f"Error during server cleanup: {e}")
+    else:
+        print("Server process not found - won't be able to cleanup")