feat: Add one-step integration testing with server auto-start
Add support for server:<config> format in --stack-config option to enable seamless one-step integration testing. This eliminates the need to manually start servers in separate terminals before running tests.

Features:
- Auto-start llama stack server if target port is available
- Reuse existing server if port is already in use
- Health check polling with 2-minute timeout
- Custom port support via server:<config>:<port>
- Clean test output with background server execution
- Backward compatibility with all existing formats

Examples:
- pytest tests/integration/inference/ --stack-config=server:fireworks
- pytest tests/integration/safety/ --stack-config=server:together:8322

Test Plan:
- Verified server auto-start with available ports
- Verified server reuse with occupied ports
- Verified health check polling via /v1/health endpoint
- Tested custom port configuration
- Confirmed backward compatibility with existing config formats
This commit is contained in:
parent 958600a5c1
commit 6060353016
2 changed files with 110 additions and 5 deletions
@ -9,7 +9,9 @@ pytest --help
```

Here are the most important options:

- `--stack-config`: specify the stack config to use. You have four ways to point to a stack:
  - **`server:<config>`** - automatically start a server with the given config (e.g., `server:fireworks`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running.
  - **`server:<config>:<port>`** - same as above but with a custom port (e.g., `server:together:8322`)
  - a URL which points to a Llama Stack distribution server
  - a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file
  - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface (see the example just below this list).
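For illustration, the api=provider form can be as small as a single pair. A sketch — the provider and model names here are just examples and assume the corresponding API key is set:

```bash
# Exercise only the inference API surface against the fireworks provider
pytest -s -v tests/integration/inference/ \
   --stack-config=inference=fireworks \
   --text-model=meta-llama/Llama-3.1-8B-Instruct
```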

@ -26,12 +28,39 @@ Model parameters can be influenced by the following options:

Each of these is a comma-separated list and can be used to generate multiple parameter combinations. Note that tests will be skipped if no model is specified.
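For example, passing a comma-separated list to `--text-model` runs the matching tests once per model (a sketch; the second model identifier is a placeholder):

```bash
pytest -s -v tests/integration/inference/ \
   --stack-config=fireworks \
   --text-model=meta-llama/Llama-3.1-8B-Instruct,meta-llama/Llama-3.1-70B-Instruct
```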

Experimental, under development, options:
- `--record-responses`: record new API responses instead of using cached ones
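A hypothetical invocation that re-records responses while running the inference tests, assuming the flag behaves as described above:

```bash
pytest -s -v tests/integration/inference/ \
   --stack-config=fireworks \
   --text-model=meta-llama/Llama-3.1-8B-Instruct \
   --record-responses
```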

## Examples

### Testing against a Server

Run all text inference tests by auto-starting a server with the `fireworks` config:

```bash
pytest -s -v tests/integration/inference/test_text_inference.py \
   --stack-config=server:fireworks \
   --text-model=meta-llama/Llama-3.1-8B-Instruct
```

Run tests with auto-server startup on a custom port:

```bash
pytest -s -v tests/integration/inference/ \
   --stack-config=server:together:8322 \
   --text-model=meta-llama/Llama-3.1-8B-Instruct
```

Run multiple test suites with auto-server (eliminates manual server management):

```bash
# Auto-start server and run all integration tests
export FIREWORKS_API_KEY=<your_key>

pytest -s -v tests/integration/inference/ tests/integration/safety/ tests/integration/agents/ \
   --stack-config=server:fireworks \
   --text-model=meta-llama/Llama-3.1-8B-Instruct
```
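Once an auto-started server is up, you can also probe it directly. A quick check against the health endpoint, assuming the default port 8321 used by the test fixture:

```bash
curl http://localhost:8321/v1/health
```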

### Testing with Library Client

Run all text inference tests with the `together` distribution:

```bash
@ -6,9 +6,13 @@

import inspect
import os
import socket
import subprocess
import tempfile
import time

import pytest
import requests
import yaml
from llama_stack_client import LlamaStackClient
from openai import OpenAI

@ -17,6 +21,44 @@ from llama_stack import LlamaStackAsLibraryClient

from llama_stack.distribution.stack import run_config_from_adhoc_config_spec
from llama_stack.env import get_env_or_fail


DEFAULT_PORT = 8321


def is_port_available(port: int, host: str = "localhost") -> bool:
    """Check if a port is available for binding."""
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.bind((host, port))
            return True
    except OSError:
        return False


def start_llama_stack_server(config_name: str) -> subprocess.Popen:
    """Start a llama stack server with the given config."""
    cmd = ["llama", "stack", "run", config_name]

    # Start server in background
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    return process


def wait_for_server_ready(base_url: str, timeout: int = 120) -> bool:
    """Wait for the server to be ready by polling the health endpoint."""
    health_url = f"{base_url}/v1/health"
    start_time = time.time()

    while time.time() - start_time < timeout:
        try:
            response = requests.get(health_url, timeout=5)
            if response.status_code == 200:
                return True
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            pass
        time.sleep(0.5)

    return False


@pytest.fixture(scope="session")
def provider_data():

@ -122,6 +164,40 @@ def llama_stack_client(request, provider_data):

    if not config:
        raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")

    # Handle server:<config_name> format or server:<config_name>:<port>
    if config.startswith("server:"):
        parts = config.split(":")
        config_name = parts[1]
        port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
        base_url = f"http://localhost:{port}"

        # Check if port is available
        if is_port_available(port):
            print(f"Starting llama stack server with config '{config_name}' on port {port}...")

            # Start server
            server_process = start_llama_stack_server(config_name)

            # Wait for server to be ready
            if not wait_for_server_ready(base_url, timeout=120):
                print("Server failed to start within timeout")
                server_process.terminate()
                raise RuntimeError(
                    f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid."
                )

            print(f"Server is ready at {base_url}")

            # Store process for potential cleanup (pytest will handle termination at session end)
            request.session._llama_stack_server_process = server_process
        else:
            print(f"Port {port} is already in use, assuming server is already running...")

        return LlamaStackClient(
            base_url=base_url,
            provider_data=provider_data,
        )

    # check if this looks like a URL
    if config.startswith("http") or "//" in config:
        return LlamaStackClient(
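The fixture above stores the server process on the pytest session and leaves termination to the end of the session. A minimal sketch of how such cleanup could be wired through a conftest.py hook, assuming the `_llama_stack_server_process` attribute set above (illustrative only, not part of this diff):

```python
import subprocess


def pytest_sessionfinish(session, exitstatus):
    """Terminate the auto-started llama stack server, if this session created one."""
    proc = getattr(session, "_llama_stack_server_process", None)
    if proc is None:
        return
    proc.terminate()
    try:
        proc.wait(timeout=10)
    except subprocess.TimeoutExpired:
        proc.kill()
```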