diff --git a/tests/integration/README.md b/tests/integration/README.md
index 31d58c83f..fc8612139 100644
--- a/tests/integration/README.md
+++ b/tests/integration/README.md
@@ -9,7 +9,9 @@ pytest --help
 ```
 
 Here are the most important options:
-- `--stack-config`: specify the stack config to use. You have three ways to point to a stack:
+- `--stack-config`: specify the stack config to use. You have four ways to point to a stack:
+  - **`server:<config>`** - automatically start a server with the given config (e.g., `server:fireworks`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running.
+  - **`server:<config>:<port>`** - same as above but with a custom port (e.g., `server:together:8322`)
   - a URL which points to a Llama Stack distribution server
   - a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file
   - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.
@@ -26,12 +28,39 @@ Model parameters can be influenced by the following options:
 Each of these are comma-separated lists and can be used to generate multiple parameter combinations.
 Note that tests will be skipped if no model is specified.
 
-Experimental, under development, options:
-- `--record-responses`: record new API responses instead of using cached ones
-
-
 ## Examples
 
+### Testing against a Server
+
+Run all text inference tests by auto-starting a server with the `fireworks` config:
+
+```bash
+pytest -s -v tests/integration/inference/test_text_inference.py \
+   --stack-config=server:fireworks \
+   --text-model=meta-llama/Llama-3.1-8B-Instruct
+```
+
+Run tests with auto-server startup on a custom port:
+
+```bash
+pytest -s -v tests/integration/inference/ \
+   --stack-config=server:together:8322 \
+   --text-model=meta-llama/Llama-3.1-8B-Instruct
+```
+
+Run multiple test suites with auto-server (eliminates manual server management):
+
+```bash
+# Auto-start server and run all integration tests
+export FIREWORKS_API_KEY=<your_api_key>
+
+pytest -s -v tests/integration/inference/ tests/integration/safety/ tests/integration/agents/ \
+   --stack-config=server:fireworks \
+   --text-model=meta-llama/Llama-3.1-8B-Instruct
+```
+
+### Testing with Library Client
+
 Run all text inference tests with the `together` distribution:
 
 ```bash
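Note on the default port (this comes from the fixture change below, not from the README text itself): when `server:<config>` is given without a `:<port>` suffix, the client URL is built from the `LLAMA_STACK_PORT` environment variable if it is set, otherwise from port 8321. A small illustrative variant of the examples above, assuming that fallback behavior:

```bash
# Assumed behavior (see the fixture change below): with no :<port> suffix,
# the expected server port is $LLAMA_STACK_PORT if set, otherwise 8321.
export LLAMA_STACK_PORT=8322

pytest -s -v tests/integration/inference/ \
   --stack-config=server:fireworks \
   --text-model=meta-llama/Llama-3.1-8B-Instruct
```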
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 8b6b3ddbe..2d6092e44 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -6,9 +6,13 @@
 
 import inspect
 import os
+import socket
+import subprocess
 import tempfile
+import time
 
 import pytest
+import requests
 import yaml
 from llama_stack_client import LlamaStackClient
 from openai import OpenAI
@@ -17,6 +21,44 @@ from llama_stack import LlamaStackAsLibraryClient
 from llama_stack.distribution.stack import run_config_from_adhoc_config_spec
 from llama_stack.env import get_env_or_fail
 
+DEFAULT_PORT = 8321
+
+
+def is_port_available(port: int, host: str = "localhost") -> bool:
+    """Check if a port is available for binding."""
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+            sock.bind((host, port))
+            return True
+    except OSError:
+        return False
+
+
+def start_llama_stack_server(config_name: str) -> subprocess.Popen:
+    """Start a llama stack server with the given config."""
+    cmd = ["llama", "stack", "run", config_name]
+
+    # Start server in background
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    return process
+
+
+def wait_for_server_ready(base_url: str, timeout: int = 120) -> bool:
+    """Wait for the server to be ready by polling the health endpoint."""
+    health_url = f"{base_url}/v1/health"
+    start_time = time.time()
+
+    while time.time() - start_time < timeout:
+        try:
+            response = requests.get(health_url, timeout=5)
+            if response.status_code == 200:
+                return True
+        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
+            pass
+        time.sleep(0.5)
+
+    return False
+
 
 @pytest.fixture(scope="session")
 def provider_data():
@@ -122,6 +164,40 @@ def llama_stack_client(request, provider_data):
     if not config:
         raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")
 
+    # Handle server:<config_name> format or server:<config_name>:<port>
+    if config.startswith("server:"):
+        parts = config.split(":")
+        config_name = parts[1]
+        port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+        base_url = f"http://localhost:{port}"
+
+        # Check if port is available
+        if is_port_available(port):
+            print(f"Starting llama stack server with config '{config_name}' on port {port}...")
+
+            # Start server
+            server_process = start_llama_stack_server(config_name)
+
+            # Wait for server to be ready
+            if not wait_for_server_ready(base_url, timeout=120):
+                print("Server failed to start within timeout")
+                server_process.terminate()
+                raise RuntimeError(
+                    f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid."
+                )
+
+            print(f"Server is ready at {base_url}")
+
+            # Store process for potential cleanup (pytest will handle termination at session end)
+            request.session._llama_stack_server_process = server_process
+        else:
+            print(f"Port {port} is already in use, assuming server is already running...")
+
+        return LlamaStackClient(
+            base_url=base_url,
+            provider_data=provider_data,
+        )
+
     # check if this looks like a URL
     if config.startswith("http") or "//" in config:
         return LlamaStackClient(
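The fixture stores the spawned process on `request.session._llama_stack_server_process` and notes that termination happens at session end, but the teardown itself is not part of this patch. A minimal sketch of one way such cleanup could be wired up, assuming a `pytest_sessionfinish` hook in the integration tests' `conftest.py` (the hook name and `Popen` methods are standard pytest/stdlib; the attribute name comes from the fixture above):

```python
# Hypothetical conftest.py hook -- not part of this patch.
# Stops the auto-started llama stack server (if one was spawned) once the
# pytest session finishes, using the Popen handle stored by the fixture.
import subprocess


def pytest_sessionfinish(session, exitstatus):
    """Standard pytest hook that runs after the whole test session."""
    process = getattr(session, "_llama_stack_server_process", None)
    if process is None:
        return

    process.terminate()  # ask the server process to shut down
    try:
        process.wait(timeout=10)  # give it a few seconds to exit cleanly
    except subprocess.TimeoutExpired:
        process.kill()  # force-kill if it does not stop in time
```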