feat(tests): migrate to global "setups" system for test configuration

This PR refactors the integration test system from suite-specific "contexts" to global "setups". This provides better separation of concerns: **suites = what to test, setups = how to configure.**

Key changes:
- New `tests/integration/setups.py` with global, reusable configurations (ollama, vllm, gpt, claude)
- Simplified `tests/integration/suites.py` to only define test collection roots + default setup references
- Updated CLI from `--context` to `--setup` parameter that works with any suite
- Modified `scripts/integration-tests.sh` to use `--test-setup` instead of `--test-context`
- Updated documentation to reflect the new global setup system

Benefits:
- Setups can be reused across multiple suites (e.g., use "gpt" with any suite)
- Clear separation between test selection (suites) and configuration (setups)
- Easier to add new configurations without modifying existing suites
- Centralized configuration management

Usage examples:
- `pytest tests/integration --suite=responses --setup=gpt`
- `pytest tests/integration --suite=vision --setup=ollama`
- `pytest tests/integration --suite=base --setup=vllm`
2025-10-05 12:21:52 +00:00 · 2025-09-08 14:56:08 -07:00 · 2025-09-08 14:56:08 -07:00 · c662d8aa31
commit c662d8aa31
parent 47b640370e
10 changed files with 272 additions and 178 deletions
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -15,7 +15,7 @@ from dotenv import load_dotenv

 from llama_stack.log import get_logger

-from .suites import SUITE_DEFINITIONS
+from .suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS

 logger = get_logger(__name__, category="tests")

@@ -63,19 +63,33 @@ def pytest_configure(config):
        key, value = env_var.split("=", 1)
        os.environ[key] = value

-    suites_raw = config.getoption("--suite")
-    suites: list[str] = []
-    if suites_raw:
-        suites = [p.strip() for p in str(suites_raw).split(",") if p.strip()]
-        unknown = [p for p in suites if p not in SUITE_DEFINITIONS]
-        if unknown:
+    inference_mode = config.getoption("--inference-mode")
+    os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = inference_mode
+
+    suite = config.getoption("--suite")
+    if suite:
+        if suite not in SUITE_DEFINITIONS:
+            raise pytest.UsageError(f"Unknown suite: {suite}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}")
+
+    # Apply setups (global parameterizations): env + defaults
+    setup = config.getoption("--setup")
+    if suite and not setup:
+        setup = SUITE_DEFINITIONS[suite].default_setup
+
+    if setup:
+        if setup not in SETUP_DEFINITIONS:
            raise pytest.UsageError(
-                f"Unknown suite(s): {', '.join(unknown)}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}"
+                f"Unknown setup '{setup}'. Available: {', '.join(sorted(SETUP_DEFINITIONS.keys()))}"
            )
-    for suite in suites:
-        suite_def = SUITE_DEFINITIONS.get(suite, {})
-        defaults: dict = suite_def.get("defaults", {})
-        for dest, value in defaults.items():
+
+        setup_obj = SETUP_DEFINITIONS[setup]
+        logger.info(f"Applying setup '{setup}'{' for suite ' + suite if suite else ''}")
+        # Apply env first
+        for k, v in setup_obj.env.items():
+            if k not in os.environ:
+                os.environ[k] = str(v)
+        # Apply defaults if not provided explicitly
+        for dest, value in setup_obj.defaults.items():
            current = getattr(config.option, dest, None)
            if not current:
                setattr(config.option, dest, value)
@@ -120,6 +134,13 @@ def pytest_addoption(parser):
        default=384,
        help="Output dimensionality of the embedding model to use for testing. Default: 384",
    )
+
+    parser.addoption(
+        "--inference-mode",
+        help="Inference mode: { record, replay, live } (default: replay)",
+        choices=["record", "replay", "live"],
+        default="replay",
+    )
    parser.addoption(
        "--report",
        help="Path where the test report should be written, e.g. --report=/path/to/report.md",
@@ -127,14 +148,18 @@ def pytest_addoption(parser):

    available_suites = ", ".join(sorted(SUITE_DEFINITIONS.keys()))
    suite_help = (
-        "Comma-separated integration test suites to narrow collection and prefill defaults. "
-        "Available: "
-        f"{available_suites}. "
-        "Explicit CLI flags (e.g., --text-model) override suite defaults. "
-        "Examples: --suite=responses or --suite=responses,vision."
+        f"Single test suite to run (narrows collection). Available: {available_suites}. Example: --suite=responses"
    )
    parser.addoption("--suite", help=suite_help)

+    # Global setups for any suite
+    available_setups = ", ".join(sorted(SETUP_DEFINITIONS.keys()))
+    setup_help = (
+        f"Global test setup configuration. Available: {available_setups}. "
+        "Can be used with any suite. Example: --setup=ollama"
+    )
+    parser.addoption("--setup", help=setup_help)
+

 MODEL_SHORT_IDS = {
    "meta-llama/Llama-3.2-3B-Instruct": "3B",
@@ -221,16 +246,12 @@ pytest_plugins = ["tests.integration.fixtures.common"]

 def pytest_ignore_collect(path: str, config: pytest.Config) -> bool:
    """Skip collecting paths outside the selected suite roots for speed."""
-    suites_raw = config.getoption("--suite")
-    if not suites_raw:
+    suite = config.getoption("--suite")
+    if not suite:
        return False

-    names = [p.strip() for p in str(suites_raw).split(",") if p.strip()]
-    roots: list[str] = []
-    for name in names:
-        suite_def = SUITE_DEFINITIONS.get(name)
-        if suite_def:
-            roots.extend(suite_def.get("roots", []))
+    sobj = SUITE_DEFINITIONS.get(suite)
+    roots: list[str] = sobj.get("roots", []) if isinstance(sobj, dict) else getattr(sobj, "roots", [])
    if not roots:
        return False