feat(tests): introduce a test "suite" concept to encompass dirs, options (#3339)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Python Package Build Test / build (3.13) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 4s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 4s
Vector IO Integration Tests / test-matrix (push) Failing after 4s
Python Package Build Test / build (3.12) (push) Failing after 3s
Test External API and Providers / test-external (venv) (push) Failing after 4s
Unit Tests / unit-tests (3.12) (push) Failing after 4s
Unit Tests / unit-tests (3.13) (push) Failing after 3s
UI Tests / ui-tests (22) (push) Successful in 33s
Pre-commit / pre-commit (push) Successful in 1m15s

Our integration tests need to be 'grouped' because each group often
needs a specific set of models it works with. We separated vision tests
due to this, and we have a separate set of tests which test "Responses"
API.

This PR makes this system a bit more official so it is very easy to
target these groups and apply all testing infrastructure towards all the
groups (for example, record-replay) uniformly.

There are three suites declared:
- base
- vision
- responses

Note that our CI currently runs the "base" and "vision" suites.

You can use the `--suite` option when running pytest (or any of the
testing scripts or workflows.) For example:
```
OLLAMA_URL=http://localhost:11434 \
  pytest -s -v tests/integration/ --stack-config starter --suite vision
```
This commit is contained in:
Ashwin Bharambe 2025-09-05 13:58:49 -07:00 committed by GitHub
parent 0c2757a05b
commit 47b640370e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 255 additions and 161 deletions

View file

@ -6,15 +6,17 @@
import inspect
import itertools
import os
import platform
import textwrap
import time
from pathlib import Path
import pytest
from dotenv import load_dotenv
from llama_stack.log import get_logger
from .suites import SUITE_DEFINITIONS
logger = get_logger(__name__, category="tests")
@ -61,9 +63,22 @@ def pytest_configure(config):
key, value = env_var.split("=", 1)
os.environ[key] = value
if platform.system() == "Darwin": # Darwin is the system name for macOS
os.environ["DISABLE_CODE_SANDBOX"] = "1"
logger.info("Setting DISABLE_CODE_SANDBOX=1 for macOS")
suites_raw = config.getoption("--suite")
suites: list[str] = []
if suites_raw:
suites = [p.strip() for p in str(suites_raw).split(",") if p.strip()]
unknown = [p for p in suites if p not in SUITE_DEFINITIONS]
if unknown:
raise pytest.UsageError(
f"Unknown suite(s): {', '.join(unknown)}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}"
)
for suite in suites:
suite_def = SUITE_DEFINITIONS.get(suite, {})
defaults: dict = suite_def.get("defaults", {})
for dest, value in defaults.items():
current = getattr(config.option, dest, None)
if not current:
setattr(config.option, dest, value)
def pytest_addoption(parser):
@ -105,16 +120,21 @@ def pytest_addoption(parser):
default=384,
help="Output dimensionality of the embedding model to use for testing. Default: 384",
)
parser.addoption(
"--record-responses",
action="store_true",
help="Record new API responses instead of using cached ones.",
)
parser.addoption(
"--report",
help="Path where the test report should be written, e.g. --report=/path/to/report.md",
)
available_suites = ", ".join(sorted(SUITE_DEFINITIONS.keys()))
suite_help = (
"Comma-separated integration test suites to narrow collection and prefill defaults. "
"Available: "
f"{available_suites}. "
"Explicit CLI flags (e.g., --text-model) override suite defaults. "
"Examples: --suite=responses or --suite=responses,vision."
)
parser.addoption("--suite", help=suite_help)
MODEL_SHORT_IDS = {
"meta-llama/Llama-3.2-3B-Instruct": "3B",
@ -197,3 +217,40 @@ def pytest_generate_tests(metafunc):
pytest_plugins = ["tests.integration.fixtures.common"]
def pytest_ignore_collect(path: str, config: pytest.Config) -> bool:
"""Skip collecting paths outside the selected suite roots for speed."""
suites_raw = config.getoption("--suite")
if not suites_raw:
return False
names = [p.strip() for p in str(suites_raw).split(",") if p.strip()]
roots: list[str] = []
for name in names:
suite_def = SUITE_DEFINITIONS.get(name)
if suite_def:
roots.extend(suite_def.get("roots", []))
if not roots:
return False
p = Path(str(path)).resolve()
# Only constrain within tests/integration to avoid ignoring unrelated tests
integration_root = (Path(str(config.rootpath)) / "tests" / "integration").resolve()
if not p.is_relative_to(integration_root):
return False
for r in roots:
rp = (Path(str(config.rootpath)) / r).resolve()
if rp.is_file():
# Allow the exact file and any ancestor directories so pytest can walk into it.
if p == rp:
return False
if p.is_dir() and rp.is_relative_to(p):
return False
else:
# Allow anything inside an allowed directory
if p.is_relative_to(rp):
return False
return True