mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
feat(tests): introduce a test "suite" concept to encompass dirs, options (#3339)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Python Package Build Test / build (3.13) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 4s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 4s
Vector IO Integration Tests / test-matrix (push) Failing after 4s
Python Package Build Test / build (3.12) (push) Failing after 3s
Test External API and Providers / test-external (venv) (push) Failing after 4s
Unit Tests / unit-tests (3.12) (push) Failing after 4s
Unit Tests / unit-tests (3.13) (push) Failing after 3s
UI Tests / ui-tests (22) (push) Successful in 33s
Pre-commit / pre-commit (push) Successful in 1m15s
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Python Package Build Test / build (3.13) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 4s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 4s
Vector IO Integration Tests / test-matrix (push) Failing after 4s
Python Package Build Test / build (3.12) (push) Failing after 3s
Test External API and Providers / test-external (venv) (push) Failing after 4s
Unit Tests / unit-tests (3.12) (push) Failing after 4s
Unit Tests / unit-tests (3.13) (push) Failing after 3s
UI Tests / ui-tests (22) (push) Successful in 33s
Pre-commit / pre-commit (push) Successful in 1m15s
Our integration tests need to be 'grouped' because each group often needs a specific set of models it works with. We separated vision tests due to this, and we have a separate set of tests which test "Responses" API. This PR makes this system a bit more official so it is very easy to target these groups and apply all testing infrastructure towards all the groups (for example, record-replay) uniformly. There are three suites declared: - base - vision - responses Note that our CI currently runs the "base" and "vision" suites. You can use the `--suite` option when running pytest (or any of the testing scripts or workflows.) For example: ``` OLLAMA_URL=http://localhost:11434 \ pytest -s -v tests/integration/ --stack-config starter --suite vision ```
This commit is contained in:
parent
0c2757a05b
commit
47b640370e
25 changed files with 255 additions and 161 deletions
|
@ -6,15 +6,17 @@
|
|||
import inspect
|
||||
import itertools
|
||||
import os
|
||||
import platform
|
||||
import textwrap
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
from .suites import SUITE_DEFINITIONS
|
||||
|
||||
logger = get_logger(__name__, category="tests")
|
||||
|
||||
|
||||
|
@ -61,9 +63,22 @@ def pytest_configure(config):
|
|||
key, value = env_var.split("=", 1)
|
||||
os.environ[key] = value
|
||||
|
||||
if platform.system() == "Darwin": # Darwin is the system name for macOS
|
||||
os.environ["DISABLE_CODE_SANDBOX"] = "1"
|
||||
logger.info("Setting DISABLE_CODE_SANDBOX=1 for macOS")
|
||||
suites_raw = config.getoption("--suite")
|
||||
suites: list[str] = []
|
||||
if suites_raw:
|
||||
suites = [p.strip() for p in str(suites_raw).split(",") if p.strip()]
|
||||
unknown = [p for p in suites if p not in SUITE_DEFINITIONS]
|
||||
if unknown:
|
||||
raise pytest.UsageError(
|
||||
f"Unknown suite(s): {', '.join(unknown)}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}"
|
||||
)
|
||||
for suite in suites:
|
||||
suite_def = SUITE_DEFINITIONS.get(suite, {})
|
||||
defaults: dict = suite_def.get("defaults", {})
|
||||
for dest, value in defaults.items():
|
||||
current = getattr(config.option, dest, None)
|
||||
if not current:
|
||||
setattr(config.option, dest, value)
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
|
@ -105,16 +120,21 @@ def pytest_addoption(parser):
|
|||
default=384,
|
||||
help="Output dimensionality of the embedding model to use for testing. Default: 384",
|
||||
)
|
||||
parser.addoption(
|
||||
"--record-responses",
|
||||
action="store_true",
|
||||
help="Record new API responses instead of using cached ones.",
|
||||
)
|
||||
parser.addoption(
|
||||
"--report",
|
||||
help="Path where the test report should be written, e.g. --report=/path/to/report.md",
|
||||
)
|
||||
|
||||
available_suites = ", ".join(sorted(SUITE_DEFINITIONS.keys()))
|
||||
suite_help = (
|
||||
"Comma-separated integration test suites to narrow collection and prefill defaults. "
|
||||
"Available: "
|
||||
f"{available_suites}. "
|
||||
"Explicit CLI flags (e.g., --text-model) override suite defaults. "
|
||||
"Examples: --suite=responses or --suite=responses,vision."
|
||||
)
|
||||
parser.addoption("--suite", help=suite_help)
|
||||
|
||||
|
||||
MODEL_SHORT_IDS = {
|
||||
"meta-llama/Llama-3.2-3B-Instruct": "3B",
|
||||
|
@ -197,3 +217,40 @@ def pytest_generate_tests(metafunc):
|
|||
|
||||
|
||||
pytest_plugins = ["tests.integration.fixtures.common"]
|
||||
|
||||
|
||||
def pytest_ignore_collect(path: str, config: pytest.Config) -> bool:
|
||||
"""Skip collecting paths outside the selected suite roots for speed."""
|
||||
suites_raw = config.getoption("--suite")
|
||||
if not suites_raw:
|
||||
return False
|
||||
|
||||
names = [p.strip() for p in str(suites_raw).split(",") if p.strip()]
|
||||
roots: list[str] = []
|
||||
for name in names:
|
||||
suite_def = SUITE_DEFINITIONS.get(name)
|
||||
if suite_def:
|
||||
roots.extend(suite_def.get("roots", []))
|
||||
if not roots:
|
||||
return False
|
||||
|
||||
p = Path(str(path)).resolve()
|
||||
|
||||
# Only constrain within tests/integration to avoid ignoring unrelated tests
|
||||
integration_root = (Path(str(config.rootpath)) / "tests" / "integration").resolve()
|
||||
if not p.is_relative_to(integration_root):
|
||||
return False
|
||||
|
||||
for r in roots:
|
||||
rp = (Path(str(config.rootpath)) / r).resolve()
|
||||
if rp.is_file():
|
||||
# Allow the exact file and any ancestor directories so pytest can walk into it.
|
||||
if p == rp:
|
||||
return False
|
||||
if p.is_dir() and rp.is_relative_to(p):
|
||||
return False
|
||||
else:
|
||||
# Allow anything inside an allowed directory
|
||||
if p.is_relative_to(rp):
|
||||
return False
|
||||
return True
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue