mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 20:14:13 +00:00
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Python Package Build Test / build (3.13) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 4s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 4s
Vector IO Integration Tests / test-matrix (push) Failing after 4s
Python Package Build Test / build (3.12) (push) Failing after 3s
Test External API and Providers / test-external (venv) (push) Failing after 4s
Unit Tests / unit-tests (3.12) (push) Failing after 4s
Unit Tests / unit-tests (3.13) (push) Failing after 3s
UI Tests / ui-tests (22) (push) Successful in 33s
Pre-commit / pre-commit (push) Successful in 1m15s
Our integration tests need to be 'grouped' because each group often needs a specific set of models it works with. We separated vision tests due to this, and we have a separate set of tests which test "Responses" API. This PR makes this system a bit more official so it is very easy to target these groups and apply all testing infrastructure towards all the groups (for example, record-replay) uniformly. There are three suites declared: - base - vision - responses Note that our CI currently runs the "base" and "vision" suites. You can use the `--suite` option when running pytest (or any of the testing scripts or workflows.) For example: ``` OLLAMA_URL=http://localhost:11434 \ pytest -s -v tests/integration/ --stack-config starter --suite vision ```
256 lines
8.8 KiB
Python
256 lines
8.8 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
import inspect
|
|
import itertools
|
|
import os
|
|
import textwrap
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from dotenv import load_dotenv
|
|
|
|
from llama_stack.log import get_logger
|
|
|
|
from .suites import SUITE_DEFINITIONS
|
|
|
|
logger = get_logger(__name__, category="tests")
|
|
|
|
|
|
@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """Hook wrapper that records each test's call-phase result on the item.

    Stashes ``execution_outcome`` and ``was_xfail`` attributes on the test
    item so a later teardown hook can see how the test actually finished.
    """
    result = (yield).get_result()
    # Only the "call" phase reflects the test body itself (not setup/teardown).
    if result.when == "call":
        item.execution_outcome = result.outcome
        item.was_xfail = getattr(result, "wasxfail", False)
|
|
|
|
|
|
def pytest_sessionstart(session):
    """Prepare process-wide environment variables before the session runs."""
    # stop macOS from complaining about duplicate OpenMP libraries
    os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
    # Default to replaying recorded inference unless the caller already chose a mode.
    os.environ.setdefault("LLAMA_STACK_TEST_INFERENCE_MODE", "replay")
|
|
|
|
|
|
def pytest_runtest_teardown(item):
    """Optionally pause between tests that exercise model-serving endpoints.

    Honors ``LLAMA_STACK_TEST_INTERVAL_SECONDS`` for tests under the
    inference/safety/agents trees, but only when the test actually ran
    (passed or failed) and was not an expected failure (xfail).
    """
    ran_outcome = getattr(item, "execution_outcome", None)
    expected_failure = getattr(item, "was_xfail", False)

    throttled_groups = ("inference/", "safety/", "agents/")
    if not any(group in item.nodeid for group in throttled_groups):
        return

    logger.debug(f"Test '{item.nodeid}' outcome was '{ran_outcome}' (xfail={expected_failure})")
    if ran_outcome in ("passed", "failed") and not expected_failure:
        pause = os.getenv("LLAMA_STACK_TEST_INTERVAL_SECONDS")
        if pause:
            time.sleep(float(pause))
|
|
|
|
|
|
def pytest_configure(config):
    """Apply global pytest settings, load .env, export --env vars, expand suites.

    Suite defaults only fill in options the user did not set explicitly on the
    command line, so explicit CLI flags always win.
    """
    config.option.tbstyle = "short"
    config.option.disable_warnings = True

    load_dotenv()

    # Export any KEY=value pairs passed via --env into the process environment.
    for pair in config.getoption("--env") or []:
        key, value = pair.split("=", 1)
        os.environ[key] = value

    raw_suites = config.getoption("--suite")
    selected: list[str] = []
    if raw_suites:
        selected = [token.strip() for token in str(raw_suites).split(",") if token.strip()]
        unrecognized = [token for token in selected if token not in SUITE_DEFINITIONS]
        if unrecognized:
            raise pytest.UsageError(
                f"Unknown suite(s): {', '.join(unrecognized)}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}"
            )

    # Prefill per-suite default option values without clobbering explicit flags.
    for suite_name in selected:
        for dest, value in SUITE_DEFINITIONS.get(suite_name, {}).get("defaults", {}).items():
            if not getattr(config.option, dest, None):
                setattr(config.option, dest, value)
|
|
|
|
|
|
def pytest_addoption(parser):
    """Register the integration-test CLI options (stack pointer, models, suites)."""
    parser.addoption(
        "--stack-config",
        help=textwrap.dedent(
            """
            a 'pointer' to the stack. this can be either be:
            (a) a template name like `starter`, or
            (b) a path to a run.yaml file, or
            (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`
            """
        ),
    )
    parser.addoption("--env", action="append", help="Set environment variables, e.g. --env KEY=value")

    # Plain string-valued options: (flag, help text).
    for flag, help_text in (
        ("--text-model", "comma-separated list of text models. Fixture name: text_model_id"),
        ("--vision-model", "comma-separated list of vision models. Fixture name: vision_model_id"),
        ("--embedding-model", "comma-separated list of embedding models. Fixture name: embedding_model_id"),
        ("--safety-shield", "comma-separated list of safety shields. Fixture name: shield_id"),
        ("--judge-model", "Specify the judge model to use for testing"),
    ):
        parser.addoption(flag, help=help_text)

    parser.addoption(
        "--embedding-dimension",
        type=int,
        default=384,
        help="Output dimensionality of the embedding model to use for testing. Default: 384",
    )
    parser.addoption(
        "--report",
        help="Path where the test report should be written, e.g. --report=/path/to/report.md",
    )

    available_suites = ", ".join(sorted(SUITE_DEFINITIONS.keys()))
    suite_help = (
        "Comma-separated integration test suites to narrow collection and prefill defaults. "
        "Available: "
        f"{available_suites}. "
        "Explicit CLI flags (e.g., --text-model) override suite defaults. "
        "Examples: --suite=responses or --suite=responses,vision."
    )
    parser.addoption("--suite", help=suite_help)
|
|
|
|
|
|
# Human-friendly abbreviations used when composing parametrized test IDs.
MODEL_SHORT_IDS = {
    "meta-llama/Llama-3.2-3B-Instruct": "3B",
    "meta-llama/Llama-3.1-8B-Instruct": "8B",
    "meta-llama/Llama-3.1-70B-Instruct": "70B",
    "meta-llama/Llama-3.1-405B-Instruct": "405B",
    "meta-llama/Llama-3.2-11B-Vision-Instruct": "11B",
    "meta-llama/Llama-3.2-90B-Vision-Instruct": "90B",
    "meta-llama/Llama-3.3-70B-Instruct": "70B",
    "meta-llama/Llama-Guard-3-1B": "Guard1B",
    "meta-llama/Llama-Guard-3-8B": "Guard8B",
    "all-MiniLM-L6-v2": "MiniLM",
}


def get_short_id(value):
    """Return the short alias for a known model ID, or the value unchanged."""
    try:
        return MODEL_SHORT_IDS[value]
    except KeyError:
        return value
|
|
|
|
|
|
def pytest_generate_tests(metafunc):
    """
    This is the main function which processes CLI arguments and generates various combinations of parameters.
    It is also responsible for generating test IDs which are succinct enough.

    Each option can be comma separated list of values which results in multiple parameter combinations.
    """
    # Map of fixture name to its CLI option and ID prefix
    fixture_configs = {
        "text_model_id": ("--text-model", "txt"),
        "vision_model_id": ("--vision-model", "vis"),
        "embedding_model_id": ("--embedding-model", "emb"),
        "shield_id": ("--safety-shield", "shield"),
        "judge_model_id": ("--judge-model", "judge"),
        "embedding_dimension": ("--embedding-dimension", "dim"),
    }

    # Collect the requested fixtures and the value list for each. A
    # comma-separated CLI value yields multiple values; an unset option
    # yields a single None placeholder so the product below still works.
    # (A previously-built `id_parts` list here was never read; removed.)
    params = []
    param_values = {}
    for fixture_name, (option, _id_prefix) in fixture_configs.items():
        if fixture_name not in metafunc.fixturenames:
            continue

        params.append(fixture_name)
        val = metafunc.config.getoption(option)
        param_values[fixture_name] = [v.strip() for v in str(val).split(",")] if val else [None]

    if not params:
        return

    # Generate all combinations of parameter values
    value_combinations = list(itertools.product(*[param_values[p] for p in params]))

    # Generate test IDs
    test_ids = []
    non_empty_params = [(i, values) for i, values in enumerate(param_values.values()) if values[0] is not None]

    # Get actual function parameters using inspect
    test_func_params = set(inspect.signature(metafunc.function).parameters.keys())

    if non_empty_params:
        # For each combination, build an ID from the non-None parameters
        for combo in value_combinations:
            parts = []
            for param_name, val in zip(params, combo, strict=True):
                # Only include if parameter is in test function signature and value is meaningful
                if param_name in test_func_params and val:
                    prefix = fixture_configs[param_name][1]  # Get the ID prefix
                    parts.append(f"{prefix}={get_short_id(val)}")
            if parts:
                test_ids.append(":".join(parts))

    metafunc.parametrize(params, value_combinations, scope="session", ids=test_ids if test_ids else None)
|
|
|
|
|
|
pytest_plugins = ["tests.integration.fixtures.common"]
|
|
|
|
|
|
def pytest_ignore_collect(path: str, config: pytest.Config) -> bool:
    """Skip collecting paths outside the selected suite roots for speed.

    Returns True to ignore *path*, False to let pytest collect it. Only paths
    under tests/integration are ever ignored, and only when one or more
    --suite values with declared roots are in effect.
    """
    raw = config.getoption("--suite")
    if not raw:
        return False

    # Gather every root declared by the requested (known) suites.
    allowed_roots: list[str] = []
    for suite_name in (token.strip() for token in str(raw).split(",") if token.strip()):
        definition = SUITE_DEFINITIONS.get(suite_name)
        if definition:
            allowed_roots.extend(definition.get("roots", []))
    if not allowed_roots:
        return False

    candidate = Path(str(path)).resolve()
    project_root = Path(str(config.rootpath))

    # Only constrain within tests/integration to avoid ignoring unrelated tests
    if not candidate.is_relative_to((project_root / "tests" / "integration").resolve()):
        return False

    for root in allowed_roots:
        resolved_root = (project_root / root).resolve()
        if resolved_root.is_file():
            # Allow the exact file and any ancestor directories so pytest can walk into it.
            if candidate == resolved_root or (candidate.is_dir() and resolved_root.is_relative_to(candidate)):
                return False
        elif candidate.is_relative_to(resolved_root):
            # Allow anything inside an allowed directory
            return False
    return True
|