feat(tests): introduce a test "suite" concept to encompass dirs, options

This commit is contained in:
Ashwin Bharambe 2025-09-03 14:10:05 -07:00
parent c3d3a0b833
commit 99a2529909
24 changed files with 234 additions and 133 deletions

View file

@ -18,10 +18,10 @@ inputs:
inference-mode: inference-mode:
description: 'Inference mode (record or replay)' description: 'Inference mode (record or replay)'
required: true required: true
run-vision-tests: test-suite:
description: 'Whether to run vision tests' description: 'Test suite to use: base, responses, vision, etc.'
required: false required: false
default: 'false' default: ''
runs: runs:
using: 'composite' using: 'composite'
@ -42,7 +42,7 @@ runs:
--test-subdirs '${{ inputs.test-subdirs }}' \ --test-subdirs '${{ inputs.test-subdirs }}' \
--test-pattern '${{ inputs.test-pattern }}' \ --test-pattern '${{ inputs.test-pattern }}' \
--inference-mode '${{ inputs.inference-mode }}' \ --inference-mode '${{ inputs.inference-mode }}' \
${{ inputs.run-vision-tests == 'true' && '--run-vision-tests' || '' }} \ --test-suite '${{ inputs.test-suite }}' \
| tee pytest-${{ inputs.inference-mode }}.log | tee pytest-${{ inputs.inference-mode }}.log
@ -57,12 +57,7 @@ runs:
echo "New recordings detected, committing and pushing" echo "New recordings detected, committing and pushing"
git add tests/integration/recordings/ git add tests/integration/recordings/
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then git commit -m "Recordings update from CI (test-suite: ${{ inputs.test-suite }})"
git commit -m "Recordings update from CI (vision)"
else
git commit -m "Recordings update from CI"
fi
git fetch origin ${{ github.ref_name }} git fetch origin ${{ github.ref_name }}
git rebase origin/${{ github.ref_name }} git rebase origin/${{ github.ref_name }}
echo "Rebased successfully" echo "Rebased successfully"

View file

@ -1,17 +1,17 @@
name: Setup Ollama name: Setup Ollama
description: Start Ollama description: Start Ollama
inputs: inputs:
run-vision-tests: test-suite:
description: 'Run vision tests: "true" or "false"' description: 'Test suite to use: base, responses, vision, etc.'
required: false required: false
default: 'false' default: ''
runs: runs:
using: "composite" using: "composite"
steps: steps:
- name: Start Ollama - name: Start Ollama
shell: bash shell: bash
run: | run: |
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then if [ "${{ inputs.test-suite }}" == "vision" ]; then
image="ollama-with-vision-model" image="ollama-with-vision-model"
else else
image="ollama-with-models" image="ollama-with-models"

View file

@ -12,10 +12,10 @@ inputs:
description: 'Provider to setup (ollama or vllm)' description: 'Provider to setup (ollama or vllm)'
required: true required: true
default: 'ollama' default: 'ollama'
run-vision-tests: test-suite:
description: 'Whether to setup provider for vision tests' description: 'Test suite to use: base, responses, vision, etc.'
required: false required: false
default: 'false' default: ''
inference-mode: inference-mode:
description: 'Inference mode (record or replay)' description: 'Inference mode (record or replay)'
required: true required: true
@ -33,7 +33,7 @@ runs:
if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }} if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-ollama uses: ./.github/actions/setup-ollama
with: with:
run-vision-tests: ${{ inputs.run-vision-tests }} test-suite: ${{ inputs.test-suite }}
- name: Setup vllm - name: Setup vllm
if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }} if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}

View file

@ -50,7 +50,7 @@ jobs:
run-replay-mode-tests: run-replay-mode-tests:
runs-on: ubuntu-latest runs-on: ubuntu-latest
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }} name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.test-suite) }}
strategy: strategy:
fail-fast: false fail-fast: false
@ -61,7 +61,7 @@ jobs:
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12 # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }} python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }} client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
run-vision-tests: [true, false] test-suite: [base, vision]
steps: steps:
- name: Checkout repository - name: Checkout repository
@ -73,7 +73,7 @@ jobs:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
client-version: ${{ matrix.client-version }} client-version: ${{ matrix.client-version }}
provider: ${{ matrix.provider }} provider: ${{ matrix.provider }}
run-vision-tests: ${{ matrix.run-vision-tests }} test-suite: ${{ matrix.test-suite }}
inference-mode: 'replay' inference-mode: 'replay'
- name: Run tests - name: Run tests
@ -84,4 +84,4 @@ jobs:
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }} stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
provider: ${{ matrix.provider }} provider: ${{ matrix.provider }}
inference-mode: 'replay' inference-mode: 'replay'
run-vision-tests: ${{ matrix.run-vision-tests }} test-suite: ${{ matrix.test-suite }}

View file

@ -18,10 +18,10 @@ on:
description: 'Test against a specific provider' description: 'Test against a specific provider'
type: string type: string
default: 'ollama' default: 'ollama'
run-vision-tests: test-suite:
description: 'Whether to run vision tests' description: 'Test suite to use: base, responses, vision, etc.'
type: boolean type: string
default: false default: ''
test-pattern: test-pattern:
description: 'Regex pattern to pass to pytest -k' description: 'Regex pattern to pass to pytest -k'
type: string type: string
@ -40,7 +40,7 @@ jobs:
echo "::group::Workflow Inputs" echo "::group::Workflow Inputs"
echo "test-subdirs: ${{ inputs.test-subdirs }}" echo "test-subdirs: ${{ inputs.test-subdirs }}"
echo "test-provider: ${{ inputs.test-provider }}" echo "test-provider: ${{ inputs.test-provider }}"
echo "run-vision-tests: ${{ inputs.run-vision-tests }}" echo "test-suite: ${{ inputs.test-suite }}"
echo "test-pattern: ${{ inputs.test-pattern }}" echo "test-pattern: ${{ inputs.test-pattern }}"
echo "branch: ${{ github.ref_name }}" echo "branch: ${{ github.ref_name }}"
echo "::endgroup::" echo "::endgroup::"
@ -56,7 +56,7 @@ jobs:
python-version: "3.12" # Use single Python version for recording python-version: "3.12" # Use single Python version for recording
client-version: "latest" client-version: "latest"
provider: ${{ inputs.test-provider || 'ollama' }} provider: ${{ inputs.test-provider || 'ollama' }}
run-vision-tests: ${{ inputs.run-vision-tests }} test-suite: ${{ inputs.test-suite }}
inference-mode: 'record' inference-mode: 'record'
- name: Run and record tests - name: Run and record tests
@ -67,4 +67,4 @@ jobs:
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
provider: ${{ inputs.test-provider || 'ollama' }} provider: ${{ inputs.test-provider || 'ollama' }}
inference-mode: 'record' inference-mode: 'record'
run-vision-tests: ${{ inputs.run-vision-tests }} test-suite: ${{ inputs.test-suite }}

View file

@ -15,7 +15,7 @@ set -euo pipefail
BRANCH="" BRANCH=""
TEST_SUBDIRS="" TEST_SUBDIRS=""
TEST_PROVIDER="ollama" TEST_PROVIDER="ollama"
RUN_VISION_TESTS=false TEST_SUITE="base"
TEST_PATTERN="" TEST_PATTERN=""
# Help function # Help function
@ -27,9 +27,9 @@ Trigger the integration test recording workflow remotely. This way you do not ne
OPTIONS: OPTIONS:
-b, --branch BRANCH Branch to run the workflow on (defaults to current branch) -b, --branch BRANCH Branch to run the workflow on (defaults to current branch)
-s, --test-subdirs DIRS Comma-separated list of test subdirectories to run (REQUIRED)
-p, --test-provider PROVIDER Test provider to use: vllm or ollama (default: ollama) -p, --test-provider PROVIDER Test provider to use: vllm or ollama (default: ollama)
-v, --run-vision-tests Include vision tests in the recording -t, --test-suite SUITE Test suite to use: base, responses, vision, etc. (default: base)
-s, --test-subdirs DIRS Comma-separated list of test subdirectories to run (overrides suite)
-k, --test-pattern PATTERN Regex pattern to pass to pytest -k -k, --test-pattern PATTERN Regex pattern to pass to pytest -k
-h, --help Show this help message -h, --help Show this help message
@ -38,7 +38,7 @@ EXAMPLES:
$0 --test-subdirs "agents" $0 --test-subdirs "agents"
# Record tests for specific branch with vision tests # Record tests for specific branch with vision tests
$0 -b my-feature-branch --test-subdirs "inference" --run-vision-tests $0 -b my-feature-branch --test-suite vision
# Record multiple test subdirectories with specific provider # Record multiple test subdirectories with specific provider
$0 --test-subdirs "agents,inference" --test-provider vllm $0 --test-subdirs "agents,inference" --test-provider vllm
@ -71,9 +71,9 @@ while [[ $# -gt 0 ]]; do
TEST_PROVIDER="$2" TEST_PROVIDER="$2"
shift 2 shift 2
;; ;;
-v|--run-vision-tests) -t|--test-suite)
RUN_VISION_TESTS=true TEST_SUITE="$2"
shift shift 2
;; ;;
-k|--test-pattern) -k|--test-pattern)
TEST_PATTERN="$2" TEST_PATTERN="$2"
@ -92,11 +92,11 @@ while [[ $# -gt 0 ]]; do
done done
# Validate required parameters # Validate required parameters
if [[ -z "$TEST_SUBDIRS" ]]; then if [[ -z "$TEST_SUBDIRS" && -z "$TEST_SUITE" ]]; then
echo "Error: --test-subdirs is required" echo "Error: --test-subdirs or --test-suite is required"
echo "Please specify which test subdirectories to run, e.g.:" echo "Please specify which test subdirectories to run or test suite to use, e.g.:"
echo " $0 --test-subdirs \"agents,inference\"" echo " $0 --test-subdirs \"agents,inference\""
echo " $0 --test-subdirs \"inference\" --run-vision-tests" echo " $0 --test-suite vision"
echo "" echo ""
exit 1 exit 1
fi fi
@ -239,17 +239,19 @@ echo "Triggering integration test recording workflow..."
echo "Branch: $BRANCH" echo "Branch: $BRANCH"
echo "Test provider: $TEST_PROVIDER" echo "Test provider: $TEST_PROVIDER"
echo "Test subdirs: $TEST_SUBDIRS" echo "Test subdirs: $TEST_SUBDIRS"
echo "Run vision tests: $RUN_VISION_TESTS" echo "Test suite: $TEST_SUITE"
echo "Test pattern: ${TEST_PATTERN:-"(none)"}" echo "Test pattern: ${TEST_PATTERN:-"(none)"}"
echo "" echo ""
# Prepare inputs for gh workflow run # Prepare inputs for gh workflow run
if [[ -n "$TEST_SUBDIRS" ]]; then
INPUTS="-f test-subdirs='$TEST_SUBDIRS'" INPUTS="-f test-subdirs='$TEST_SUBDIRS'"
fi
if [[ -n "$TEST_PROVIDER" ]]; then if [[ -n "$TEST_PROVIDER" ]]; then
INPUTS="$INPUTS -f test-provider='$TEST_PROVIDER'" INPUTS="$INPUTS -f test-provider='$TEST_PROVIDER'"
fi fi
if [[ "$RUN_VISION_TESTS" == "true" ]]; then if [[ -n "$TEST_SUITE" ]]; then
INPUTS="$INPUTS -f run-vision-tests=true" INPUTS="$INPUTS -f test-suite='$TEST_SUITE'"
fi fi
if [[ -n "$TEST_PATTERN" ]]; then if [[ -n "$TEST_PATTERN" ]]; then
INPUTS="$INPUTS -f test-pattern='$TEST_PATTERN'" INPUTS="$INPUTS -f test-pattern='$TEST_PATTERN'"

View file

@ -16,7 +16,7 @@ STACK_CONFIG=""
PROVIDER="" PROVIDER=""
TEST_SUBDIRS="" TEST_SUBDIRS=""
TEST_PATTERN="" TEST_PATTERN=""
RUN_VISION_TESTS="false" TEST_SUITE="base"
INFERENCE_MODE="replay" INFERENCE_MODE="replay"
EXTRA_PARAMS="" EXTRA_PARAMS=""
@ -28,9 +28,9 @@ Usage: $0 [OPTIONS]
Options: Options:
--stack-config STRING Stack configuration to use (required) --stack-config STRING Stack configuration to use (required)
--provider STRING Provider to use (ollama, vllm, etc.) (required) --provider STRING Provider to use (ollama, vllm, etc.) (required)
--test-subdirs STRING Comma-separated list of test subdirectories to run (default: 'inference') --test-suite STRING Comma-separated list of test suites to run (default: 'base')
--run-vision-tests Run vision tests instead of regular tests
--inference-mode STRING Inference mode: record or replay (default: replay) --inference-mode STRING Inference mode: record or replay (default: replay)
--test-subdirs STRING Comma-separated list of test subdirectories to run (overrides suite)
--test-pattern STRING Regex pattern to pass to pytest -k --test-pattern STRING Regex pattern to pass to pytest -k
--help Show this help message --help Show this help message
@ -42,7 +42,7 @@ Examples:
$0 --stack-config server:ci-tests --provider vllm --test-subdirs 'inference,agents' $0 --stack-config server:ci-tests --provider vllm --test-subdirs 'inference,agents'
# Vision tests with ollama # Vision tests with ollama
$0 --stack-config server:ci-tests --provider ollama --run-vision-tests $0 --stack-config server:ci-tests --provider ollama --test-suite vision
# Record mode for updating test recordings # Record mode for updating test recordings
$0 --stack-config server:ci-tests --provider ollama --inference-mode record $0 --stack-config server:ci-tests --provider ollama --inference-mode record
@ -64,9 +64,9 @@ while [[ $# -gt 0 ]]; do
TEST_SUBDIRS="$2" TEST_SUBDIRS="$2"
shift 2 shift 2
;; ;;
--run-vision-tests) --test-suite)
RUN_VISION_TESTS="true" TEST_SUITE="$2"
shift shift 2
;; ;;
--inference-mode) --inference-mode)
INFERENCE_MODE="$2" INFERENCE_MODE="$2"
@ -92,22 +92,25 @@ done
# Validate required parameters # Validate required parameters
if [[ -z "$STACK_CONFIG" ]]; then if [[ -z "$STACK_CONFIG" ]]; then
echo "Error: --stack-config is required" echo "Error: --stack-config is required"
usage
exit 1 exit 1
fi fi
if [[ -z "$PROVIDER" ]]; then if [[ -z "$PROVIDER" ]]; then
echo "Error: --provider is required" echo "Error: --provider is required"
usage exit 1
fi
if [[ -z "$TEST_SUITE" && -z "$TEST_SUBDIRS" ]]; then
echo "Error: --test-suite or --test-subdirs is required"
exit 1 exit 1
fi fi
echo "=== Llama Stack Integration Test Runner ===" echo "=== Llama Stack Integration Test Runner ==="
echo "Stack Config: $STACK_CONFIG" echo "Stack Config: $STACK_CONFIG"
echo "Provider: $PROVIDER" echo "Provider: $PROVIDER"
echo "Test Subdirs: $TEST_SUBDIRS"
echo "Vision Tests: $RUN_VISION_TESTS"
echo "Inference Mode: $INFERENCE_MODE" echo "Inference Mode: $INFERENCE_MODE"
echo "Test Suite: $TEST_SUITE"
echo "Test Subdirs: $TEST_SUBDIRS"
echo "Test Pattern: $TEST_PATTERN" echo "Test Pattern: $TEST_PATTERN"
echo "" echo ""
@ -194,56 +197,12 @@ if [[ -n "$TEST_PATTERN" ]]; then
PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN" PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"
fi fi
# Run vision tests if specified
if [[ "$RUN_VISION_TESTS" == "true" ]]; then
echo "Running vision tests..."
set +e
pytest -s -v tests/integration/inference/test_vision_inference.py \
--stack-config="$STACK_CONFIG" \
-k "$PYTEST_PATTERN" \
--vision-model=ollama/llama3.2-vision:11b \
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
--color=yes $EXTRA_PARAMS \
--capture=tee-sys
exit_code=$?
set -e
if [ $exit_code -eq 0 ]; then
echo "✅ Vision tests completed successfully"
elif [ $exit_code -eq 5 ]; then
echo "⚠️ No vision tests collected (pattern matched no tests)"
else
echo "❌ Vision tests failed"
exit 1
fi
exit 0
fi
# Run regular tests
if [[ -z "$TEST_SUBDIRS" ]]; then
TEST_SUBDIRS=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
sed 's|tests/integration/||' |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
sort)
fi
echo "Test subdirs to run: $TEST_SUBDIRS" echo "Test subdirs to run: $TEST_SUBDIRS"
if [[ -n "$TEST_SUBDIRS" ]]; then
# Collect all test files for the specified test types # Collect all test files for the specified test types
TEST_FILES="" TEST_FILES=""
for test_subdir in $(echo "$TEST_SUBDIRS" | tr ',' '\n'); do for test_subdir in $(echo "$TEST_SUBDIRS" | tr ',' '\n'); do
# Skip certain test types for vllm provider
if [[ "$PROVIDER" == "vllm" ]]; then
if [[ "$test_subdir" == "safety" ]] || [[ "$test_subdir" == "post_training" ]] || [[ "$test_subdir" == "tool_runtime" ]]; then
echo "Skipping $test_subdir for vllm provider"
continue
fi
fi
if [[ "$STACK_CONFIG" != *"server:"* ]] && [[ "$test_subdir" == "batches" ]]; then
echo "Skipping $test_subdir for library client until types are supported"
continue
fi
if [[ -d "tests/integration/$test_subdir" ]]; then if [[ -d "tests/integration/$test_subdir" ]]; then
# Find all Python test files in this directory # Find all Python test files in this directory
test_files=$(find tests/integration/$test_subdir -name "test_*.py" -o -name "*_test.py") test_files=$(find tests/integration/$test_subdir -name "test_*.py" -o -name "*_test.py")
@ -265,13 +224,19 @@ echo ""
echo "=== Running all collected tests in a single pytest command ===" echo "=== Running all collected tests in a single pytest command ==="
echo "Total test files: $(echo $TEST_FILES | wc -w)" echo "Total test files: $(echo $TEST_FILES | wc -w)"
PYTEST_TARGET="$TEST_FILES"
EXTRA_PARAMS="$EXTRA_PARAMS --text-model=$TEXT_MODEL --embedding-model=sentence-transformers/all-MiniLM-L6-v2"
else
PYTEST_TARGET="tests/integration/"
EXTRA_PARAMS="$EXTRA_PARAMS --suite=$TEST_SUITE"
fi
set +e set +e
pytest -s -v $TEST_FILES \ pytest -s -v $PYTEST_TARGET \
--stack-config="$STACK_CONFIG" \ --stack-config="$STACK_CONFIG" \
-k "$PYTEST_PATTERN" \ -k "$PYTEST_PATTERN" \
--text-model="$TEXT_MODEL" \ $EXTRA_PARAMS \
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \ --color=yes \
--color=yes $EXTRA_PARAMS \
--capture=tee-sys --capture=tee-sys
exit_code=$? exit_code=$?
set -e set -e
@ -294,7 +259,13 @@ df -h
# stop server # stop server
if [[ "$STACK_CONFIG" == *"server:"* ]]; then if [[ "$STACK_CONFIG" == *"server:"* ]]; then
echo "Stopping Llama Stack Server..." echo "Stopping Llama Stack Server..."
kill $(lsof -i :8321 | awk 'NR>1 {print $2}') pids=$(lsof -i :8321 | awk 'NR>1 {print $2}')
if [[ -n "$pids" ]]; then
echo "Killing Llama Stack Server processes: $pids"
kill -9 $pids
else
echo "No Llama Stack Server processes found ?!"
fi
echo "Llama Stack Server stopped" echo "Llama Stack Server stopped"
fi fi

View file

@ -77,7 +77,7 @@ You must be careful when re-recording. CI workflows assume a specific setup for
./scripts/github/schedule-record-workflow.sh --test-subdirs "agents,inference" ./scripts/github/schedule-record-workflow.sh --test-subdirs "agents,inference"
# Record with vision tests enabled # Record with vision tests enabled
./scripts/github/schedule-record-workflow.sh --test-subdirs "inference" --run-vision-tests ./scripts/github/schedule-record-workflow.sh --test-suite vision
# Record with specific provider # Record with specific provider
./scripts/github/schedule-record-workflow.sh --test-subdirs "agents" --test-provider vllm ./scripts/github/schedule-record-workflow.sh --test-subdirs "agents" --test-provider vllm

View file

@ -42,6 +42,27 @@ Model parameters can be influenced by the following options:
Each of these are comma-separated lists and can be used to generate multiple parameter combinations. Note that tests will be skipped Each of these are comma-separated lists and can be used to generate multiple parameter combinations. Note that tests will be skipped
if no model is specified. if no model is specified.
### Suites (fast selection + sane defaults)
- `--suite`: comma-separated list of named suites that both narrow which tests are collected and prefill common model options (unless you pass them explicitly). This keeps runs fast and convenient.
- Available suites:
- `responses`: collects tests under `tests/integration/responses`; defaults `--text-model=ollama/llama3.2:3b-instruct-fp16`, `--embedding-model=sentence-transformers/all-MiniLM-L6-v2`.
- `vision`: collects only `tests/integration/inference/test_vision_inference.py`; defaults `--vision-model=ollama/llama3.2-vision:11b`, `--embedding-model=sentence-transformers/all-MiniLM-L6-v2`.
- Explicit flags always win. For example, `--suite=responses --text-model=<X>` overrides the suites text model.
Examples:
```bash
# Fast responses run with defaults
pytest -s -v tests/integration --stack-config=server:starter --suite=responses
# Fast single-file vision run with defaults
pytest -s -v tests/integration --stack-config=server:starter --suite=vision
# Combine suites and override a default
pytest -s -v tests/integration --stack-config=server:starter --suite=responses,vision --embedding-model=text-embedding-3-small
```
## Examples ## Examples
### Testing against a Server ### Testing against a Server

View file

@ -6,15 +6,17 @@
import inspect import inspect
import itertools import itertools
import os import os
import platform
import textwrap import textwrap
import time import time
from pathlib import Path
import pytest import pytest
from dotenv import load_dotenv from dotenv import load_dotenv
from llama_stack.log import get_logger from llama_stack.log import get_logger
from .suites import SUITE_DEFINITIONS
logger = get_logger(__name__, category="tests") logger = get_logger(__name__, category="tests")
@ -30,6 +32,8 @@ def pytest_runtest_makereport(item, call):
def pytest_sessionstart(session): def pytest_sessionstart(session):
# stop macOS from complaining about duplicate OpenMP libraries # stop macOS from complaining about duplicate OpenMP libraries
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
if "LLAMA_STACK_TEST_INFERENCE_MODE" not in os.environ:
os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay"
def pytest_runtest_teardown(item): def pytest_runtest_teardown(item):
@ -59,9 +63,22 @@ def pytest_configure(config):
key, value = env_var.split("=", 1) key, value = env_var.split("=", 1)
os.environ[key] = value os.environ[key] = value
if platform.system() == "Darwin": # Darwin is the system name for macOS suites_raw = config.getoption("--suite")
os.environ["DISABLE_CODE_SANDBOX"] = "1" suites: list[str] = []
logger.info("Setting DISABLE_CODE_SANDBOX=1 for macOS") if suites_raw:
suites = [p.strip() for p in str(suites_raw).split(",") if p.strip()]
unknown = [p for p in suites if p not in SUITE_DEFINITIONS]
if unknown:
raise pytest.UsageError(
f"Unknown suite(s): {', '.join(unknown)}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}"
)
for suite in suites:
suite_def = SUITE_DEFINITIONS.get(suite, {})
defaults: dict = suite_def.get("defaults", {})
for dest, value in defaults.items():
current = getattr(config.option, dest, None)
if not current:
setattr(config.option, dest, value)
def pytest_addoption(parser): def pytest_addoption(parser):
@ -103,16 +120,21 @@ def pytest_addoption(parser):
default=384, default=384,
help="Output dimensionality of the embedding model to use for testing. Default: 384", help="Output dimensionality of the embedding model to use for testing. Default: 384",
) )
parser.addoption(
"--record-responses",
action="store_true",
help="Record new API responses instead of using cached ones.",
)
parser.addoption( parser.addoption(
"--report", "--report",
help="Path where the test report should be written, e.g. --report=/path/to/report.md", help="Path where the test report should be written, e.g. --report=/path/to/report.md",
) )
available_suites = ", ".join(sorted(SUITE_DEFINITIONS.keys()))
suite_help = (
"Comma-separated integration test suites to narrow collection and prefill defaults. "
"Available: "
f"{available_suites}. "
"Explicit CLI flags (e.g., --text-model) override suite defaults. "
"Examples: --suite=responses or --suite=responses,vision."
)
parser.addoption("--suite", help=suite_help)
MODEL_SHORT_IDS = { MODEL_SHORT_IDS = {
"meta-llama/Llama-3.2-3B-Instruct": "3B", "meta-llama/Llama-3.2-3B-Instruct": "3B",
@ -195,3 +217,40 @@ def pytest_generate_tests(metafunc):
pytest_plugins = ["tests.integration.fixtures.common"] pytest_plugins = ["tests.integration.fixtures.common"]
def pytest_ignore_collect(path: str, config: pytest.Config) -> bool:
"""Skip collecting paths outside the selected suite roots for speed."""
suites_raw = config.getoption("--suite")
if not suites_raw:
return False
names = [p.strip() for p in str(suites_raw).split(",") if p.strip()]
roots: list[str] = []
for name in names:
suite_def = SUITE_DEFINITIONS.get(name)
if suite_def:
roots.extend(suite_def.get("roots", []))
if not roots:
return False
p = Path(str(path)).resolve()
# Only constrain within tests/integration to avoid ignoring unrelated tests
integration_root = (Path(str(config.rootpath)) / "tests" / "integration").resolve()
if not p.is_relative_to(integration_root):
return False
for r in roots:
rp = (Path(str(config.rootpath)) / r).resolve()
if rp.is_file():
# Allow the exact file and any ancestor directories so pytest can walk into it.
if p == rp:
return False
if p.is_dir() and rp.is_relative_to(p):
return False
else:
# Allow anything inside an allowed directory
if p.is_relative_to(rp):
return False
return True

View file

Before

Width:  |  Height:  |  Size: 108 KiB

After

Width:  |  Height:  |  Size: 108 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 148 KiB

After

Width:  |  Height:  |  Size: 148 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 139 KiB

After

Width:  |  Height:  |  Size: 139 KiB

Before After
Before After

View file

@ -0,0 +1,53 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
# Central definition of integration test suites. You can use these suites by passing --suite=name to pytest.
# For example:
#
# ```bash
# pytest tests/integration/ --suite=vision
# ```
#
# Each suite can:
# - restrict collection to specific roots (dirs or files)
# - provide default CLI option values (e.g. text_model, embedding_model, etc.)
from pathlib import Path
this_dir = Path(__file__).parent
default_roots = [
str(p)
for p in this_dir.glob("*")
if p.is_dir()
and p.name not in ("__pycache__", "fixtures", "test_cases", "recordings", "responses", "post_training")
]
SUITE_DEFINITIONS: dict[str, dict] = {
"base": {
"description": "Base suite that includes most tests but runs them with a text Ollama model",
"roots": default_roots,
"defaults": {
"text_model": "ollama/llama3.2:3b-instruct-fp16",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
},
"responses": {
"description": "Suite that includes only the OpenAI Responses tests; needs a strong tool-calling model",
"roots": ["tests/integration/responses"],
"defaults": {
"text_model": "openai/gpt-4o",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
},
"vision": {
"description": "Suite that includes only the vision tests",
"roots": ["tests/integration/inference/test_vision_inference.py"],
"defaults": {
"vision_model": "ollama/llama3.2-vision:11b",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
},
}