feat(tests): migrate to global "setups" system for test configuration (#3390)

This PR refactors the integration test system to use global "setups", which provide better separation of concerns: **suites = what to test, setups = how to configure.** NOTE: if you have naming suggestions, please provide feedback. Changes: - New `tests/integration/setups.py` with global, reusable configurations (ollama, vllm, gpt, claude) - Modified `scripts/integration-tests.sh` options to match the underlying pytest options - Updated documentation to reflect the new global setup system The main benefit is that setups can be reused across multiple suites (e.g., use "gpt" with any suite), even though sometimes they could be specifically tailored for a suite (vision <> ollama-vision). It is now easier to add new configurations without modifying existing suites. Usage examples: - `pytest tests/integration --suite=responses --setup=gpt` - `pytest tests/integration --suite=vision` # auto-selects "ollama-vision" setup - `pytest tests/integration --suite=base --setup=vllm`
2025-12-04 10:10:36 +00:00 · 2025-09-09 15:50:56 -07:00 · 2025-09-09 15:50:56 -07:00 · a8aa815b6a
commit a8aa815b6a
parent 28696c3f30
11 changed files with 385 additions and 206 deletions
--- a/scripts/get_setup_env.py
+++ b/scripts/get_setup_env.py
@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+Small helper script to extract environment variables from a test setup.
+Used by integration-tests.sh to set environment variables before starting the server.
+"""
+
+import argparse
+import sys
+
+from tests.integration.suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS
+
+
+def get_setup_env_vars(setup_name, suite_name=None):
+    """
+    Get environment variables for a setup, with optional suite default fallback.
+
+    Args:
+        setup_name: Name of the setup (e.g., 'ollama', 'gpt')
+        suite_name: Optional suite name to get default setup if setup_name is None
+
+    Returns:
+        Dictionary of environment variables
+    """
+    # If no setup specified, try to get default from suite
+    if not setup_name and suite_name:
+        suite = SUITE_DEFINITIONS.get(suite_name)
+        if suite and suite.default_setup:
+            setup_name = suite.default_setup
+
+    if not setup_name:
+        return {}
+
+    setup = SETUP_DEFINITIONS.get(setup_name)
+    if not setup:
+        print(
+            f"Error: Unknown setup '{setup_name}'. Available: {', '.join(sorted(SETUP_DEFINITIONS.keys()))}",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    return setup.env
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Extract environment variables from a test setup")
+    parser.add_argument("--setup", help="Setup name (e.g., ollama, gpt)")
+    parser.add_argument("--suite", help="Suite name to get default setup from if --setup not provided")
+    parser.add_argument("--format", choices=["bash", "json"], default="bash", help="Output format (default: bash)")
+
+    args = parser.parse_args()
+
+    env_vars = get_setup_env_vars(args.setup, args.suite)
+
+    if args.format == "bash":
+        # Output as bash export statements
+        for key, value in env_vars.items():
+            print(f"export {key}='{value}'")
+    elif args.format == "json":
+        import json
+
+        print(json.dumps(env_vars))
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/github/schedule-record-workflow.sh
+++ b/scripts/github/schedule-record-workflow.sh
@ -14,7 +14,7 @@ set -euo pipefail
 # Default values
 BRANCH=""
 TEST_SUBDIRS=""
-TEST_PROVIDER="ollama"
+TEST_SETUP="ollama"
 TEST_SUITE="base"
 TEST_PATTERN=""

@ -27,24 +27,24 @@ Trigger the integration test recording workflow remotely. This way you do not ne

 OPTIONS:
    -b, --branch BRANCH         Branch to run the workflow on (defaults to current branch)
-    -p, --test-provider PROVIDER Test provider to use: vllm or ollama (default: ollama)
-    -t, --test-suite SUITE      Test suite to use: base, responses, vision, etc. (default: base)
-    -s, --test-subdirs DIRS     Comma-separated list of test subdirectories to run (overrides suite)
-    -k, --test-pattern PATTERN  Regex pattern to pass to pytest -k
+    -t, --suite SUITE           Test suite to use: base, responses, vision, etc. (default: base)
+    -p, --setup SETUP           Test setup to use: vllm, ollama, gpt, etc. (default: ollama)
+    -s, --subdirs DIRS          Comma-separated list of test subdirectories to run (overrides suite)
+    -k, --pattern PATTERN       Regex pattern to pass to pytest -k
    -h, --help                  Show this help message

 EXAMPLES:
    # Record tests for current branch with agents subdirectory
-    $0 --test-subdirs "agents"
+    $0 --subdirs "agents"

    # Record tests for specific branch with vision tests
-    $0 -b my-feature-branch --test-suite vision
+    $0 -b my-feature-branch --suite vision

-    # Record multiple test subdirectories with specific provider
-    $0 --test-subdirs "agents,inference" --test-provider vllm
+    # Record multiple test subdirectories with specific setup
+    $0 --subdirs "agents,inference" --setup vllm

    # Record tests matching a specific pattern
-    $0 --test-subdirs "inference" --test-pattern "test_streaming"
+    $0 --subdirs "inference" --pattern "test_streaming"

 EOF
 }
@ -63,19 +63,19 @@ while [[ $# -gt 0 ]]; do
            BRANCH="$2"
            shift 2
            ;;
-        -s|--test-subdirs)
+        -s|--subdirs)
            TEST_SUBDIRS="$2"
            shift 2
            ;;
-        -p|--test-provider)
-            TEST_PROVIDER="$2"
+        -p|--setup)
+            TEST_SETUP="$2"
            shift 2
            ;;
-        -t|--test-suite)
+        -t|--suite)
            TEST_SUITE="$2"
            shift 2
            ;;
-        -k|--test-pattern)
+        -k|--pattern)
            TEST_PATTERN="$2"
            shift 2
            ;;
@ -93,21 +93,16 @@ done

 # Validate required parameters
 if [[ -z "$TEST_SUBDIRS" && -z "$TEST_SUITE" ]]; then
-    echo "Error: --test-subdirs or --test-suite is required"
+    echo "Error: --subdirs or --suite is required"
    echo "Please specify which test subdirectories to run or test suite to use, e.g.:"
-    echo "  $0 --test-subdirs \"agents,inference\""
-    echo "  $0 --test-suite vision"
+    echo "  $0 --subdirs \"agents,inference\""
+    echo "  $0 --suite vision"
    echo ""
    exit 1
 fi

-# Validate test provider
-if [[ "$TEST_PROVIDER" != "vllm" && "$TEST_PROVIDER" != "ollama" ]]; then
-    echo "❌ Error: Invalid test provider '$TEST_PROVIDER'"
-    echo "   Supported providers: vllm, ollama"
-    echo "   Example: $0 --test-subdirs \"agents\" --test-provider vllm"
-    exit 1
-fi
+# Validate test setup (optional - setups are validated by the workflow itself)
+# Common setups: ollama, vllm, gpt, etc.

 # Check if required tools are installed
 if ! command -v gh &> /dev/null; then
@ -237,7 +232,7 @@ fi
 # Build the workflow dispatch command
 echo "Triggering integration test recording workflow..."
 echo "Branch: $BRANCH"
-echo "Test provider: $TEST_PROVIDER"
+echo "Test setup: $TEST_SETUP"
 echo "Test subdirs: $TEST_SUBDIRS"
 echo "Test suite: $TEST_SUITE"
 echo "Test pattern: ${TEST_PATTERN:-"(none)"}"
@ -245,16 +240,16 @@ echo ""

 # Prepare inputs for gh workflow run
 if [[ -n "$TEST_SUBDIRS" ]]; then
-    INPUTS="-f test-subdirs='$TEST_SUBDIRS'"
+    INPUTS="-f subdirs='$TEST_SUBDIRS'"
 fi
-if [[ -n "$TEST_PROVIDER" ]]; then
-    INPUTS="$INPUTS -f test-provider='$TEST_PROVIDER'"
+if [[ -n "$TEST_SETUP" ]]; then
+    INPUTS="$INPUTS -f test-setup='$TEST_SETUP'"
 fi
 if [[ -n "$TEST_SUITE" ]]; then
-    INPUTS="$INPUTS -f test-suite='$TEST_SUITE'"
+    INPUTS="$INPUTS -f suite='$TEST_SUITE'"
 fi
 if [[ -n "$TEST_PATTERN" ]]; then
-    INPUTS="$INPUTS -f test-pattern='$TEST_PATTERN'"
+    INPUTS="$INPUTS -f pattern='$TEST_PATTERN'"
 fi

 # Run the workflow
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@ -13,10 +13,10 @@ set -euo pipefail

 # Default values
 STACK_CONFIG=""
-PROVIDER=""
+TEST_SUITE="base"
+TEST_SETUP=""
 TEST_SUBDIRS=""
 TEST_PATTERN=""
-TEST_SUITE="base"
 INFERENCE_MODE="replay"
 EXTRA_PARAMS=""

@ -27,29 +27,30 @@ Usage: $0 [OPTIONS]

 Options:
    --stack-config STRING    Stack configuration to use (required)
-    --provider STRING        Provider to use (ollama, vllm, etc.) (required)
-    --test-suite STRING      Comma-separated list of test suites to run (default: 'base')
+    --suite STRING           Test suite to run (default: 'base')
+    --setup STRING           Test setup (models, env) to use (e.g., 'ollama', 'ollama-vision', 'gpt', 'vllm')
    --inference-mode STRING  Inference mode: record or replay (default: replay)
-    --test-subdirs STRING    Comma-separated list of test subdirectories to run (overrides suite)
-    --test-pattern STRING    Regex pattern to pass to pytest -k
+    --subdirs STRING         Comma-separated list of test subdirectories to run (overrides suite)
+    --pattern STRING         Regex pattern to pass to pytest -k
    --help                   Show this help message

-Suites are defined in tests/integration/suites.py. They are used to narrow the collection of tests and provide default model options.
+Suites are defined in tests/integration/suites.py and define which tests to run.
+Setups are defined in tests/integration/setups.py and provide global configuration (models, env).

 You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite.

 Examples:
    # Basic inference tests with ollama
-    $0 --stack-config server:ci-tests --provider ollama
+    $0 --stack-config server:ci-tests --suite base --setup ollama

    # Multiple test directories with vllm
-    $0 --stack-config server:ci-tests --provider vllm --test-subdirs 'inference,agents'
+    $0 --stack-config server:ci-tests --subdirs 'inference,agents' --setup vllm

    # Vision tests with ollama
-    $0 --stack-config server:ci-tests --provider ollama --test-suite vision
+    $0 --stack-config server:ci-tests --suite vision  # default setup for this suite is ollama-vision

    # Record mode for updating test recordings
-    $0 --stack-config server:ci-tests --provider ollama --inference-mode record
+    $0 --stack-config server:ci-tests --suite base --inference-mode record
 EOF
 }

@ -60,15 +61,15 @@ while [[ $# -gt 0 ]]; do
            STACK_CONFIG="$2"
            shift 2
            ;;
-        --provider)
-            PROVIDER="$2"
+        --setup)
+            TEST_SETUP="$2"
            shift 2
            ;;
-        --test-subdirs)
+        --subdirs)
            TEST_SUBDIRS="$2"
            shift 2
            ;;
-        --test-suite)
+        --suite)
            TEST_SUITE="$2"
            shift 2
            ;;
@ -76,7 +77,7 @@ while [[ $# -gt 0 ]]; do
            INFERENCE_MODE="$2"
            shift 2
            ;;
-        --test-pattern)
+        --pattern)
            TEST_PATTERN="$2"
            shift 2
            ;;
@ -96,11 +97,13 @@ done
 # Validate required parameters
 if [[ -z "$STACK_CONFIG" ]]; then
    echo "Error: --stack-config is required"
+    usage
    exit 1
 fi

-if [[ -z "$PROVIDER" ]]; then
-    echo "Error: --provider is required"
+if [[ -z "$TEST_SETUP" && -n "$TEST_SUBDIRS" ]]; then
+    echo "Error: --test-setup is required when --test-subdirs is provided"
+    usage
    exit 1
 fi

@ -111,7 +114,7 @@ fi

 echo "=== Llama Stack Integration Test Runner ==="
 echo "Stack Config: $STACK_CONFIG"
-echo "Provider: $PROVIDER"
+echo "Setup: $TEST_SETUP"
 echo "Inference Mode: $INFERENCE_MODE"
 echo "Test Suite: $TEST_SUITE"
 echo "Test Subdirs: $TEST_SUBDIRS"
@ -129,21 +132,25 @@ echo ""

 # Set environment variables
 export LLAMA_STACK_CLIENT_TIMEOUT=300
-export LLAMA_STACK_TEST_INFERENCE_MODE="$INFERENCE_MODE"
-
-# Configure provider-specific settings
-if [[ "$PROVIDER" == "ollama" ]]; then
-    export OLLAMA_URL="http://0.0.0.0:11434"
-    export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16"
-    export SAFETY_MODEL="ollama/llama-guard3:1b"
-    EXTRA_PARAMS="--safety-shield=llama-guard"
-else
-    export VLLM_URL="http://localhost:8000/v1"
-    export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct"
-    EXTRA_PARAMS=""
-fi

 THIS_DIR=$(dirname "$0")
+
+if [[ -n "$TEST_SETUP" ]]; then
+    EXTRA_PARAMS="--setup=$TEST_SETUP"
+fi
+
+# Apply setup-specific environment variables (needed for server startup and tests)
+echo "=== Applying Setup Environment Variables ==="
+
+# the server needs this
+export LLAMA_STACK_TEST_INFERENCE_MODE="$INFERENCE_MODE"
+
+SETUP_ENV=$(PYTHONPATH=$THIS_DIR/.. python "$THIS_DIR/get_setup_env.py" --suite "$TEST_SUITE" --setup "$TEST_SETUP" --format bash)
+echo "Setting up environment variables:"
+echo "$SETUP_ENV"
+eval "$SETUP_ENV"
+echo ""
+
 ROOT_DIR="$THIS_DIR/.."
 cd $ROOT_DIR

@ -162,6 +169,18 @@ fi

 # Start Llama Stack Server if needed
 if [[ "$STACK_CONFIG" == *"server:"* ]]; then
+    # Kill every process that lsof reports on port 8321 (the Llama Stack Server port).
+    stop_server() {
+        echo "Stopping Llama Stack Server..."
+        # lsof exits non-zero when nothing matches; under `set -euo pipefail` that
+        # would abort this function before the "not found" branch, so tolerate it.
+        # sort -u collapses PIDs that lsof lists more than once.
+        pids=$(lsof -i :8321 | awk 'NR>1 {print $2}' | sort -u || true)
+        if [[ -n "$pids" ]]; then
+            echo "Killing Llama Stack Server processes: $pids"
+            # A process may already have exited by the time it is signalled.
+            kill -9 $pids || true
+        else
+            echo "No Llama Stack Server processes found ?!"
+        fi
+        echo "Llama Stack Server stopped"
+    }
+
    # check if server is already running
    if curl -s http://localhost:8321/v1/health 2>/dev/null | grep -q "OK"; then
        echo "Llama Stack Server is already running, skipping start"
@ -185,14 +204,16 @@ if [[ "$STACK_CONFIG" == *"server:"* ]]; then
        done
        echo ""
    fi
+
+    trap stop_server EXIT ERR INT TERM
 fi

 # Run tests
 echo "=== Running Integration Tests ==="
 EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"

-# Additional exclusions for vllm provider
-if [[ "$PROVIDER" == "vllm" ]]; then
+# Additional exclusions for vllm setup
+if [[ "$TEST_SETUP" == "vllm" ]]; then
    EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
 fi

@ -229,20 +250,22 @@ if [[ -n "$TEST_SUBDIRS" ]]; then
    echo "Total test files: $(echo $TEST_FILES | wc -w)"

    PYTEST_TARGET="$TEST_FILES"
-    EXTRA_PARAMS="$EXTRA_PARAMS --text-model=$TEXT_MODEL --embedding-model=sentence-transformers/all-MiniLM-L6-v2"
 else
    PYTEST_TARGET="tests/integration/"
    EXTRA_PARAMS="$EXTRA_PARAMS --suite=$TEST_SUITE"
 fi

 set +e
+set -x
 pytest -s -v $PYTEST_TARGET \
    --stack-config="$STACK_CONFIG" \
+    --inference-mode="$INFERENCE_MODE" \
    -k "$PYTEST_PATTERN" \
    $EXTRA_PARAMS \
    --color=yes \
    --capture=tee-sys
 exit_code=$?
+set +x
 set -e

 if [ $exit_code -eq 0 ]; then
@ -260,18 +283,5 @@ echo "=== System Resources After Tests ==="
 free -h 2>/dev/null || echo "free command not available"
 df -h

-# stop server
-if [[ "$STACK_CONFIG" == *"server:"* ]]; then
-    echo "Stopping Llama Stack Server..."
-    pids=$(lsof -i :8321 | awk 'NR>1 {print $2}')
-    if [[ -n "$pids" ]]; then
-        echo "Killing Llama Stack Server processes: $pids"
-        kill -9 $pids
-    else
-        echo "No Llama Stack Server processes found ?!"
-    fi
-    echo "Llama Stack Server stopped"
-fi
-
 echo ""
 echo "=== Integration Tests Complete ==="