From f2eee4e417cb54e805865934ef75729b06bed538 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Fri, 1 Aug 2025 17:38:49 -0700 Subject: [PATCH] chore: create integration-tests script (#3016) --- .../actions/run-and-record-tests/action.yml | 128 +--------- scripts/integration-tests.sh | 240 ++++++++++++++++++ 2 files changed, 246 insertions(+), 122 deletions(-) create mode 100755 scripts/integration-tests.sh diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml index a6acc5ce6..573148e46 100644 --- a/.github/actions/run-and-record-tests/action.yml +++ b/.github/actions/run-and-record-tests/action.yml @@ -29,132 +29,16 @@ runs: free -h df -h - - name: Set environment variables - shell: bash - run: | - echo "LLAMA_STACK_CLIENT_TIMEOUT=300" >> $GITHUB_ENV - echo "LLAMA_STACK_TEST_INFERENCE_MODE=${{ inputs.inference-mode }}" >> $GITHUB_ENV - - # Configure provider-specific settings - if [ "${{ inputs.provider }}" == "ollama" ]; then - echo "OLLAMA_URL=http://0.0.0.0:11434" >> $GITHUB_ENV - echo "TEXT_MODEL=ollama/llama3.2:3b-instruct-fp16" >> $GITHUB_ENV - echo "SAFETY_MODEL=ollama/llama-guard3:1b" >> $GITHUB_ENV - else - echo "VLLM_URL=http://localhost:8000/v1" >> $GITHUB_ENV - echo "TEXT_MODEL=vllm/meta-llama/Llama-3.2-1B-Instruct" >> $GITHUB_ENV - fi - - if [ "${{ inputs.run-vision-tests }}" == "true" ]; then - echo "LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings/vision" >> $GITHUB_ENV - else - echo "LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings" >> $GITHUB_ENV - fi - - - name: Run Llama Stack Server - if: ${{ contains(inputs.stack-config, 'server:') }} - shell: bash - run: | - # Run this so pytest in a loop doesn't start-stop servers in a loop - echo "Starting Llama Stack Server" - nohup uv run llama stack run ci-tests --image-type venv > server.log 2>&1 & - - echo "Waiting for Llama Stack Server to start" - for i in {1..30}; do - if curl -s http://localhost:8321/v1/health | grep -q "OK"; then - echo "Llama Stack Server started" - exit 0 - fi - sleep 1 - done - - echo "Llama Stack Server failed to start" - cat server.log - exit 1 - - name: Run Integration Tests shell: bash run: | - stack_config="${{ inputs.stack-config }}" - EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag" + ./scripts/integration-tests.sh \ + --stack-config '${{ inputs.stack-config }}' \ + --provider '${{ inputs.provider }}' \ + --test-types '${{ inputs.test-types }}' \ + --inference-mode '${{ inputs.inference-mode }}' \ + ${{ inputs.run-vision-tests == 'true' && '--run-vision-tests' || '' }} - # Configure provider-specific settings - if [ "${{ inputs.provider }}" == "ollama" ]; then - EXTRA_PARAMS="--safety-shield=llama-guard" - else - EXTRA_PARAMS="" - EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls" - fi - - if [ "${{ inputs.run-vision-tests }}" == "true" ]; then - if uv run pytest -s -v tests/integration/inference/test_vision_inference.py --stack-config=${stack_config} \ - -k "not( ${EXCLUDE_TESTS} )" \ - --vision-model=ollama/llama3.2-vision:11b \ - --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \ - --color=yes ${EXTRA_PARAMS} \ - --capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-vision.log; then - echo "✅ Tests completed for vision" - else - echo "❌ Tests failed for vision" - exit 1 - fi - - exit 0 - fi - - # Run non-vision tests - TEST_TYPES='${{ inputs.test-types }}' - echo "Test types to run: $TEST_TYPES" - - # Collect all test files for the specified test types - TEST_FILES="" - for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do - # if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime) - if [ "${{ inputs.provider }}" == "vllm" ]; then - if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then - echo "Skipping $test_type for vllm provider" - continue - fi - fi - - if [ -d "tests/integration/$test_type" ]; then - # Find all Python test files in this directory - test_files=$(find tests/integration/$test_type -name "test_*.py" -o -name "*_test.py") - if [ -n "$test_files" ]; then - TEST_FILES="$TEST_FILES $test_files" - echo "Added test files from $test_type: $(echo $test_files | wc -w) files" - fi - else - echo "Warning: Directory tests/integration/$test_type does not exist" - fi - done - - if [ -z "$TEST_FILES" ]; then - echo "No test files found for the specified test types" - exit 1 - fi - - echo "=== Running all collected tests in a single pytest command ===" - echo "Total test files: $(echo $TEST_FILES | wc -w)" - - if uv run pytest -s -v $TEST_FILES --stack-config=${stack_config} \ - -k "not( ${EXCLUDE_TESTS} )" \ - --text-model=$TEXT_MODEL \ - --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \ - --color=yes ${EXTRA_PARAMS} \ - --capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-all.log; then - echo "✅ All tests completed successfully" - else - echo "❌ Tests failed" - exit 1 - fi - - - name: Check Storage and Memory Available After Tests - if: ${{ always() }} - shell: bash - run: | - free -h - df -h - name: Commit and push recordings if: ${{ inputs.inference-mode == 'record' }} diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh new file mode 100755 index 000000000..08d16db51 --- /dev/null +++ b/scripts/integration-tests.sh @@ -0,0 +1,240 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +set -e + +# Integration test runner script for Llama Stack +# This script extracts the integration test logic from GitHub Actions +# to allow developers to run integration tests locally + +# Default values +STACK_CONFIG="" +PROVIDER="" +TEST_TYPES='["inference"]' +RUN_VISION_TESTS="false" +INFERENCE_MODE="replay" +EXTRA_PARAMS="" + +# Function to display usage +usage() { + cat << EOF +Usage: $0 [OPTIONS] + +Options: + --stack-config STRING Stack configuration to use (required) + --provider STRING Provider to use (ollama, vllm, etc.) (required) + --test-types JSON JSON array of test types to run (default: '["inference"]') + --run-vision-tests Run vision tests instead of regular tests + --inference-mode STRING Inference mode: record or replay (default: replay) + --help Show this help message + +Examples: + # Basic inference tests with ollama + $0 --stack-config server:ollama --provider ollama + + # Multiple test types with vllm + $0 --stack-config server:vllm --provider vllm --test-types '["inference", "agents"]' + + # Vision tests with ollama + $0 --stack-config server:ollama --provider ollama --run-vision-tests + + # Record mode for updating test recordings + $0 --stack-config server:ollama --provider ollama --inference-mode record +EOF +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + --stack-config) + STACK_CONFIG="$2" + shift 2 + ;; + --provider) + PROVIDER="$2" + shift 2 + ;; + --test-types) + TEST_TYPES="$2" + shift 2 + ;; + --run-vision-tests) + RUN_VISION_TESTS="true" + shift + ;; + --inference-mode) + INFERENCE_MODE="$2" + shift 2 + ;; + --help) + usage + exit 0 + ;; + *) + echo "Unknown option: $1" + usage + exit 1 + ;; + esac +done + + +# Validate required parameters +if [[ -z "$STACK_CONFIG" ]]; then + echo "Error: --stack-config is required" + usage + exit 1 +fi + +if [[ -z "$PROVIDER" ]]; then + echo "Error: --provider is required" + usage + exit 1 +fi + +echo "=== Llama Stack Integration Test Runner ===" +echo "Stack Config: $STACK_CONFIG" +echo "Provider: $PROVIDER" +echo "Test Types: $TEST_TYPES" +echo "Vision Tests: $RUN_VISION_TESTS" +echo "Inference Mode: $INFERENCE_MODE" +echo "" + +# Check storage and memory before tests +echo "=== System Resources Before Tests ===" +free -h 2>/dev/null || echo "free command not available" +df -h +echo "" + +# Set environment variables +export LLAMA_STACK_CLIENT_TIMEOUT=300 +export LLAMA_STACK_TEST_INFERENCE_MODE="$INFERENCE_MODE" + +# Configure provider-specific settings +if [[ "$PROVIDER" == "ollama" ]]; then + export OLLAMA_URL="http://0.0.0.0:11434" + export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16" + export SAFETY_MODEL="ollama/llama-guard3:1b" + EXTRA_PARAMS="--safety-shield=llama-guard" +else + export VLLM_URL="http://localhost:8000/v1" + export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct" + EXTRA_PARAMS="" +fi + +# Set recording directory +if [[ "$RUN_VISION_TESTS" == "true" ]]; then + export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings/vision" +else + export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings" +fi + +# Start Llama Stack Server if needed +if [[ "$STACK_CONFIG" == *"server:"* ]]; then + echo "=== Starting Llama Stack Server ===" + nohup uv run llama stack run ci-tests --image-type venv > server.log 2>&1 & + + echo "Waiting for Llama Stack Server to start..." + for i in {1..30}; do + if curl -s http://localhost:8321/v1/health 2>/dev/null | grep -q "OK"; then + echo "✅ Llama Stack Server started successfully" + break + fi + if [[ $i -eq 30 ]]; then + echo "❌ Llama Stack Server failed to start" + echo "Server logs:" + cat server.log + exit 1 + fi + sleep 1 + done + echo "" +fi + +# Run tests +echo "=== Running Integration Tests ===" +EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag" + +# Additional exclusions for vllm provider +if [[ "$PROVIDER" == "vllm" ]]; then + EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls" +fi + +# Run vision tests if specified +if [[ "$RUN_VISION_TESTS" == "true" ]]; then + echo "Running vision tests..." + if uv run pytest -s -v tests/integration/inference/test_vision_inference.py \ + --stack-config="$STACK_CONFIG" \ + -k "not( $EXCLUDE_TESTS )" \ + --vision-model=ollama/llama3.2-vision:11b \ + --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \ + --color=yes $EXTRA_PARAMS \ + --capture=tee-sys | tee pytest-${INFERENCE_MODE}-vision.log; then + echo "✅ Vision tests completed successfully" + else + echo "❌ Vision tests failed" + exit 1 + fi + exit 0 +fi + +# Run regular tests +echo "Test types to run: $TEST_TYPES" + +# Collect all test files for the specified test types +TEST_FILES="" +for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do + # Skip certain test types for vllm provider + if [[ "$PROVIDER" == "vllm" ]]; then + if [[ "$test_type" == "safety" ]] || [[ "$test_type" == "post_training" ]] || [[ "$test_type" == "tool_runtime" ]]; then + echo "Skipping $test_type for vllm provider" + continue + fi + fi + + if [[ -d "tests/integration/$test_type" ]]; then + # Find all Python test files in this directory + test_files=$(find tests/integration/$test_type -name "test_*.py" -o -name "*_test.py") + if [[ -n "$test_files" ]]; then + TEST_FILES="$TEST_FILES $test_files" + echo "Added test files from $test_type: $(echo $test_files | wc -w) files" + fi + else + echo "Warning: Directory tests/integration/$test_type does not exist" + fi +done + +if [[ -z "$TEST_FILES" ]]; then + echo "No test files found for the specified test types" + exit 1 +fi + +echo "" +echo "=== Running all collected tests in a single pytest command ===" +echo "Total test files: $(echo $TEST_FILES | wc -w)" + +if uv run pytest -s -v $TEST_FILES \ + --stack-config="$STACK_CONFIG" \ + -k "not( $EXCLUDE_TESTS )" \ + --text-model="$TEXT_MODEL" \ + --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \ + --color=yes $EXTRA_PARAMS \ + --capture=tee-sys | tee pytest-${INFERENCE_MODE}-all.log; then + echo "✅ All tests completed successfully" +else + echo "❌ Tests failed" + exit 1 +fi + +# Check storage and memory after tests +echo "" +echo "=== System Resources After Tests ===" +free -h 2>/dev/null || echo "free command not available" +df -h + +echo "" +echo "=== Integration Tests Complete ==="