Inline the run-integration-tests composite action into run-and-record-tests.

Summary of the patch below:
  * run-and-record-tests/action.yml: adds "shell: bash" to both storage/memory
    check steps, adds the "Run Llama Stack Server" step, and replaces the
    "uses: ./.github/actions/run-integration-tests" call with the inlined
    pytest script. Recording commits made when run-vision-tests is "true"
    get a "(vision)" suffix in the commit message.
  * run-integration-tests/action.yml: deleted (its two steps now live above).
  * integration-tests.yml / integration-vision-tests.yml: drop the deleted
    action from the path lists under "on:".

NOTE(review): this patch had been flattened onto a few long lines; line breaks
and indentation were reconstructed from the hunk headers and the YAML/shell
nesting. Confirm with "git apply --check" before applying.

diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml
index ac3edba78..c85b36e08 100644
--- a/.github/actions/run-and-record-tests/action.yml
+++ b/.github/actions/run-and-record-tests/action.yml
@@ -24,21 +24,103 @@ runs:
   steps:
     - name: Check Storage and Memory Available Before Tests
       if: ${{ always() }}
+      shell: bash
       run: |
         free -h
         df -h
 
+    - name: Run Llama Stack Server
+      if: ${{ contains(inputs.stack-config, 'server:') }}
+      shell: bash
+      run: |
+        # Run this so pytest in a loop doesn't start-stop servers in a loop
+        echo "Starting Llama Stack Server"
+        nohup uv run llama stack run ci-tests --image-type venv > server.log 2>&1 &
+
+        echo "Waiting for Llama Stack Server to start"
+        for i in {1..30}; do
+          if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
+            echo "Llama Stack Server started"
+            exit 0
+          fi
+          sleep 1
+        done
+
+        echo "Llama Stack Server failed to start"
+        cat server.log
+        exit 1
+
     - name: Run Integration Tests
-      uses: ./.github/actions/run-integration-tests
-      with:
-        test-types: ${{ inputs.test-types }}
-        stack-config: ${{ inputs.stack-config }}
-        provider: ${{ inputs.provider }}
-        inference-mode: ${{ inputs.inference-mode }}
-        run-vision-tests: ${{ inputs.run-vision-tests }}
+      env:
+        LLAMA_STACK_CLIENT_TIMEOUT: "300"
+      shell: bash
+      run: |
+        stack_config="${{ inputs.stack-config }}"
+        EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
+
+        export LLAMA_STACK_TEST_INFERENCE_MODE="${{ inputs.inference-mode }}"
+
+        # Configure provider-specific settings
+        if [ "${{ inputs.provider }}" == "ollama" ]; then
+          export OLLAMA_URL="http://0.0.0.0:11434"
+          export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16"
+          export SAFETY_MODEL="ollama/llama-guard3:1b"
+          EXTRA_PARAMS="--safety-shield=llama-guard"
+        else
+          export VLLM_URL="http://localhost:8000/v1"
+          export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct"
+          EXTRA_PARAMS=""
+          EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
+        fi
+
+        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
+          export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings/vision"
+          if uv run pytest -s -v tests/integration/inference/test_vision_inference.py --stack-config=${stack_config} \
+            -k "not( ${EXCLUDE_TESTS} )" \
+            --vision-model=ollama/llama3.2-vision:11b \
+            --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
+            --color=yes ${EXTRA_PARAMS} \
+            --capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-vision.log; then
+            echo "✅ Tests completed for vision"
+          else
+            echo "❌ Tests failed for vision"
+            exit 1
+          fi
+
+          exit 0
+        fi
+
+        # Run non-vision tests
+        export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings"
+        TEST_TYPES='${{ inputs.test-types }}'
+        echo "Test types to run: $TEST_TYPES"
+
+        for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
+          # if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
+          if [ "${{ inputs.provider }}" == "vllm" ]; then
+            if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
+              continue
+            fi
+          fi
+
+          echo "=== Running tests for: $test_type ==="
+
+          if uv run pytest -s -v tests/integration/$test_type --stack-config=${stack_config} \
+            -k "not( ${EXCLUDE_TESTS} )" \
+            --text-model=$TEXT_MODEL \
+            --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
+            --color=yes ${EXTRA_PARAMS} \
+            --capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-$test_type.log; then
+            echo "✅ Tests completed for $test_type"
+          else
+            echo "❌ Tests failed for $test_type"
+            exit 1
+          fi
+        done
 
     - name: Check Storage and Memory Available After Tests
       if: ${{ always() }}
+      shell: bash
       run: |
         free -h
         df -h
@@ -47,11 +129,18 @@ runs:
       if: ${{ inputs.inference-mode == 'record' }}
       shell: bash
       run: |
+        echo "Checking for recording changes"
         git status --porcelain tests/integration/recordings/
+
         if [[ -n $(git status --porcelain tests/integration/recordings/) ]]; then
           echo "New recordings detected, committing and pushing"
           git add tests/integration/recordings/
-          git commit -m "Recordings update from CI"
+
+          if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
+            git commit -m "Recordings update from CI (vision)"
+          else
+            git commit -m "Recordings update from CI"
+          fi
 
           git fetch origin ${{ github.event.pull_request.head.ref }}
           git rebase origin/${{ github.event.pull_request.head.ref }}
diff --git a/.github/actions/run-integration-tests/action.yml b/.github/actions/run-integration-tests/action.yml
deleted file mode 100644
index 7d93d2a61..000000000
--- a/.github/actions/run-integration-tests/action.yml
+++ /dev/null
@@ -1,116 +0,0 @@
-name: 'Run Integration Tests'
-description: 'Run integration tests with configurable execution mode and provider settings'
-
-inputs:
-  test-types:
-    description: 'Test types to run (JSON array)'
-    required: true
-  stack-config:
-    description: 'Stack configuration: "ci-tests" or "server:ci-tests"'
-    required: true
-  provider:
-    description: 'Provider to use: "ollama" or "vllm"'
-    required: true
-  inference-mode:
-    description: 'Inference mode: "record" or "replay"'
-    required: true
-  run-vision-tests:
-    description: 'Run vision tests: "true" or "false"'
-    required: false
-    default: 'false'
-
-outputs:
-  logs-path:
-    description: 'Path to generated log files'
-    value: '*.log'
-
-runs:
-  using: 'composite'
-  steps:
-    - name: Run Llama Stack Server
-      if: ${{ contains(inputs.stack-config, 'server:') }}
-      run: |
-        # Run this so pytest in a loop doesn't start-stop servers in a loop
-        echo "Starting Llama Stack Server"
-        nohup uv run llama stack run ci-tests --image-type venv > server.log 2>&1 &
-
-        echo "Waiting for Llama Stack Server to start"
-        for i in {1..30}; do
-          if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
-            echo "Llama Stack Server started"
-            exit 0
-          fi
-          sleep 1
-        done
-
-        echo "Llama Stack Server failed to start"
-        cat server.log
-        exit 1
-
-    - name: Run Integration Tests
-      env:
-        LLAMA_STACK_CLIENT_TIMEOUT: "300"
-      shell: bash
-      run: |
-        stack_config="${{ inputs.stack-config }}"
-        EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
-
-        export LLAMA_STACK_TEST_INFERENCE_MODE="${{ inputs.inference-mode }}"
-
-        # Configure provider-specific settings
-        if [ "${{ inputs.provider }}" == "ollama" ]; then
-          export OLLAMA_URL="http://0.0.0.0:11434"
-          export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16"
-          export SAFETY_MODEL="ollama/llama-guard3:1b"
-          EXTRA_PARAMS="--safety-shield=llama-guard"
-        else
-          export VLLM_URL="http://localhost:8000/v1"
-          export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct"
-          EXTRA_PARAMS=""
-          EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
-        fi
-
-        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
-          export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings/vision"
-          if uv run pytest -s -v tests/integration/inference/test_vision_inference.py --stack-config=${stack_config} \
-            -k "not( ${EXCLUDE_TESTS} )" \
-            --vision-model=ollama/llama3.2-vision:11b \
-            --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
-            --color=yes ${EXTRA_PARAMS} \
-            --capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-vision.log; then
-            echo "✅ Tests completed for vision"
-          else
-            echo "❌ Tests failed for vision"
-            exit 1
-          fi
-
-          exit 0
-        fi
-
-        # Run non-vision tests
-        export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings"
-        TEST_TYPES='${{ inputs.test-types }}'
-        echo "Test types to run: $TEST_TYPES"
-
-        for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
-          # if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
-          if [ "${{ inputs.provider }}" == "vllm" ]; then
-            if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
-              continue
-            fi
-          fi
-
-          echo "=== Running tests for: $test_type ==="
-
-          if uv run pytest -s -v tests/integration/$test_type --stack-config=${stack_config} \
-            -k "not( ${EXCLUDE_TESTS} )" \
-            --text-model=$TEXT_MODEL \
-            --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
-            --color=yes ${EXTRA_PARAMS} \
-            --capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-$test_type.log; then
-            echo "✅ Tests completed for $test_type"
-          else
-            echo "❌ Tests failed for $test_type"
-            exit 1
-          fi
-        done
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index cf8a09b5f..b893ce2ca 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -17,7 +17,6 @@ on:
       - '.github/actions/setup-ollama/action.yml'
       - '.github/actions/setup-test-environment/action.yml'
       - '.github/actions/run-and-record-tests/action.yml'
-      - '.github/actions/run-integration-tests/action.yml'
   schedule:
     # If changing the cron schedule, update the provider in the test-matrix job
     - cron: '0 0 * * *'  # (test latest client) Daily at 12 AM UTC
diff --git a/.github/workflows/integration-vision-tests.yml b/.github/workflows/integration-vision-tests.yml
index c87b25e9f..1c07d034c 100644
--- a/.github/workflows/integration-vision-tests.yml
+++ b/.github/workflows/integration-vision-tests.yml
@@ -17,7 +17,6 @@ on:
       - '.github/actions/setup-ollama/action.yml'
       - '.github/actions/setup-test-environment/action.yml'
       - '.github/actions/run-and-record-tests/action.yml'
-      - '.github/actions/run-integration-tests/action.yml'
   workflow_dispatch:
     inputs:
       test-all-client-versions: