ci: run all APIs integration tests (#2646)

# What does this PR do? We are now automatically building the list of integration tests to run. In that process, eval and files are being tested now. This is pending https://github.com/meta-llama/llama-stack/pull/2628 Signed-off-by: Sébastien Han <seb@redhat.com>
2025-07-14 00:56:09 +00:00 · 2025-07-10 15:16:08 +02:00 · 2025-07-10 15:16:08 +02:00 · 01c222e12f
commit 01c222e12f
parent 81109a0f72
3 changed files with 26 additions and 16 deletions
--- a/.github/actions/setup-ollama/action.yml
+++ b/.github/actions/setup-ollama/action.yml
@ -8,4 +8,6 @@ runs:
      run: |
        docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
        # TODO: rebuild an ollama image with llama-guard3:1b
+        echo "Verifying Ollama status..."
+        timeout 30 bash -c 'while ! curl -s -L http://127.0.0.1:11434; do sleep 1 && echo "."; done'
        docker exec ollama ollama pull llama-guard3:1b
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@ -18,16 +18,33 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  test-matrix:
+  discover-tests:
    runs-on: ubuntu-latest
+    outputs:
+      test-type: ${{ steps.generate-matrix.outputs.test-type }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Generate test matrix
+        id: generate-matrix
+        run: |
+          # Get test directories dynamically, excluding non-test directories
+          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
+            grep -Ev "^(__pycache__|fixtures|test_cases)$" |
+            sort | jq -R -s -c 'split("\n")[:-1]')
+          echo "test-type=$TEST_TYPES" >> $GITHUB_OUTPUT
+
+  test-matrix:
+    needs: discover-tests
+    runs-on: ubuntu-latest
+
    strategy:
+      fail-fast: false
      matrix:
-        # Listing tests manually since some of them currently fail
-        # TODO: generate matrix list from tests/integration when fixed
-        test-type: [agents, inference, datasets, inspect, safety, scoring, post_training, providers, tool_runtime, vector_io]
+        test-type: ${{ fromJson(needs.discover-tests.outputs.test-type) }}
        client-type: [library, server]
        python-version: ["3.12", "3.13"]
-      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
@ -51,23 +68,13 @@ jobs:
          free -h
          df -h

-      - name: Verify Ollama status is OK
-        if: matrix.client-type == 'http'
-        run: |
-          echo "Verifying Ollama status..."
-          ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status)
-          echo "Ollama status: $ollama_status"
-          if [ "$ollama_status" != "OK" ]; then
-            echo "Ollama health check failed"
-            exit 1
-          fi
-
      - name: Run Integration Tests
        env:
          OLLAMA_INFERENCE_MODEL: "llama3.2:3b-instruct-fp16" # for server tests
          ENABLE_OLLAMA: "ollama" # for server tests
          OLLAMA_URL: "http://0.0.0.0:11434"
          SAFETY_MODEL: "llama-guard3:1b"
+          LLAMA_STACK_CLIENT_TIMEOUT: "300" # Increased timeout for eval operations
        # Use 'shell' to get pipefail behavior
        # https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference
        # TODO: write a precommit hook to detect if a test contains a pipe but does not use 'shell: bash'
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@ -218,6 +218,7 @@ def llama_stack_client(request, provider_data):
        return LlamaStackClient(
            base_url=base_url,
            provider_data=provider_data,
+            timeout=int(os.environ.get("LLAMA_STACK_CLIENT_TIMEOUT", "30")),
        )

    # check if this looks like a URL using proper URL parsing