From 01c222e12f8b5e6c1cf8c2661bfe69e5680415c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?= <seb@redhat.com>
Date: Thu, 10 Jul 2025 15:16:08 +0200
Subject: [PATCH] ci: run all APIs integration tests (#2646)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

We are now automatically building the list of integration tests to run.
In that process, eval and files are being tested now.

This is pending https://github.com/meta-llama/llama-stack/pull/2628

Signed-off-by: Sébastien Han <seb@redhat.com>
---
 .github/actions/setup-ollama/action.yml |  2 ++
 .github/workflows/integration-tests.yml | 39 +++++++++++++++----------
 tests/integration/fixtures/common.py    |  1 +
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/.github/actions/setup-ollama/action.yml b/.github/actions/setup-ollama/action.yml
index da24839c2..37a369a9a 100644
--- a/.github/actions/setup-ollama/action.yml
+++ b/.github/actions/setup-ollama/action.yml
@@ -8,4 +8,6 @@ runs:
       run: |
         docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
         # TODO: rebuild an ollama image with llama-guard3:1b
+        echo "Verifying Ollama status..."
+        timeout 30 bash -c 'while ! curl -s -L http://127.0.0.1:11434; do sleep 1 && echo "."; done'
         docker exec ollama ollama pull llama-guard3:1b
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index b102191f2..c46100c38 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -18,16 +18,33 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  test-matrix:
+  discover-tests:
     runs-on: ubuntu-latest
+    outputs:
+      test-type: ${{ steps.generate-matrix.outputs.test-type }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Generate test matrix
+        id: generate-matrix
+        run: |
+          # Get test directories dynamically, excluding non-test directories
+          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
+            grep -Ev "^(__pycache__|fixtures|test_cases)$" |
+            sort | jq -R -s -c 'split("\n")[:-1]')
+          echo "test-type=$TEST_TYPES" >> $GITHUB_OUTPUT
+
+  test-matrix:
+    needs: discover-tests
+    runs-on: ubuntu-latest
+
     strategy:
+      fail-fast: false
       matrix:
-        # Listing tests manually since some of them currently fail
-        # TODO: generate matrix list from tests/integration when fixed
-        test-type: [agents, inference, datasets, inspect, safety, scoring, post_training, providers, tool_runtime, vector_io]
+        test-type: ${{ fromJson(needs.discover-tests.outputs.test-type) }}
         client-type: [library, server]
         python-version: ["3.12", "3.13"]
-      fail-fast: false # we want to run all tests regardless of failure
 
     steps:
       - name: Checkout repository
@@ -51,23 +68,13 @@ jobs:
           free -h
           df -h
 
-      - name: Verify Ollama status is OK
-        if: matrix.client-type == 'http'
-        run: |
-          echo "Verifying Ollama status..."
-          ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status)
-          echo "Ollama status: $ollama_status"
-          if [ "$ollama_status" != "OK" ]; then
-            echo "Ollama health check failed"
-            exit 1
-          fi
-
       - name: Run Integration Tests
         env:
           OLLAMA_INFERENCE_MODEL: "llama3.2:3b-instruct-fp16" # for server tests
           ENABLE_OLLAMA: "ollama" # for server tests
           OLLAMA_URL: "http://0.0.0.0:11434"
           SAFETY_MODEL: "llama-guard3:1b"
+          LLAMA_STACK_CLIENT_TIMEOUT: "300" # Increased timeout for eval operations
         # Use 'shell' to get pipefail behavior
         # https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference
         # TODO: write a precommit hook to detect if a test contains a pipe but does not use 'shell: bash'
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 28a047ea5..749793b64 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -218,6 +218,7 @@ def llama_stack_client(request, provider_data):
         return LlamaStackClient(
             base_url=base_url,
             provider_data=provider_data,
+            timeout=int(os.environ.get("LLAMA_STACK_CLIENT_TIMEOUT", "30")),
         )
 
     # check if this looks like a URL using proper URL parsing