diff --git a/.github/actions/setup-ollama/action.yml b/.github/actions/setup-ollama/action.yml
index da24839c2..37a369a9a 100644
--- a/.github/actions/setup-ollama/action.yml
+++ b/.github/actions/setup-ollama/action.yml
@@ -8,4 +8,7 @@ runs:
       run: |
         docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
         # TODO: rebuild an ollama image with llama-guard3:1b
+        # Wait up to 30s for the Ollama API before pulling; -f makes curl treat HTTP errors as "not ready yet"
+        echo "Verifying Ollama status..."
+        timeout 30 bash -c 'until curl -fsL -o /dev/null http://127.0.0.1:11434; do sleep 1 && echo "."; done' || { echo "Ollama did not become ready within 30s" >&2; exit 1; }
         docker exec ollama ollama pull llama-guard3:1b
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index b102191f2..c46100c38 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -18,16 +18,33 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  test-matrix:
+  discover-tests:
     runs-on: ubuntu-latest
+    outputs:
+      test-type: ${{ steps.generate-matrix.outputs.test-type }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Generate test matrix
+        id: generate-matrix
+        shell: bash # explicit bash enables pipefail, so a failing find/grep/jq fails this step
+        run: |
+          # Emit every tests/integration subdirectory (minus non-test dirs) as a compact JSON array
+          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
+            grep -Ev "^(__pycache__|fixtures|test_cases)$" | sort | jq -R -s -c 'split("\n")[:-1]')
+          echo "test-type=$TEST_TYPES" >> "$GITHUB_OUTPUT"
+
+  test-matrix:
+    needs: discover-tests
+    runs-on: ubuntu-latest
+
     strategy:
+      fail-fast: false # keep running the remaining matrix jobs when one of them fails
       matrix:
-        # Listing tests manually since some of them currently fail
-        # TODO: generate matrix list from tests/integration when fixed
-        test-type: [agents, inference, datasets, inspect, safety, scoring, post_training, providers, tool_runtime, vector_io]
+        test-type: ${{ fromJson(needs.discover-tests.outputs.test-type) }} # JSON array produced by the discover-tests job
         client-type: [library, server]
         python-version: ["3.12", "3.13"]
-      fail-fast: false # we want to run all tests regardless of failure
 
     steps:
       - name: Checkout repository
@@ -51,23 +68,13 @@ jobs:
           free -h
           df -h
 
-      - name: Verify Ollama status is OK
-        if: matrix.client-type == 'http'
-        run: |
-          echo "Verifying Ollama status..."
-          ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status)
-          echo "Ollama status: $ollama_status"
-          if [ "$ollama_status" != "OK" ]; then
-            echo "Ollama health check failed"
-            exit 1
-          fi
-
       - name: Run Integration Tests
         env:
           OLLAMA_INFERENCE_MODEL: "llama3.2:3b-instruct-fp16" # for server tests
           ENABLE_OLLAMA: "ollama" # for server tests
           OLLAMA_URL: "http://0.0.0.0:11434"
           SAFETY_MODEL: "llama-guard3:1b"
+          LLAMA_STACK_CLIENT_TIMEOUT: "300" # read by tests/integration/fixtures/common.py (default 30); raised for slow eval operations
         # Use 'shell' to get pipefail behavior
         # https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference
         # TODO: write a precommit hook to detect if a test contains a pipe but does not use 'shell: bash'
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 28a047ea5..749793b64 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -218,6 +218,7 @@ def llama_stack_client(request, provider_data):
         return LlamaStackClient(
             base_url=base_url,
             provider_data=provider_data,
+            timeout=int(os.environ.get("LLAMA_STACK_CLIENT_TIMEOUT", "30")),  # env override; falls back to 30 when unset
         )
 
     # check if this looks like a URL using proper URL parsing