From 66f3cf4002632f3278221587b1f0c116a2871f2e Mon Sep 17 00:00:00 2001
From: Charlie Doern <cdoern@redhat.com>
Date: Tue, 16 Dec 2025 12:20:40 -0500
Subject: [PATCH] feat: wire Stainless preview SDK into integration tests
 (#4360)

# What does this PR do?

Enable the stainless-builds workflow to test preview SDKs by calling the
integration-tests workflow with the sdk_install_url input. Add a
"stainless" matrix config for faster CI runs on SDK changes.

- Make integration-tests.yml reusable with workflow_call inputs
- Thread sdk_install_url through the test setup actions to install the
  preview SDK
- Add matrix_key parameter to generate_ci_matrix.py for custom matrices
- Update stainless-builds.yml to call integration tests with preview URL

This allows us to test a client on the PR introducing the new changes
before merging. Contributors can even write new tests using the
generated client; those tests should pass on the PR, indicating that
they will pass on main upon merge.

## Test Plan

See the triggered action using the workflows on this branch:
https://github.com/llamastack/llama-stack/actions/runs/20239891299/job/58105940429
which installs the Stainless SDK from the given URL.

---------

Signed-off-by: Charlie Doern <cdoern@redhat.com>
---
 .../install-llama-stack-client/action.yml     | 12 +++++++
 .github/actions/setup-runner/action.yml       |  5 +++
 .../actions/setup-test-environment/action.yml | 14 ++++++++
 .github/workflows/integration-tests.yml       | 28 ++++++++++++---
 .github/workflows/stainless-builds.yml        | 35 +++++++++++++++++--
 scripts/generate_ci_matrix.py                 | 17 ++++++---
 tests/integration/ci_matrix.json              |  3 ++
 7 files changed, 102 insertions(+), 12 deletions(-)

diff --git a/.github/actions/install-llama-stack-client/action.yml b/.github/actions/install-llama-stack-client/action.yml
index 3c1c77d9c..fc359f1fa 100644
--- a/.github/actions/install-llama-stack-client/action.yml
+++ b/.github/actions/install-llama-stack-client/action.yml
@@ -6,6 +6,10 @@ inputs:
     description: 'Client version to install on non-release branches (latest or published). Ignored on release branches.'
     required: false
     default: ""
+  sdk_install_url:
+    description: 'URL to install Python SDK from (for testing preview builds). If provided, overrides client-version.'
+    required: false
+    default: ""
 
 outputs:
   uv-extra-index-url:
@@ -25,6 +29,14 @@ runs:
       id: configure
       shell: bash
       run: |
+        # If sdk_install_url is provided (e.g., from Stainless preview), use it directly
+        if [ -n "${{ inputs.sdk_install_url }}" ]; then
+          echo "Using provided sdk_install_url: ${{ inputs.sdk_install_url }}"
+          echo "install-after-sync=true" >> $GITHUB_OUTPUT
+          echo "install-source=${{ inputs.sdk_install_url }}" >> $GITHUB_OUTPUT
+          exit 0
+        fi
+
         # Determine the branch we're working with
         BRANCH="${{ github.base_ref || github.ref }}"
         BRANCH="${BRANCH#refs/heads/}"
diff --git a/.github/actions/setup-runner/action.yml b/.github/actions/setup-runner/action.yml
index 6c642b310..b65288764 100644
--- a/.github/actions/setup-runner/action.yml
+++ b/.github/actions/setup-runner/action.yml
@@ -9,6 +9,10 @@ inputs:
     description: The llama-stack-client-python version to test against (latest or published)
     required: false
     default: "latest"
+  sdk_install_url:
+    description: 'URL to install Python SDK from (for testing preview builds). If provided, overrides client-version.'
+    required: false
+    default: ""
 runs:
   using: "composite"
   steps:
@@ -22,6 +26,7 @@ runs:
       uses: ./.github/actions/install-llama-stack-client
       with:
         client-version: ${{ inputs.client-version }}
+        sdk_install_url: ${{ inputs.sdk_install_url }}
 
     - name: Install dependencies
       shell: bash
diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml
index 1c9d019cc..55d403406 100644
--- a/.github/actions/setup-test-environment/action.yml
+++ b/.github/actions/setup-test-environment/action.yml
@@ -8,6 +8,10 @@ inputs:
   client-version:
     description: 'Client version (latest or published)'
     required: true
+  sdk_install_url:
+    description: 'URL to install Python SDK from (for testing preview builds). If provided, overrides client-version.'
+    required: false
+    default: ''
   setup:
     description: 'Setup to configure (ollama, vllm, gpt, etc.)'
     required: false
@@ -28,6 +32,7 @@ runs:
       with:
         python-version: ${{ inputs.python-version }}
         client-version: ${{ inputs.client-version }}
+        sdk_install_url: ${{ inputs.sdk_install_url }}
 
     - name: Setup ollama
       if: ${{ (inputs.setup == 'ollama' || inputs.setup == 'ollama-vision') && inputs.inference-mode == 'record' }}
@@ -65,6 +70,15 @@ runs:
           sleep 2
         done
 
+    - name: Verify client installation
+      shell: bash
+      run: |
+        echo "Verifying llama-stack-client installation:"
+        uv pip show llama-stack-client || echo "llama-stack-client not found"
+        echo ""
+        echo "All installed llama packages:"
+        uv pip list | grep llama || true
+
     - name: Build Llama Stack
       shell: bash
       run: |
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index b731dfae7..f3b5b95a9 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -37,6 +37,22 @@ on:
         description: 'Test against a specific setup'
         type: string
         default: 'ollama'
+  workflow_call:
+    inputs:
+      sdk_install_url:
+        required: false
+        type: string
+        description: 'URL to install Python SDK from (for testing preview builds)'
+      matrix_key:
+        required: false
+        type: string
+        default: 'default'
+        description: 'Matrix configuration key from ci_matrix.json (e.g., "default", "stainless")'
+      test-all-client-versions:
+        required: false
+        type: boolean
+        default: false
+        description: 'Test against both the latest and published versions'
 
 concurrency:
   # Skip concurrency for pushes to main - each commit should be tested independently
@@ -55,11 +71,12 @@ jobs:
       - name: Generate test matrix
         id: set-matrix
         run: |
-          # Generate matrix from CI_MATRIX in tests/integration/suites.py
-          # Supports schedule-based and manual input overrides
+          # Generate matrix from CI_MATRIX in tests/integration/ci_matrix.json
+          # Supports schedule-based, manual input, and workflow_call overrides
           MATRIX=$(PYTHONPATH=. python3 scripts/generate_ci_matrix.py \
             --schedule "${{ github.event.schedule }}" \
-            --test-setup "${{ github.event.inputs.test-setup }}")
+            --test-setup "${{ github.event.inputs.test-setup || '' }}" \
+            --matrix-key "${{ inputs.matrix_key || 'default' }}")
           echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
           echo "Generated matrix: $MATRIX"
 
@@ -75,8 +92,8 @@ jobs:
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         node-version: [22]
-        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-        # Test configurations: Generated from CI_MATRIX in tests/integration/suites.py
+        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true' || inputs.test-all-client-versions == true) && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        # Test configurations: Generated from CI_MATRIX in tests/integration/ci_matrix.json
         # See scripts/generate_ci_matrix.py for generation logic
         config: ${{ fromJSON(needs.generate-matrix.outputs.matrix).include }}
 
@@ -90,6 +107,7 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
           client-version: ${{ matrix.client-version }}
+          sdk_install_url: ${{ inputs.sdk_install_url || '' }}
           setup: ${{ matrix.config.setup }}
           suite: ${{ matrix.config.suite }}
           inference-mode: 'replay'
diff --git a/.github/workflows/stainless-builds.yml b/.github/workflows/stainless-builds.yml
index 5ab0cb4e0..880324756 100644
--- a/.github/workflows/stainless-builds.yml
+++ b/.github/workflows/stainless-builds.yml
@@ -22,6 +22,10 @@ on:
         description: 'PR number to run Stainless build for'
         required: true
         type: number
+      sdk_install_url:
+        description: 'Python SDK install URL (optional, for testing specific builds)'
+        required: false
+        type: string
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || inputs.pr_number || github.run_id }}
@@ -61,6 +65,7 @@ jobs:
       pr_head_sha: ${{ steps.compute.outputs.pr_head_sha }}
       pr_base_sha: ${{ steps.compute.outputs.pr_base_sha }}
       pr_base_ref: ${{ steps.compute.outputs.pr_base_ref }}
+      pr_title: ${{ steps.compute.outputs.pr_title }}
     steps:
       - name: Fetch PR details for workflow_dispatch
         if: github.event_name == 'workflow_dispatch'
@@ -68,7 +73,7 @@ jobs:
         env:
           GH_TOKEN: ${{ github.token }}
         run: |
-          PR_DATA=$(gh pr view ${{ inputs.pr_number }} --repo ${{ github.repository }} --json headRefName,headRepository,headRefOid,baseRefName,baseRefOid,headRepositoryOwner)
+          PR_DATA=$(gh pr view ${{ inputs.pr_number }} --repo ${{ github.repository }} --json headRefName,headRepository,headRefOid,baseRefName,baseRefOid,headRepositoryOwner,title)
           echo "pr_data=$PR_DATA" >> $GITHUB_OUTPUT
 
       - name: Compute branch names
@@ -84,6 +89,7 @@ jobs:
             HEAD_SHA=$(echo "$PR_DATA" | jq -r '.headRefOid')
             BASE_SHA=$(echo "$PR_DATA" | jq -r '.baseRefOid')
             BASE_REF=$(echo "$PR_DATA" | jq -r '.baseRefName')
+            PR_TITLE=$(echo "$PR_DATA" | jq -r '.title')
           else
             # Use pull_request_target event data
             HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}"
@@ -92,6 +98,7 @@ jobs:
             HEAD_SHA="${{ github.event.pull_request.head.sha }}"
             BASE_SHA="${{ github.event.pull_request.base.sha }}"
             BASE_REF="${{ github.event.pull_request.base.ref }}"
+            PR_TITLE=$(jq -r '.pull_request.title' "$GITHUB_EVENT_PATH")  # read from the event payload: expression-interpolating an attacker-controlled title into the script enables shell injection
           fi
 
           BASE_REPO="${{ github.repository }}"
@@ -118,14 +125,20 @@ jobs:
           echo "pr_head_sha=${HEAD_SHA}" >> $GITHUB_OUTPUT
           echo "pr_base_sha=${BASE_SHA}" >> $GITHUB_OUTPUT
           echo "pr_base_ref=${BASE_REF}" >> $GITHUB_OUTPUT
+          echo "pr_title=${PR_TITLE}" >> $GITHUB_OUTPUT
 
   preview:
     needs: compute-branch
-    if: github.event_name == 'workflow_dispatch' || github.event.action != 'closed'
+    # Skip preview if workflow_dispatch provides sdk_install_url, or if PR is being closed
+    if: |
+      (github.event_name == 'workflow_dispatch' && inputs.sdk_install_url == '') ||
+      (github.event_name == 'pull_request_target' && github.event.action != 'closed')
     runs-on: ubuntu-latest
     permissions:
       contents: read
       pull-requests: write
+    outputs:
+      sdk_install_url: ${{ fromJSON(steps.run-preview.outputs.outcomes || '{}').python.install_url || '' }}
     steps:
       # Checkout the PR's code to access the OpenAPI spec and config files.
       # This is necessary to read the spec/config from the PR (including from forks).
@@ -137,7 +150,10 @@ jobs:
           fetch-depth: 2
 
       - name: Run preview builds
+        id: run-preview
         uses: stainless-api/upload-openapi-spec-action/preview@979824f1ea5f44334940f0768d04642b6cdaa0d1 # 1.8.1
+        env:
+          PR_NUMBER: ${{ inputs.pr_number || github.event.pull_request.number }}
         with:
           stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
           org: ${{ env.STAINLESS_ORG }}
@@ -150,6 +166,21 @@ jobs:
           head_sha: ${{ needs.compute-branch.outputs.pr_head_sha }}
           branch: ${{ needs.compute-branch.outputs.preview_branch }}
           base_branch: ${{ needs.compute-branch.outputs.base_branch }}
+          commit_message: ${{ needs.compute-branch.outputs.pr_title }}
+          make_comment: true
+
+  run-integration-tests:
+    needs: [compute-branch, preview]
+    if: |
+      always() &&
+      (needs.preview.result == 'success' || needs.preview.result == 'skipped') &&
+      (github.event_name == 'workflow_dispatch' || github.event.action != 'closed')
+    uses: ./.github/workflows/integration-tests.yml
+    with:
+      # Use provided sdk_install_url from workflow_dispatch, or from preview build
+      sdk_install_url: ${{ inputs.sdk_install_url || needs.preview.outputs.sdk_install_url }}
+      matrix_key: 'stainless'
+      test-all-client-versions: false
 
   merge:
     needs: compute-branch
diff --git a/scripts/generate_ci_matrix.py b/scripts/generate_ci_matrix.py
index 0d4e924b3..f6e68ba8a 100755
--- a/scripts/generate_ci_matrix.py
+++ b/scripts/generate_ci_matrix.py
@@ -24,24 +24,30 @@ DEFAULT_MATRIX = matrix_config["default"]
 SCHEDULE_MATRICES: dict[str, list[dict[str, str]]] = matrix_config.get("schedules", {})
 
 
-def generate_matrix(schedule="", test_setup=""):
+def generate_matrix(schedule="", test_setup="", matrix_key="default"):
     """
-    Generate test matrix based on schedule or manual input.
+    Generate test matrix based on schedule, manual input, or matrix key.
 
     Args:
         schedule: GitHub cron schedule string (e.g., "1 0 * * 0" for weekly)
         test_setup: Manual test setup input (e.g., "ollama-vision")
+        matrix_key: Matrix configuration key from ci_matrix.json (e.g., "default", "stainless")
 
     Returns:
         Matrix configuration as JSON string
     """
-    # Weekly scheduled test matrices
+    # Weekly scheduled test matrices (highest priority)
     if schedule and schedule in SCHEDULE_MATRICES:
         matrix = SCHEDULE_MATRICES[schedule]
     # Manual input for specific setup
     elif test_setup == "ollama-vision":
         matrix = [{"suite": "vision", "setup": "ollama-vision"}]
-    # Default: use JSON-defined matrix
+    # Use specified matrix key from ci_matrix.json
+    elif matrix_key:
+        if matrix_key not in matrix_config:
+            raise ValueError(f"Invalid matrix_key '{matrix_key}'. Available keys: {list(matrix_config.keys())}")
+        matrix = matrix_config[matrix_key]
+    # Default: use JSON-defined default matrix
     else:
         matrix = DEFAULT_MATRIX
 
@@ -55,7 +61,8 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Generate CI test matrix")
     parser.add_argument("--schedule", default="", help="GitHub schedule cron string")
     parser.add_argument("--test-setup", default="", help="Manual test setup input")
+    parser.add_argument("--matrix-key", default="default", help="Matrix configuration key from ci_matrix.json")
 
     args = parser.parse_args()
 
-    print(generate_matrix(args.schedule, args.test_setup))
+    print(generate_matrix(args.schedule, args.test_setup, args.matrix_key))
diff --git a/tests/integration/ci_matrix.json b/tests/integration/ci_matrix.json
index 43678e5c7..fcadf95df 100644
--- a/tests/integration/ci_matrix.json
+++ b/tests/integration/ci_matrix.json
@@ -6,6 +6,9 @@
     {"suite": "responses", "setup": "gpt"},
     {"suite": "base-vllm-subset", "setup": "vllm"}
   ],
+  "stainless": [
+    {"suite": "base", "setup": "ollama", "allowed_clients": ["library"]}
+  ],
   "schedules": {
     "1 0 * * 0": [
       {"suite": "base", "setup": "vllm"}