Merge branch 'main' into add-mongodb-vector_io

2025-12-03 18:00:36 +00:00 · 2025-11-11 11:13:23 -08:00 · 2025-11-11 11:13:23 -08:00 · 5e9d28f0b4
commit 5e9d28f0b4
parent 9460c25f60 71b328fc4b
1791 changed files with 125464 additions and 386541 deletions
--- a/.github/actions/run-and-record-tests/action.yml
+++ b/.github/actions/run-and-record-tests/action.yml
@ -72,7 +72,8 @@ runs:
          echo "New recordings detected, committing and pushing"
          git add tests/integration/

-          git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
+          git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
+
          git fetch origin ${{ github.ref_name }}
          git rebase origin/${{ github.ref_name }}
          echo "Rebased successfully"
@ -88,6 +89,8 @@ runs:
      run: |
        # Ollama logs (if ollama container exists)
        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
+        # vllm logs (if vllm container exists)
+        sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
        # Note: distro container logs are now dumped in integration-tests.sh before container is removed

    - name: Upload logs
--- a/.github/actions/setup-vllm/action.yml
+++ b/.github/actions/setup-vllm/action.yml
@ -11,13 +11,14 @@ runs:
          --name vllm \
          -p 8000:8000 \
          --privileged=true \
-          quay.io/higginsd/vllm-cpu:65393ee064 \
+          quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
          --host 0.0.0.0 \
          --port 8000 \
          --enable-auto-tool-choice \
-          --tool-call-parser llama3_json \
-          --model /root/.cache/Llama-3.2-1B-Instruct \
-          --served-model-name meta-llama/Llama-3.2-1B-Instruct
+          --tool-call-parser hermes \
+          --model /root/.cache/Qwen3-0.6B \
+          --served-model-name Qwen/Qwen3-0.6B \
+          --max-model-len 8192

          # Wait for vllm to be ready
          echo "Waiting for vllm to be ready..."
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@ -22,7 +22,7 @@ updates:
      prefix: chore(python-deps)

  - package-ecosystem: npm
-    directory: "/llama_stack/ui"
+    directory: "/llama_stack_ui"
    schedule:
      interval: "weekly"
      day: "saturday"
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@ -18,6 +18,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
 | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
 | Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
 | Check semantic PR titles | [semantic-pr.yml](semantic-pr.yml) | Ensure that PR titles follow the conventional commit spec |
+| Stainless SDK Builds | [stainless-builds.yml](stainless-builds.yml) | Build Stainless SDK from OpenAPI spec changes |
 | Close stale issues and PRs | [stale_bot.yml](stale_bot.yml) | Run the Stale Bot action |
 | Test External Providers Installed via Module | [test-external-provider-module.yml](test-external-provider-module.yml) | Test External Provider installation via Python module |
 | Test External API and Providers | [test-external.yml](test-external.yml) | Test the External API and Provider mechanisms |
--- a/.github/workflows/integration-auth-tests.yml
+++ b/.github/workflows/integration-auth-tests.yml
@ -14,7 +14,7 @@ on:
    paths:
      - 'distributions/**'
      - 'src/llama_stack/**'
-      - '!src/llama_stack/ui/**'
+      - '!src/llama_stack_ui/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@ -14,7 +14,7 @@ on:
    types: [opened, synchronize, reopened]
    paths:
      - 'src/llama_stack/**'
-      - '!src/llama_stack/ui/**'
+      - '!src/llama_stack_ui/**'
      - 'tests/**'
      - 'uv.lock'
      - 'pyproject.toml'
@ -23,10 +23,10 @@ on:
      - '.github/actions/setup-test-environment/action.yml'
      - '.github/actions/run-and-record-tests/action.yml'
      - 'scripts/integration-tests.sh'
+      - 'scripts/generate_ci_matrix.py'
  schedule:
    # If changing the cron schedule, update the provider in the test-matrix job
    - cron: '0 0 * * *'  # (test latest client) Daily at 12 AM UTC
-    - cron: '1 0 * * 0'  # (test vllm) Weekly on Sunday at 1 AM UTC
  workflow_dispatch:
    inputs:
      test-all-client-versions:
@ -44,8 +44,34 @@ concurrency:
  cancel-in-progress: true

 jobs:
+  # Produces the test-configuration matrix that run-replay-mode-tests consumes
+  # through needs.generate-matrix.outputs.matrix.
+  generate-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+
+      - name: Generate test matrix
+        id: set-matrix
+        # Hand event data to the script through env vars instead of expanding
+        # expressions inside the shell text, so the values are never parsed as shell code.
+        env:
+          SCHEDULE: ${{ github.event.schedule }}
+          TEST_SETUP: ${{ github.event.inputs.test-setup }}
+        run: |
+          # Generate matrix from CI_MATRIX in tests/integration/suites.py
+          # Supports schedule-based and manual input overrides
+          MATRIX=$(PYTHONPATH=. python3 scripts/generate_ci_matrix.py \
+            --schedule "$SCHEDULE" \
+            --test-setup "$TEST_SETUP")
+          echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
+          echo "Generated matrix: $MATRIX"

  run-replay-mode-tests:
+    needs: generate-matrix
    runs-on: ubuntu-latest
    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}

@ -56,18 +75,9 @@ jobs:
        # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-        # Define (setup, suite) pairs - they are always matched and cannot be independent
-        # Weekly schedule (Sun 1 AM): vllm+base
-        # Input test-setup=ollama-vision: ollama-vision+vision
-        # Default (including test-setup=ollama): ollama+base, ollama-vision+vision, gpt+responses
-        config: >-
-          ${{
-            github.event.schedule == '1 0 * * 0'
-              && fromJSON('[{"setup": "vllm", "suite": "base"}]')
-            || github.event.inputs.test-setup == 'ollama-vision'
-              && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
-            || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}, {"setup": "gpt", "suite": "responses"}]')
-          }}
+        # Test configurations: Generated from CI_MATRIX in tests/integration/suites.py
+        # See scripts/generate_ci_matrix.py for generation logic
+        config: ${{ fromJSON(needs.generate-matrix.outputs.matrix).include }}

    steps:
      - name: Checkout repository
--- a/.github/workflows/integration-vector-io-tests.yml
+++ b/.github/workflows/integration-vector-io-tests.yml
@ -13,7 +13,7 @@ on:
      - 'release-[0-9]+.[0-9]+.x'
    paths:
      - 'src/llama_stack/**'
-      - '!src/llama_stack/ui/**'
+      - '!src/llama_stack_ui/**'
      - 'tests/integration/vector_io/**'
      - 'uv.lock'
      - 'pyproject.toml'
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@ -43,14 +43,14 @@ jobs:
        with:
          node-version: '20'
          cache: 'npm'
-          cache-dependency-path: 'src/llama_stack/ui/'
+          cache-dependency-path: 'src/llama_stack_ui/'

      - name: Set up uv
        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2

      - name: Install npm dependencies
        run: npm ci
-        working-directory: src/llama_stack/ui
+        working-directory: src/llama_stack_ui

      - name: Install pre-commit
        run: python -m pip install pre-commit
@ -165,3 +165,17 @@ jobs:
            echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
          fi
          exit $status
+
+      # Runs the recordings cleanup script in delete mode, then fails the job if
+      # that left any changes to recordings paths under tests/integration.
+      - name: Check if any unused recordings
+        run: |
+          set -e
+          PYTHONPATH=$PWD uv run ./scripts/cleanup_recordings.py --delete
+          changes=$(git status --short tests/integration | grep 'recordings' || true)
+          if [ -n "$changes" ]; then
+            # \$(pwd) is escaped so the hint prints the literal command, not the runner's checkout path.
+            echo "::error::Unused integration recordings detected. Run 'PYTHONPATH=\$(pwd) uv run ./scripts/cleanup_recordings.py --delete' locally and commit the deletions."
+            echo "$changes"
+            exit 1
+          fi
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@ -10,7 +10,7 @@ on:
    branches:
      - main
    paths-ignore:
-        - 'src/llama_stack/ui/**'
+        - 'src/llama_stack_ui/**'

 jobs:
  build:
--- a/.github/workflows/stainless-builds.yml
+++ b/.github/workflows/stainless-builds.yml
@ -0,0 +1,110 @@
+name: Stainless SDK Builds
+run-name: Build Stainless SDK from OpenAPI spec changes
+
+# This workflow uses pull_request_target so that it can run on pull requests from
+# forks with access to secrets. That is acceptable only because the workflow
+# definition comes from the (trusted) base branch and the jobs merely read the
+# OpenAPI spec and Stainless config from the PR; never add a step that runs PR code.
+
+on:
+  pull_request_target:
+    types:
+      - opened
+      - synchronize
+      - reopened
+      - closed
+    paths:
+      - "client-sdks/stainless/**"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+env:
+  # Stainless organization name.
+  STAINLESS_ORG: llamastack
+
+  # Stainless project name.
+  STAINLESS_PROJECT: llama-stack-client
+
+  # Path to your OpenAPI spec.
+  OAS_PATH: ./client-sdks/stainless/openapi.yml
+
+  # Path to your Stainless config. Optional; only provide this if you prefer
+  # to maintain the ground truth Stainless config in your own repo.
+  CONFIG_PATH: ./client-sdks/stainless/config.yml
+
+  # When to fail the job based on build conclusion.
+  # Options: "never" | "note" | "warning" | "error" | "fatal".
+  FAIL_ON: error
+
+  # In your repo secrets, configure:
+  # - STAINLESS_API_KEY: a Stainless API key, which you can generate on the
+  #   Stainless organization dashboard
+
+jobs:
+  preview:
+    if: github.event.action != 'closed'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      # Check out the PR's head commit from its source repository (which may be a
+      # fork) so the OpenAPI spec and Stainless config files changed by the PR can be read.
+      - name: Checkout repository
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+          ref: ${{ github.event.pull_request.head.sha }}
+          fetch-depth: 2
+
+      # This action builds preview SDKs from the OpenAPI spec changes and
+      # posts/updates a comment on the PR with build results and links to the preview.
+      - name: Run preview builds
+        uses: stainless-api/upload-openapi-spec-action/preview@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
+        with:
+          stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
+          org: ${{ env.STAINLESS_ORG }}
+          project: ${{ env.STAINLESS_PROJECT }}
+          oas_path: ${{ env.OAS_PATH }}
+          config_path: ${{ env.CONFIG_PATH }}
+          fail_on: ${{ env.FAIL_ON }}
+          base_sha: ${{ github.event.pull_request.base.sha }}
+          base_ref: ${{ github.event.pull_request.base.ref }}
+          head_sha: ${{ github.event.pull_request.head.sha }}
+
+  merge:
+    if: github.event.action == 'closed' && github.event.pull_request.merged == true
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      # Check out the PR's head commit from its source repository (which may be a
+      # fork) so the OpenAPI spec and Stainless config files changed by the PR can be read.
+      - name: Checkout repository
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+          ref: ${{ github.event.pull_request.head.sha }}
+          fetch-depth: 2
+
+      # Note that this only merges in changes that happened on the last build on
+      # preview/${{ github.head_ref }}. It's possible that there are OAS/config
+      # changes that haven't been built, if the preview job didn't finish
+      # before this step starts. In theory we want to wait for all builds
+      # against preview/${{ github.head_ref }} to complete, but assuming that
+      # the preview job happens before the PR merge, it should be fine.
+      - name: Run merge build
+        uses: stainless-api/upload-openapi-spec-action/merge@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
+        with:
+          stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
+          org: ${{ env.STAINLESS_ORG }}
+          project: ${{ env.STAINLESS_PROJECT }}
+          oas_path: ${{ env.OAS_PATH }}
+          config_path: ${{ env.CONFIG_PATH }}
+          fail_on: ${{ env.FAIL_ON }}
+          base_sha: ${{ github.event.pull_request.base.sha }}
+          base_ref: ${{ github.event.pull_request.base.ref }}
+          head_sha: ${{ github.event.pull_request.head.sha }}
--- a/.github/workflows/test-external.yml
+++ b/.github/workflows/test-external.yml
@ -9,7 +9,7 @@ on:
    branches: [ main ]
    paths:
      - 'src/llama_stack/**'
-      - '!src/llama_stack/ui/**'
+      - '!src/llama_stack_ui/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
--- a/.github/workflows/ui-unit-tests.yml
+++ b/.github/workflows/ui-unit-tests.yml
@ -8,7 +8,7 @@ on:
  pull_request:
    branches: [ main ]
    paths:
-      - 'src/llama_stack/ui/**'
+      - 'src/llama_stack_ui/**'
      - '.github/workflows/ui-unit-tests.yml' # This workflow
  workflow_dispatch:

@ -33,22 +33,22 @@ jobs:
        with:
          node-version: ${{ matrix.node-version }}
          cache: 'npm'
-          cache-dependency-path: 'src/llama_stack/ui/package-lock.json'
+          cache-dependency-path: 'src/llama_stack_ui/package-lock.json'

      - name: Install dependencies
-        working-directory: src/llama_stack/ui
+        working-directory: src/llama_stack_ui
        run: npm ci

      - name: Run linting
-        working-directory: src/llama_stack/ui
+        working-directory: src/llama_stack_ui
        run: npm run lint

      - name: Run format check
-        working-directory: src/llama_stack/ui
+        working-directory: src/llama_stack_ui
        run: npm run format:check

      - name: Run unit tests
-        working-directory: src/llama_stack/ui
+        working-directory: src/llama_stack_ui
        env:
          CI: true

--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@ -13,7 +13,7 @@ on:
      - 'release-[0-9]+.[0-9]+.x'
    paths:
      - 'src/llama_stack/**'
-      - '!src/llama_stack/ui/**'
+      - '!src/llama_stack_ui/**'
      - 'tests/unit/**'
      - 'uv.lock'
      - 'pyproject.toml'