Merge 7a19488787 into sapling-pr-archive-ehhuang

This commit is contained in:
ehhuang 2025-11-03 10:46:12 -08:00 committed by GitHub
commit 202a28f8ca
71 changed files with 3537 additions and 39048 deletions

View file

@ -0,0 +1,60 @@
name: Install llama-stack-client
description: Install llama-stack-client based on branch context and client-version input
inputs:
client-version:
description: 'Client version to install on non-release branches (latest or published). Ignored on release branches.'
required: false
default: ""
outputs:
uv-extra-index-url:
description: 'UV_EXTRA_INDEX_URL to use (set for release branches)'
value: ${{ steps.configure.outputs.uv-extra-index-url }}
install-after-sync:
description: 'Whether to install client after uv sync'
value: ${{ steps.configure.outputs.install-after-sync }}
install-source:
description: 'Where to install client from after sync'
value: ${{ steps.configure.outputs.install-source }}
runs:
using: "composite"
steps:
- name: Configure client installation
id: configure
shell: bash
run: |
# Determine the branch we're working with
BRANCH="${{ github.base_ref || github.ref }}"
BRANCH="${BRANCH#refs/heads/}"
echo "Working with branch: $BRANCH"
# On release branches: use test.pypi for uv sync, then install from git
# On non-release branches: install based on client-version after sync
if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
echo "Detected release branch: $BRANCH"
# Check if matching branch exists in client repo
if ! git ls-remote --exit-code --heads https://github.com/llamastack/llama-stack-client-python.git "$BRANCH" > /dev/null 2>&1; then
echo "::error::Branch $BRANCH not found in llama-stack-client-python repository"
echo "::error::Please create the matching release branch in llama-stack-client-python before testing"
exit 1
fi
# Configure to use test.pypi as extra index (PyPI is primary)
echo "uv-extra-index-url=https://test.pypi.org/simple/" >> $GITHUB_OUTPUT
echo "install-after-sync=true" >> $GITHUB_OUTPUT
echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@$BRANCH" >> $GITHUB_OUTPUT
elif [ "${{ inputs.client-version }}" = "latest" ]; then
# Install from main git after sync
echo "install-after-sync=true" >> $GITHUB_OUTPUT
echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@main" >> $GITHUB_OUTPUT
elif [ "${{ inputs.client-version }}" = "published" ]; then
# Use published version from PyPI (installed by sync)
echo "install-after-sync=false" >> $GITHUB_OUTPUT
elif [ -n "${{ inputs.client-version }}" ]; then
echo "::error::Invalid client-version: ${{ inputs.client-version }}"
exit 1
fi
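Outside of CI, the branch detection above can be sanity-checked directly in a shell. A minimal sketch, with "release-0.4.x" standing in for a hypothetical release branch name:

# Sketch: reproduce the action's release-branch check locally (hypothetical branch name).
BRANCH="release-0.4.x"
if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
  # Exits non-zero if no matching branch exists in the client repo.
  git ls-remote --exit-code --heads \
    https://github.com/llamastack/llama-stack-client-python.git "$BRANCH" \
    && echo "Matching client branch exists; the action would install git+https://github.com/llamastack/llama-stack-client-python.git@$BRANCH and set UV_EXTRA_INDEX_URL=https://test.pypi.org/simple/"
fi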

View file

@ -18,25 +18,35 @@ runs:
        python-version: ${{ inputs.python-version }}
        version: 0.7.6
+   - name: Configure client installation
+     id: client-config
+     uses: ./.github/actions/install-llama-stack-client
+     with:
+       client-version: ${{ inputs.client-version }}
    - name: Install dependencies
      shell: bash
+     env:
+       UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
      run: |
+       # Export UV env vars for current step and persist to GITHUB_ENV for subsequent steps
+       if [ -n "$UV_EXTRA_INDEX_URL" ]; then
+         export UV_INDEX_STRATEGY=unsafe-best-match
+         echo "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" >> $GITHUB_ENV
+         echo "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY" >> $GITHUB_ENV
+         echo "Exported UV environment variables for current and subsequent steps"
+       fi
        echo "Updating project dependencies via uv sync"
        uv sync --all-groups
        echo "Installing ad-hoc dependencies"
        uv pip install faiss-cpu
-       # Install llama-stack-client-python based on the client-version input
-       if [ "${{ inputs.client-version }}" = "latest" ]; then
-         echo "Installing latest llama-stack-client-python from main branch"
-         uv pip install git+https://github.com/llamastack/llama-stack-client-python.git@main
-       elif [ "${{ inputs.client-version }}" = "published" ]; then
-         echo "Installing published llama-stack-client-python from PyPI"
-         uv pip install llama-stack-client
-       else
-         echo "Invalid client-version: ${{ inputs.client-version }}"
-         exit 1
-       fi
+       # Install specific client version after sync if needed
+       if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
+         echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
+         uv pip install ${{ steps.client-config.outputs.install-source }}
+       fi
        echo "Installed llama packages"

View file

@ -42,36 +42,7 @@ runs:
    - name: Build Llama Stack
      shell: bash
      run: |
-       # Install llama-stack-client-python based on the client-version input
-       if [ "${{ inputs.client-version }}" = "latest" ]; then
-         # Check if PR is targeting a release branch
-         TARGET_BRANCH="${{ github.base_ref }}"
-         if [[ "$TARGET_BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x-maint$ ]]; then
-           echo "PR targets release branch: $TARGET_BRANCH"
-           echo "Checking if matching branch exists in llama-stack-client-python..."
-           # Check if the branch exists in the client repo
-           if git ls-remote --exit-code --heads https://github.com/llamastack/llama-stack-client-python.git "$TARGET_BRANCH" > /dev/null 2>&1; then
-             echo "Installing llama-stack-client-python from matching branch: $TARGET_BRANCH"
-             export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@$TARGET_BRANCH
-           else
-             echo "::error::Branch $TARGET_BRANCH not found in llama-stack-client-python repository"
-             echo "::error::Please create the matching release branch in llama-stack-client-python before testing"
-             exit 1
-           fi
-         else
-           echo "Installing latest llama-stack-client-python from main branch"
-           export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@main
-         fi
-       elif [ "${{ inputs.client-version }}" = "published" ]; then
-         echo "Installing published llama-stack-client-python from PyPI"
-         unset LLAMA_STACK_CLIENT_DIR
-       else
-         echo "Invalid client-version: ${{ inputs.client-version }}"
-         exit 1
-       fi
+       # Client is already installed by setup-runner (handles both main and release branches)
        echo "Building Llama Stack"
        LLAMA_STACK_DIR=. \

View file

@ -13,7 +13,6 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
  | Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suites from tests/integration in replay mode |
  | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
  | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
- | Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR |
  | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
  | Test llama stack list-deps | [providers-list-deps.yml](providers-list-deps.yml) | Test llama stack list-deps |
  | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |

View file

@ -6,7 +6,9 @@ on:
  pull_request:
    branches:
      - main
-     - 'release-[0-9]+.[0-9]+.x-maint'
+     - 'release-[0-9]+.[0-9]+.[0-9]+.[0-9]+'
+     - 'release-[0-9]+.[0-9]+.[0-9]+'
+     - 'release-[0-9]+.[0-9]+'
    paths:
      - 'src/llama_stack/core/datatypes.py'
      - 'src/llama_stack/providers/datatypes.py'
@ -35,7 +37,7 @@ jobs:
          python-version: '3.12'
      - name: Install uv
-       uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
+       uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
        with:
          enable-cache: true
@ -413,7 +415,7 @@ jobs:
          python-version: '3.12'
      - name: Install uv
-       uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
+       uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
        with:
          enable-cache: true

View file

@ -22,7 +22,6 @@ on:
      - 'docs/static/stable-llama-stack-spec.yaml' # Stable APIs spec
      - 'docs/static/experimental-llama-stack-spec.yaml' # Experimental APIs spec
      - 'docs/static/deprecated-llama-stack-spec.yaml' # Deprecated APIs spec
-     - 'docs/static/llama-stack-spec.html' # Legacy HTML spec
      - '.github/workflows/conformance.yml' # This workflow itself

concurrency:

View file

@ -30,10 +30,16 @@ jobs:
    - name: Build a single provider
      run: |
+       BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=starter"
+       if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
+         BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
+       fi
+       if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
+         BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
+       fi
        docker build . \
          -f containers/Containerfile \
-         --build-arg INSTALL_MODE=editable \
-         --build-arg DISTRO_NAME=starter \
+         $BUILD_ARGS \
          --tag llama-stack:starter-ci
- name: Run installer end-to-end - name: Run installer end-to-end

View file

@ -6,11 +6,11 @@ on:
  push:
    branches:
      - main
-     - 'release-[0-9]+.[0-9]+.x-maint'
+     - 'release-[0-9]+.[0-9]+.x'
  pull_request:
    branches:
      - main
-     - 'release-[0-9]+.[0-9]+.x-maint'
+     - 'release-[0-9]+.[0-9]+.x'
    paths:
      - 'distributions/**'
      - 'src/llama_stack/**'

View file

@ -6,11 +6,11 @@ on:
  push:
    branches:
      - main
-     - 'release-[0-9]+.[0-9]+.x-maint'
+     - 'release-[0-9]+.[0-9]+.x'
  pull_request:
    branches:
      - main
-     - 'release-[0-9]+.[0-9]+.x-maint'
+     - 'release-[0-9]+.[0-9]+.x'
    paths:
      - 'src/llama_stack/providers/utils/sqlstore/**'
      - 'tests/integration/sqlstore/**'

View file

@ -6,11 +6,11 @@ on:
  push:
    branches:
      - main
-     - 'release-[0-9]+.[0-9]+.x-maint'
+     - 'release-[0-9]+.[0-9]+.x'
  pull_request:
    branches:
      - main
-     - 'release-[0-9]+.[0-9]+.x-maint'
+     - 'release-[0-9]+.[0-9]+.x'
    types: [opened, synchronize, reopened]
    paths:
      - 'src/llama_stack/**'

View file

@ -6,11 +6,11 @@ on:
  push:
    branches:
      - main
-     - 'release-[0-9]+.[0-9]+.x-maint'
+     - 'release-[0-9]+.[0-9]+.x'
  pull_request:
    branches:
      - main
-     - 'release-[0-9]+.[0-9]+.x-maint'
+     - 'release-[0-9]+.[0-9]+.x'
    paths:
      - 'src/llama_stack/**'
      - '!src/llama_stack/ui/**'

View file

@ -7,7 +7,7 @@ on:
  push:
    branches:
      - main
-     - 'release-[0-9]+.[0-9]+.x-maint'
+     - 'release-[0-9]+.[0-9]+.x'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
@ -46,7 +46,7 @@ jobs:
          cache-dependency-path: 'src/llama_stack/ui/'
      - name: Set up uv
-       uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
+       uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
      - name: Install npm dependencies
        run: npm ci
@ -130,11 +130,34 @@ jobs:
            exit 1
          fi
+     - name: Configure client installation
+       id: client-config
+       uses: ./.github/actions/install-llama-stack-client
      - name: Sync dev + type_checking dependencies
-       run: uv sync --group dev --group type_checking
+       env:
+         UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
+       run: |
+         if [ -n "$UV_EXTRA_INDEX_URL" ]; then
+           export UV_INDEX_STRATEGY="unsafe-best-match"
+         fi
+         uv sync --group dev --group type_checking
+         # Install specific client version after sync if needed
+         if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
+           echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
+           uv pip install ${{ steps.client-config.outputs.install-source }}
+         fi
      - name: Run mypy (full type_checking)
+       env:
+         UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
        run: |
+         if [ -n "$UV_EXTRA_INDEX_URL" ]; then
+           export UV_INDEX_STRATEGY="unsafe-best-match"
+         fi
          set +e
          uv run --group dev --group type_checking mypy
          status=$?

View file

@ -1,227 +0,0 @@
name: Pre-commit Bot
run-name: Pre-commit bot for PR #${{ github.event.issue.number }}
on:
issue_comment:
types: [created]
jobs:
pre-commit:
# Only run on pull request comments
if: github.event.issue.pull_request && contains(github.event.comment.body, '@github-actions run precommit')
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- name: Check comment author and get PR details
id: check_author
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
// Get PR details
const pr = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: context.issue.number
});
// Check if commenter has write access or is the PR author
const commenter = context.payload.comment.user.login;
const prAuthor = pr.data.user.login;
let hasPermission = false;
// Check if commenter is PR author
if (commenter === prAuthor) {
hasPermission = true;
console.log(`Comment author ${commenter} is the PR author`);
} else {
// Check if commenter has write/admin access
try {
const permission = await github.rest.repos.getCollaboratorPermissionLevel({
owner: context.repo.owner,
repo: context.repo.repo,
username: commenter
});
const level = permission.data.permission;
hasPermission = ['write', 'admin', 'maintain'].includes(level);
console.log(`Comment author ${commenter} has permission: ${level}`);
} catch (error) {
console.log(`Could not check permissions for ${commenter}: ${error.message}`);
}
}
if (!hasPermission) {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: `❌ @${commenter} You don't have permission to trigger pre-commit. Only PR authors or repository collaborators can run this command.`
});
core.setFailed(`User ${commenter} does not have permission`);
return;
}
// Save PR info for later steps
core.setOutput('pr_number', context.issue.number);
core.setOutput('pr_head_ref', pr.data.head.ref);
core.setOutput('pr_head_sha', pr.data.head.sha);
core.setOutput('pr_head_repo', pr.data.head.repo.full_name);
core.setOutput('pr_base_ref', pr.data.base.ref);
core.setOutput('is_fork', pr.data.head.repo.full_name !== context.payload.repository.full_name);
core.setOutput('authorized', 'true');
- name: React to comment
if: steps.check_author.outputs.authorized == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
await github.rest.reactions.createForIssueComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: context.payload.comment.id,
content: 'rocket'
});
- name: Comment starting
if: steps.check_author.outputs.authorized == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: ${{ steps.check_author.outputs.pr_number }},
body: `⏳ Running [pre-commit hooks](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) on PR #${{ steps.check_author.outputs.pr_number }}...`
});
- name: Checkout PR branch (same-repo)
if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'false'
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
ref: ${{ steps.check_author.outputs.pr_head_ref }}
fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PR branch (fork)
if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'true'
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
repository: ${{ steps.check_author.outputs.pr_head_repo }}
ref: ${{ steps.check_author.outputs.pr_head_ref }}
fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}
- name: Verify checkout
if: steps.check_author.outputs.authorized == 'true'
run: |
echo "Current SHA: $(git rev-parse HEAD)"
echo "Expected SHA: ${{ steps.check_author.outputs.pr_head_sha }}"
if [[ "$(git rev-parse HEAD)" != "${{ steps.check_author.outputs.pr_head_sha }}" ]]; then
echo "::error::Checked out SHA does not match expected SHA"
exit 1
fi
- name: Set up Python
if: steps.check_author.outputs.authorized == 'true'
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: '3.12'
cache: pip
cache-dependency-path: |
**/requirements*.txt
.pre-commit-config.yaml
- name: Set up Node.js
if: steps.check_author.outputs.authorized == 'true'
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
with:
node-version: '20'
cache: 'npm'
cache-dependency-path: 'src/llama_stack/ui/'
- name: Install npm dependencies
if: steps.check_author.outputs.authorized == 'true'
run: npm ci
working-directory: src/llama_stack/ui
- name: Run pre-commit
if: steps.check_author.outputs.authorized == 'true'
id: precommit
uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
continue-on-error: true
env:
SKIP: no-commit-to-branch
RUFF_OUTPUT_FORMAT: github
- name: Check for changes
if: steps.check_author.outputs.authorized == 'true'
id: changes
run: |
if ! git diff --exit-code || [ -n "$(git ls-files --others --exclude-standard)" ]; then
echo "has_changes=true" >> $GITHUB_OUTPUT
echo "Changes detected after pre-commit"
else
echo "has_changes=false" >> $GITHUB_OUTPUT
echo "No changes after pre-commit"
fi
- name: Commit and push changes
if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
run: |
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"
git add -A
git commit -m "style: apply pre-commit fixes
🤖 Applied by @github-actions bot via pre-commit workflow"
# Push changes
git push origin HEAD:${{ steps.check_author.outputs.pr_head_ref }}
- name: Comment success with changes
if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: ${{ steps.check_author.outputs.pr_number }},
body: `✅ Pre-commit hooks completed successfully!\n\n🔧 Changes have been committed and pushed to the PR branch.`
});
- name: Comment success without changes
if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'false' && steps.precommit.outcome == 'success'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: ${{ steps.check_author.outputs.pr_number }},
body: `✅ Pre-commit hooks passed!\n\n✨ No changes needed - your code is already formatted correctly.`
});
- name: Comment failure
if: failure()
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: ${{ steps.check_author.outputs.pr_number }},
body: `❌ Pre-commit workflow failed!\n\nPlease check the [workflow logs](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) for details.`
});

View file

@ -72,10 +72,16 @@ jobs:
      - name: Build container image
        if: matrix.image-type == 'container'
        run: |
+         BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=${{ matrix.distro }}"
+         if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
+           BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
+         fi
+         if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
+           BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
+         fi
          docker build . \
            -f containers/Containerfile \
-           --build-arg INSTALL_MODE=editable \
-           --build-arg DISTRO_NAME=${{ matrix.distro }} \
+           $BUILD_ARGS \
            --tag llama-stack:${{ matrix.distro }}-ci
      - name: Print dependencies in the image
@ -108,12 +114,18 @@ jobs:
      - name: Build container image
        run: |
          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml)
+         BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests"
+         BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE"
+         BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml"
+         if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
+           BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
+         fi
+         if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
+           BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
+         fi
          docker build . \
            -f containers/Containerfile \
-           --build-arg INSTALL_MODE=editable \
-           --build-arg DISTRO_NAME=ci-tests \
-           --build-arg BASE_IMAGE="$BASE_IMAGE" \
-           --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
+           $BUILD_ARGS \
            -t llama-stack:ci-tests
      - name: Inspect the container image entrypoint
@ -148,12 +160,18 @@ jobs:
      - name: Build UBI9 container image
        run: |
          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml)
+         BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests"
+         BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE"
+         BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml"
+         if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
+           BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
+         fi
+         if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
+           BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
+         fi
          docker build . \
            -f containers/Containerfile \
-           --build-arg INSTALL_MODE=editable \
-           --build-arg DISTRO_NAME=ci-tests \
-           --build-arg BASE_IMAGE="$BASE_IMAGE" \
-           --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
+           $BUILD_ARGS \
            -t llama-stack:ci-tests-ubi9
      - name: Inspect UBI9 image

View file

@ -24,7 +24,7 @@ jobs:
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
      - name: Install uv
-       uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
+       uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
        with:
          python-version: ${{ matrix.python-version }}
          activate-environment: true

View file

@ -6,11 +6,11 @@ on:
  push:
    branches:
      - main
-     - 'release-[0-9]+.[0-9]+.x-maint'
+     - 'release-[0-9]+.[0-9]+.x'
  pull_request:
    branches:
      - main
-     - 'release-[0-9]+.[0-9]+.x-maint'
+     - 'release-[0-9]+.[0-9]+.x'
    paths:
      - 'src/llama_stack/**'
      - '!src/llama_stack/ui/**'

View file

@ -52,10 +52,6 @@ repos:
        additional_dependencies:
          - black==24.3.0
-   - repo: https://github.com/astral-sh/uv-pre-commit
-     rev: 0.7.20
-     hooks:
-       - id: uv-lock
    - repo: https://github.com/pre-commit/mirrors-mypy
      rev: v1.18.2
@ -63,22 +59,13 @@
        - id: mypy
          additional_dependencies:
            - uv==0.6.2
-           - mypy
            - pytest
            - rich
            - types-requests
            - pydantic
-           - httpx
          pass_filenames: false
-   - repo: local
-     hooks:
-       - id: mypy-full
-         name: mypy (full type_checking)
-         entry: uv run --group dev --group type_checking mypy
-         language: system
-         pass_filenames: false
-         stages: [manual]
    # - repo: https://github.com/tcort/markdown-link-check
    #   rev: v3.11.2
    #   hooks:
@ -87,11 +74,26 @@ repos:
    - repo: local
      hooks:
- id: uv-lock
name: uv-lock
additional_dependencies:
- uv==0.7.20
entry: ./scripts/uv-run-with-index.sh lock
language: python
pass_filenames: false
require_serial: true
files: ^(pyproject\.toml|uv\.lock)$
- id: mypy-full
name: mypy (full type_checking)
entry: ./scripts/uv-run-with-index.sh run --group dev --group type_checking mypy
language: system
pass_filenames: false
stages: [manual]
        - id: distro-codegen
          name: Distribution Template Codegen
          additional_dependencies:
            - uv==0.7.8
-         entry: uv run --group codegen ./scripts/distro_codegen.py
+         entry: ./scripts/uv-run-with-index.sh run --group codegen ./scripts/distro_codegen.py
          language: python
          pass_filenames: false
          require_serial: true
@ -100,7 +102,7 @@ repos:
          name: Provider Codegen
          additional_dependencies:
            - uv==0.7.8
-         entry: uv run --group codegen ./scripts/provider_codegen.py
+         entry: ./scripts/uv-run-with-index.sh run --group codegen ./scripts/provider_codegen.py
          language: python
          pass_filenames: false
          require_serial: true
@ -109,7 +111,7 @@ repos:
          name: API Spec Codegen
          additional_dependencies:
            - uv==0.7.8
-         entry: sh -c 'uv run ./docs/openapi_generator/run_openapi_generator.sh > /dev/null'
+         entry: sh -c './scripts/uv-run-with-index.sh run ./docs/openapi_generator/run_openapi_generator.sh > /dev/null'
          language: python
          pass_filenames: false
          require_serial: true
@ -150,7 +152,7 @@ repos:
          name: Generate CI documentation
          additional_dependencies:
            - uv==0.7.8
-         entry: uv run ./scripts/gen-ci-docs.py
+         entry: ./scripts/uv-run-with-index.sh run ./scripts/gen-ci-docs.py
          language: python
          pass_filenames: false
          require_serial: true
@ -162,6 +164,7 @@ repos:
          files: ^src/llama_stack/ui/.*\.(ts|tsx)$
          pass_filenames: false
          require_serial: true
        - id: check-log-usage
          name: Ensure 'llama_stack.log' usage for logging
          entry: bash
@ -197,6 +200,7 @@ repos:
              echo;
              exit 1;
            } || true

ci:
  autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
  autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
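The relocated local hooks remain runnable on demand with the standard pre-commit CLI, for example:

pre-commit run uv-lock --all-files
pre-commit run --hook-stage manual mypy-full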

View file

@ -956,7 +956,22 @@ paths:
        List routes.
        List all available API routes with their methods and implementing providers.
-     parameters: []
+     parameters:
- name: api_filter
in: query
description: >-
Optional filter to control which routes are returned. Can be an API level
('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level,
or 'deprecated' to show deprecated routes across all levels. If not specified,
returns only non-deprecated v1 routes.
required: false
schema:
type: string
enum:
- v1
- v1alpha
- v1beta
- deprecated
      deprecated: false
  /v1/models:
    get:

View file

@ -19,6 +19,8 @@ ARG KEEP_WORKSPACE=""
ARG DISTRO_NAME="starter"
ARG RUN_CONFIG_PATH=""
ARG UV_HTTP_TIMEOUT=500
+ARG UV_EXTRA_INDEX_URL=""
+ARG UV_INDEX_STRATEGY=""
ENV UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT}
ENV PYTHONDONTWRITEBYTECODE=1
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
@ -45,7 +47,7 @@ RUN set -eux; \
    exit 1; \
  fi
-RUN pip install --no-cache uv
+RUN pip install --no-cache-dir uv
ENV UV_SYSTEM_PYTHON=1
ENV INSTALL_MODE=${INSTALL_MODE}
@ -62,47 +64,60 @@ COPY . /workspace
# Install the client package if it is provided
# NOTE: this is installed before llama-stack since llama-stack depends on llama-stack-client-python
+# Unset UV index env vars to ensure we only use PyPI for the client
RUN set -eux; \
+   unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \
    if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
      if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
        echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
        exit 1; \
      fi; \
-     uv pip install --no-cache -e "$LLAMA_STACK_CLIENT_DIR"; \
+     uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
    fi;

# Install llama-stack
+# Use UV_EXTRA_INDEX_URL inline only for editable install with RC dependencies
RUN set -eux; \
+   SAVED_UV_EXTRA_INDEX_URL="${UV_EXTRA_INDEX_URL:-}"; \
+   SAVED_UV_INDEX_STRATEGY="${UV_INDEX_STRATEGY:-}"; \
+   unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \
    if [ "$INSTALL_MODE" = "editable" ]; then \
      if [ ! -d "$LLAMA_STACK_DIR" ]; then \
        echo "INSTALL_MODE=editable requires LLAMA_STACK_DIR to point to a directory inside the build context" >&2; \
        exit 1; \
      fi; \
-     uv pip install --no-cache -e "$LLAMA_STACK_DIR"; \
+     if [ -n "$SAVED_UV_EXTRA_INDEX_URL" ] && [ -n "$SAVED_UV_INDEX_STRATEGY" ]; then \
+       UV_EXTRA_INDEX_URL="$SAVED_UV_EXTRA_INDEX_URL" UV_INDEX_STRATEGY="$SAVED_UV_INDEX_STRATEGY" \
+         uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
+     else \
+       uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
+     fi; \
    elif [ "$INSTALL_MODE" = "test-pypi" ]; then \
-     uv pip install --no-cache fastapi libcst; \
+     uv pip install --no-cache-dir fastapi libcst; \
      if [ -n "$TEST_PYPI_VERSION" ]; then \
-       uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
+       uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
      else \
-       uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
+       uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
      fi; \
    else \
      if [ -n "$PYPI_VERSION" ]; then \
-       uv pip install --no-cache "llama-stack==$PYPI_VERSION"; \
+       uv pip install --no-cache-dir "llama-stack==$PYPI_VERSION"; \
      else \
-       uv pip install --no-cache llama-stack; \
+       uv pip install --no-cache-dir llama-stack; \
      fi; \
    fi;

# Install the dependencies for the distribution
+# Explicitly unset UV index env vars to ensure we only use PyPI for distribution deps
RUN set -eux; \
+   unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \
    if [ -z "$DISTRO_NAME" ]; then \
      echo "DISTRO_NAME must be provided" >&2; \
      exit 1; \
    fi; \
    deps="$(llama stack list-deps "$DISTRO_NAME")"; \
    if [ -n "$deps" ]; then \
-     printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache; \
+     printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache-dir; \
    fi
# Cleanup # Cleanup
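For reference, a build against the new arguments might look like the following. A sketch only; the image tag is arbitrary and the test.pypi index is only needed when release-candidate dependencies must be resolved:

docker build . \
  -f containers/Containerfile \
  --build-arg INSTALL_MODE=editable \
  --build-arg DISTRO_NAME=starter \
  --build-arg UV_EXTRA_INDEX_URL=https://test.pypi.org/simple/ \
  --build-arg UV_INDEX_STRATEGY=unsafe-best-match \
  --tag llama-stack:starter-rc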

View file

@ -20,6 +20,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
| `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
+ | `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |

## Sample Configuration

View file

@ -84,7 +84,6 @@ def generate_spec(output_dir: Path, stability_filter: str = None, main_spec: boo
    )

    yaml_filename = f"{filename_prefix}llama-stack-spec.yaml"
-   html_filename = f"{filename_prefix}llama-stack-spec.html"

    with open(output_dir / yaml_filename, "w", encoding="utf-8") as fp:
        y = yaml.YAML()
@ -102,11 +101,6 @@ def generate_spec(output_dir: Path, stability_filter: str = None, main_spec: boo
            fp,
        )

-   with open(output_dir / html_filename, "w") as fp:
-       spec.write_html(fp, pretty_print=True)

-   print(f"Generated {yaml_filename} and {html_filename}")

def main(output_dir: str):
    output_dir = Path(output_dir)
    if not output_dir.exists():

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -1258,7 +1258,23 @@
        ],
        "summary": "List routes.",
        "description": "List routes.\nList all available API routes with their methods and implementing providers.",
-       "parameters": [],
+       "parameters": [
{
"name": "api_filter",
"in": "query",
"description": "Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns only non-deprecated v1 routes.",
"required": false,
"schema": {
"type": "string",
"enum": [
"v1",
"v1alpha",
"v1beta",
"deprecated"
]
}
}
],
"deprecated": false "deprecated": false
} }
}, },

View file

@ -953,7 +953,22 @@ paths:
        List routes.
        List all available API routes with their methods and implementing providers.
-     parameters: []
+     parameters:
- name: api_filter
in: query
description: >-
Optional filter to control which routes are returned. Can be an API level
('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level,
or 'deprecated' to show deprecated routes across all levels. If not specified,
returns only non-deprecated v1 routes.
required: false
schema:
type: string
enum:
- v1
- v1alpha
- v1beta
- deprecated
      deprecated: false
  /v1/models:
    get:

File diff suppressed because it is too large

View file

@ -956,7 +956,22 @@ paths:
        List routes.
        List all available API routes with their methods and implementing providers.
-     parameters: []
+     parameters:
- name: api_filter
in: query
description: >-
Optional filter to control which routes are returned. Can be an API level
('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level,
or 'deprecated' to show deprecated routes across all levels. If not specified,
returns only non-deprecated v1 routes.
required: false
schema:
type: string
enum:
- v1
- v1alpha
- v1beta
- deprecated
      deprecated: false
  /v1/models:
    get:

View file

@ -7,7 +7,7 @@ required-version = ">=0.7.0"
[project]
name = "llama_stack"
-version = "0.3.0"
+version = "0.4.0.dev0"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"

View file

@ -215,6 +215,16 @@ build_image() {
--build-arg "LLAMA_STACK_DIR=/workspace" --build-arg "LLAMA_STACK_DIR=/workspace"
) )
# Pass UV index configuration for release branches
if [[ -n "${UV_EXTRA_INDEX_URL:-}" ]]; then
echo "Adding UV_EXTRA_INDEX_URL to docker build: $UV_EXTRA_INDEX_URL"
build_cmd+=(--build-arg "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL")
fi
if [[ -n "${UV_INDEX_STRATEGY:-}" ]]; then
echo "Adding UV_INDEX_STRATEGY to docker build: $UV_INDEX_STRATEGY"
build_cmd+=(--build-arg "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY")
fi
if ! "${build_cmd[@]}"; then if ! "${build_cmd[@]}"; then
echo "❌ Failed to build Docker image" echo "❌ Failed to build Docker image"
exit 1 exit 1

View file

@ -23,7 +23,7 @@ COLLECT_ONLY=false
# Function to display usage
usage() {
-   cat << EOF
+   cat <<EOF
Usage: $0 [OPTIONS]
Options:
@ -102,7 +102,6 @@ while [[ $# -gt 0 ]]; do
esac esac
done done
# Validate required parameters # Validate required parameters
if [[ -z "$STACK_CONFIG" && "$COLLECT_ONLY" == false ]]; then if [[ -z "$STACK_CONFIG" && "$COLLECT_ONLY" == false ]]; then
echo "Error: --stack-config is required" echo "Error: --stack-config is required"
@ -177,12 +176,12 @@ cd $ROOT_DIR
# check if "llama" and "pytest" are available. this script does not use `uv run` given # check if "llama" and "pytest" are available. this script does not use `uv run` given
# it can be used in a pre-release environment where we have not been able to tell # it can be used in a pre-release environment where we have not been able to tell
# uv about pre-release dependencies properly (yet). # uv about pre-release dependencies properly (yet).
if [[ "$COLLECT_ONLY" == false ]] && ! command -v llama &> /dev/null; then if [[ "$COLLECT_ONLY" == false ]] && ! command -v llama &>/dev/null; then
echo "llama could not be found, ensure llama-stack is installed" echo "llama could not be found, ensure llama-stack is installed"
exit 1 exit 1
fi fi
if ! command -v pytest &> /dev/null; then if ! command -v pytest &>/dev/null; then
echo "pytest could not be found, ensure pytest is installed" echo "pytest could not be found, ensure pytest is installed"
exit 1 exit 1
fi fi
@ -216,10 +215,11 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
    export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
    export OTEL_BSP_SCHEDULE_DELAY="200"
    export OTEL_BSP_EXPORT_TIMEOUT="2000"
+   export OTEL_METRIC_EXPORT_INTERVAL="200"

    # remove "server:" from STACK_CONFIG
    stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
-   nohup llama stack run $stack_config > server.log 2>&1 &
+   nohup llama stack run $stack_config >server.log 2>&1 &

    echo "Waiting for Llama Stack Server to start..."
    for i in {1..30}; do
@ -248,7 +248,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
container_name="llama-stack-test-$DISTRO" container_name="llama-stack-test-$DISTRO"
if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then
echo "Dumping container logs before stopping..." echo "Dumping container logs before stopping..."
docker logs "$container_name" > "docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true docker logs "$container_name" >"docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true
echo "Stopping and removing container: $container_name" echo "Stopping and removing container: $container_name"
docker stop "$container_name" 2>/dev/null || true docker stop "$container_name" 2>/dev/null || true
docker rm "$container_name" 2>/dev/null || true docker rm "$container_name" 2>/dev/null || true
@ -280,6 +280,16 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
--build-arg "LLAMA_STACK_DIR=/workspace" --build-arg "LLAMA_STACK_DIR=/workspace"
) )
# Pass UV index configuration for release branches
if [[ -n "${UV_EXTRA_INDEX_URL:-}" ]]; then
echo "Adding UV_EXTRA_INDEX_URL to docker build: $UV_EXTRA_INDEX_URL"
build_cmd+=(--build-arg "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL")
fi
if [[ -n "${UV_INDEX_STRATEGY:-}" ]]; then
echo "Adding UV_INDEX_STRATEGY to docker build: $UV_INDEX_STRATEGY"
build_cmd+=(--build-arg "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY")
fi
if ! "${build_cmd[@]}"; then if ! "${build_cmd[@]}"; then
echo "❌ Failed to build Docker image" echo "❌ Failed to build Docker image"
exit 1 exit 1
@ -302,6 +312,9 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_METRIC_EXPORT_INTERVAL=200"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_SCHEDULE_DELAY=200"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_EXPORT_TIMEOUT=2000"
# Pass through API keys if they exist # Pass through API keys if they exist
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY" [ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
@ -437,17 +450,13 @@ elif [ $exit_code -eq 5 ]; then
else
    echo "❌ Tests failed"
    echo ""
-   echo "=== Dumping last 100 lines of logs for debugging ==="

    # Output server or container logs based on stack config
    if [[ "$STACK_CONFIG" == *"server:"* && -f "server.log" ]]; then
-       echo "--- Last 100 lines of server.log ---"
-       tail -100 server.log
+       echo "--- Server side failures can be located inside server.log (available from artifacts on CI) ---"
    elif [[ "$STACK_CONFIG" == *"docker:"* ]]; then
        docker_log_file="docker-${DISTRO}-${INFERENCE_MODE}.log"
        if [[ -f "$docker_log_file" ]]; then
-           echo "--- Last 100 lines of $docker_log_file ---"
-           tail -100 "$docker_log_file"
+           echo "--- Server side failures can be located inside $docker_log_file (available from artifacts on CI) ---"
        fi
    fi

scripts/uv-run-with-index.sh Executable file
View file

@ -0,0 +1,42 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
set -euo pipefail
# Detect current branch and target branch
# In GitHub Actions, use GITHUB_REF/GITHUB_BASE_REF
if [[ -n "${GITHUB_REF:-}" ]]; then
BRANCH="${GITHUB_REF#refs/heads/}"
else
BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "")
fi
# For PRs, check the target branch
if [[ -n "${GITHUB_BASE_REF:-}" ]]; then
TARGET_BRANCH="${GITHUB_BASE_REF}"
else
TARGET_BRANCH=$(git rev-parse --abbrev-ref HEAD@{upstream} 2>/dev/null | sed 's|origin/||' || echo "")
fi
# Check if on a release branch or targeting one, or LLAMA_STACK_RELEASE_MODE is set
IS_RELEASE=false
if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
IS_RELEASE=true
elif [[ "$TARGET_BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
IS_RELEASE=true
elif [[ "${LLAMA_STACK_RELEASE_MODE:-}" == "true" ]]; then
IS_RELEASE=true
fi
# On release branches, use test.pypi as extra index for RC versions
if [[ "$IS_RELEASE" == "true" ]]; then
export UV_EXTRA_INDEX_URL="https://test.pypi.org/simple/"
export UV_INDEX_STRATEGY="unsafe-best-match"
fi
# Run uv with all arguments passed through
exec uv "$@"

View file

@ -4,14 +4,21 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from typing import Protocol, runtime_checkable
+from typing import Literal, Protocol, runtime_checkable

from pydantic import BaseModel

-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack.apis.version import (
+    LLAMA_STACK_API_V1,
+)
from llama_stack.providers.datatypes import HealthStatus
from llama_stack.schema_utils import json_schema_type, webmethod

+# Valid values for the route filter parameter.
+# Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated)
+# Special filter value: "deprecated" (shows deprecated routes regardless of level)
+ApiFilter = Literal["v1", "v1alpha", "v1beta", "deprecated"]

@json_schema_type
class RouteInfo(BaseModel):
@ -64,11 +71,12 @@ class Inspect(Protocol):
        """

    @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
-   async def list_routes(self) -> ListRoutesResponse:
+   async def list_routes(self, api_filter: ApiFilter | None = None) -> ListRoutesResponse:
        """List routes.

        List all available API routes with their methods and implementing providers.

+       :param api_filter: Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns only non-deprecated v1 routes.
        :returns: Response containing information about all available routes.
        """
        ...
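As a usage sketch (assuming a stack listening on the default port 8321):

curl "http://localhost:8321/v1/inspect/routes"                       # default: non-deprecated v1 routes
curl "http://localhost:8321/v1/inspect/routes?api_filter=v1alpha"    # non-deprecated v1alpha routes
curl "http://localhost:8321/v1/inspect/routes?api_filter=deprecated" # deprecated routes across all levels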

View file

@ -8,15 +8,28 @@ import argparse
import os
import ssl
import subprocess
+import sys
from pathlib import Path

import uvicorn
import yaml
+from termcolor import cprint

from llama_stack.cli.stack.utils import ImageType
from llama_stack.cli.subcommand import Subcommand
-from llama_stack.core.datatypes import StackRunConfig
+from llama_stack.core.datatypes import Api, Provider, StackRunConfig
+from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageConfig,
)
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
from llama_stack.log import LoggingConfig, get_logger from llama_stack.log import LoggingConfig, get_logger
@ -68,6 +81,12 @@ class StackRun(Subcommand):
action="store_true", action="store_true",
help="Start the UI server", help="Start the UI server",
) )
self.parser.add_argument(
"--providers",
type=str,
default=None,
help="Run a stack with only a list of providers. This list is formatted like: api1=provider1,api1=provider2,api2=provider3. Where there can be multiple providers per API.",
)
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None: def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
import yaml import yaml
@ -93,6 +112,49 @@ class StackRun(Subcommand):
config_file = resolve_config_or_distro(args.config, Mode.RUN) config_file = resolve_config_or_distro(args.config, Mode.RUN)
except ValueError as e: except ValueError as e:
self.parser.error(str(e)) self.parser.error(str(e))
elif args.providers:
provider_list: dict[str, list[Provider]] = dict()
for api_provider in args.providers.split(","):
if "=" not in api_provider:
cprint(
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
color="red",
file=sys.stderr,
)
sys.exit(1)
api, provider_type = api_provider.split("=")
providers_for_api = get_provider_registry().get(Api(api), None)
if providers_for_api is None:
cprint(
f"{api} is not a valid API.",
color="red",
file=sys.stderr,
)
sys.exit(1)
if provider_type in providers_for_api:
provider = Provider(
provider_type=provider_type,
provider_id=provider_type.split("::")[1],
)
provider_list.setdefault(api, []).append(provider)
else:
cprint(
f"{provider} is not a valid provider for the {api} API.",
color="red",
file=sys.stderr,
)
sys.exit(1)
run_config = self._generate_run_config_from_providers(providers=provider_list)
config_dict = run_config.model_dump(mode="json")
# Write config to disk in providers-run directory
distro_dir = DISTRIBS_BASE_DIR / "providers-run"
config_file = distro_dir / "run.yaml"
logger.info(f"Writing generated config to: {config_file}")
with open(config_file, "w") as f:
yaml.dump(config_dict, f, default_flow_style=False, sort_keys=False)
else: else:
config_file = None config_file = None
@ -106,7 +168,8 @@ class StackRun(Subcommand):
        try:
            config = parse_and_maybe_upgrade_config(config_dict)
-           if not os.path.exists(str(config.external_providers_dir)):
+           # Create external_providers_dir if it's specified and doesn't exist
+           if config.external_providers_dir and not os.path.exists(str(config.external_providers_dir)):
                os.makedirs(str(config.external_providers_dir), exist_ok=True)
        except AttributeError as e:
            self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
@ -213,3 +276,44 @@ class StackRun(Subcommand):
) )
except Exception as e: except Exception as e:
logger.error(f"Failed to start UI development server in {ui_dir}: {e}") logger.error(f"Failed to start UI development server in {ui_dir}: {e}")
def _generate_run_config_from_providers(self, providers: dict[str, list[Provider]]):
apis = list(providers.keys())
distro_dir = DISTRIBS_BASE_DIR / "providers-run"
# need somewhere to put the storage.
os.makedirs(distro_dir, exist_ok=True)
storage = StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
),
"sql_default": SqliteSqlStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db",
),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(
backend="kv_default",
namespace="registry",
),
inference=InferenceStoreReference(
backend="sql_default",
table_name="inference_store",
),
conversations=SqlStoreReference(
backend="sql_default",
table_name="openai_conversations",
),
prompts=KVStoreReference(
backend="kv_default",
namespace="prompts",
),
),
)
return StackRunConfig(
image_name="providers-run",
apis=apis,
providers=providers,
storage=storage,
)
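A usage sketch for the new flag (the provider types shown are illustrative; any api=provider_type pairs known to the provider registry are accepted, and the generated run.yaml is written under the providers-run distribution directory):

llama stack run --providers inference=remote::ollama,vector_io=inline::faiss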

View file

@ -17,7 +17,6 @@ from llama_stack.core.distribution import (
get_provider_registry, get_provider_registry,
) )
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.prompt_for_config import prompt_for_config from llama_stack.core.utils.prompt_for_config import prompt_for_config
from llama_stack.log import get_logger from llama_stack.log import get_logger
@ -194,19 +193,11 @@ def upgrade_from_routing_table(
def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig: def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig:
version = config_dict.get("version", None)
if version == LLAMA_STACK_RUN_CONFIG_VERSION:
processed_config_dict = replace_env_vars(config_dict)
return StackRunConfig(**cast_image_name_to_string(processed_config_dict))
if "routing_table" in config_dict: if "routing_table" in config_dict:
logger.info("Upgrading config...") logger.info("Upgrading config...")
config_dict = upgrade_from_routing_table(config_dict) config_dict = upgrade_from_routing_table(config_dict)
config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION
if not config_dict.get("external_providers_dir", None):
config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR
processed_config_dict = replace_env_vars(config_dict)
return StackRunConfig(**cast_image_name_to_string(processed_config_dict))

View file

@ -15,6 +15,7 @@ from llama_stack.apis.inspect import (
RouteInfo,
VersionInfo,
)
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.external import load_external_apis
from llama_stack.core.server.routes import get_all_api_routes
@ -39,9 +40,21 @@ class DistributionInspectImpl(Inspect):
async def initialize(self) -> None:
pass
async def list_routes(self) -> ListRoutesResponse:
async def list_routes(self, api_filter: str | None = None) -> ListRoutesResponse:
run_config: StackRunConfig = self.config.run_config
# Helper function to determine if a route should be included based on api_filter
def should_include_route(webmethod) -> bool:
if api_filter is None:
# Default: only non-deprecated v1 APIs
return not webmethod.deprecated and webmethod.level == LLAMA_STACK_API_V1
elif api_filter == "deprecated":
# Special filter: show deprecated routes regardless of their actual level
return bool(webmethod.deprecated)
else:
# Filter by API level (non-deprecated routes only)
return not webmethod.deprecated and webmethod.level == api_filter
ret = []
external_apis = load_external_apis(run_config)
all_endpoints = get_all_api_routes(external_apis)
@ -55,8 +68,8 @@ class DistributionInspectImpl(Inspect):
method=next(iter([m for m in e.methods if m != "HEAD"])),
provider_types=[], # These APIs don't have "real" providers - they're internal to the stack
)
for e, _ in endpoints
if e.methods is not None
for e, webmethod in endpoints
if e.methods is not None and should_include_route(webmethod)
]
)
else:
@ -69,8 +82,8 @@ class DistributionInspectImpl(Inspect):
method=next(iter([m for m in e.methods if m != "HEAD"])),
provider_types=[p.provider_type for p in providers],
)
for e, _ in endpoints
if e.methods is not None
for e, webmethod in endpoints
if e.methods is not None and should_include_route(webmethod)
]
)

View file

@ -427,6 +427,7 @@ _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
"counters": {}, "counters": {},
"gauges": {}, "gauges": {},
"up_down_counters": {}, "up_down_counters": {},
"histograms": {},
}
_global_lock = threading.Lock()
_TRACER_PROVIDER = None
@ -540,6 +541,16 @@ class Telemetry:
)
return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name])
def _get_or_create_histogram(self, name: str, unit: str) -> metrics.Histogram:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["histograms"]:
_GLOBAL_STORAGE["histograms"][name] = self.meter.create_histogram(
name=name,
unit=unit,
description=f"Histogram for {name}",
)
return cast(metrics.Histogram, _GLOBAL_STORAGE["histograms"][name])
def _log_metric(self, event: MetricEvent) -> None:
# Add metric as an event to the current span
try:
@ -571,7 +582,16 @@ class Telemetry:
# Log to OpenTelemetry meter if available
if self.meter is None:
return
if isinstance(event.value, int):
# Use histograms for token-related metrics (per-request measurements)
# Use counters for other cumulative metrics
token_metrics = {"prompt_tokens", "completion_tokens", "total_tokens"}
if event.metric in token_metrics:
# Token metrics are per-request measurements, use histogram
histogram = self._get_or_create_histogram(event.metric, event.unit)
histogram.record(event.value, attributes=_clean_attributes(event.attributes))
elif isinstance(event.value, int):
counter = self._get_or_create_counter(event.metric, event.unit)
counter.add(event.value, attributes=_clean_attributes(event.attributes))
elif isinstance(event.value, float):

View file

@ -1015,7 +1015,7 @@ async def load_data_from_url(url: str) -> str:
if url.startswith("http"): if url.startswith("http"):
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
r = await client.get(url) r = await client.get(url)
resp = r.text resp: str = r.text
return resp return resp
raise ValueError(f"Unexpected URL: {type(url)}") raise ValueError(f"Unexpected URL: {type(url)}")

View file

@ -181,3 +181,22 @@ vlm_response = client.chat.completions.create(
print(f"VLM Response: {vlm_response.choices[0].message.content}") print(f"VLM Response: {vlm_response.choices[0].message.content}")
``` ```
### Rerank Example
The following example shows how to rerank documents using an NVIDIA NIM.
```python
rerank_response = client.alpha.inference.rerank(
model="nvidia/nvidia/llama-3.2-nv-rerankqa-1b-v2",
query="query",
items=[
"item_1",
"item_2",
"item_3",
],
)
for i, result in enumerate(rerank_response):
print(f"{i+1}. [Index: {result.index}, " f"Score: {(result.relevance_score):.3f}]")
```
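
If you only need the top-scoring items, the same call also accepts a `max_num_results` parameter (part of the rerank signature added in this commit). A minimal sketch, assuming the same `client` and rerank model as the example above:

```python
# Minimal sketch: cap the number of ranked results returned.
# Assumes the same `client` and rerank model as the example above.
top_results = client.alpha.inference.rerank(
    model="nvidia/nvidia/llama-3.2-nv-rerankqa-1b-v2",
    query="query",
    items=["item_1", "item_2", "item_3"],
    max_num_results=2,
)
for i, result in enumerate(top_results):
    print(f"{i+1}. [Index: {result.index}, Score: {result.relevance_score:.3f}]")
```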

View file

@ -28,6 +28,7 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
Attributes:
url (str): A base url for accessing the NVIDIA NIM, e.g. http://localhost:8000
api_key (str): The access key for the hosted NIM endpoints
rerank_model_to_url (dict[str, str]): Mapping of rerank model identifiers to their API endpoints
There are two ways to access NVIDIA NIMs -
0. Hosted: Preview APIs hosted at https://integrate.api.nvidia.com
@ -55,6 +56,14 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false",
description="When set to false, the API version will not be appended to the base_url. By default, it is true.",
)
rerank_model_to_url: dict[str, str] = Field(
default_factory=lambda: {
"nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
"nvidia/nv-rerankqa-mistral-4b-v3": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking",
"nvidia/llama-3.2-nv-rerankqa-1b-v2": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking",
},
description="Mapping of rerank model identifiers to their API endpoints. ",
)
@classmethod
def sample_run_config(

View file

@ -5,6 +5,19 @@
# the root directory of this source tree.
from collections.abc import Iterable
import aiohttp
from llama_stack.apis.inference import (
RerankData,
RerankResponse,
)
from llama_stack.apis.inference.inference import (
OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartTextParam,
)
from llama_stack.apis.models import Model, ModelType
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@ -61,3 +74,101 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
:return: The NVIDIA API base URL
"""
return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
async def list_provider_model_ids(self) -> Iterable[str]:
"""
Return both dynamic model IDs and statically configured rerank model IDs.
"""
dynamic_ids: Iterable[str] = []
try:
dynamic_ids = await super().list_provider_model_ids()
except Exception:
# If the dynamic listing fails, proceed with just configured rerank IDs
dynamic_ids = []
configured_rerank_ids = list(self.config.rerank_model_to_url.keys())
return list(dict.fromkeys(list(dynamic_ids) + configured_rerank_ids)) # remove duplicates
def construct_model_from_identifier(self, identifier: str) -> Model:
"""
Classify rerank models from config; otherwise use the base behavior.
"""
if identifier in self.config.rerank_model_to_url:
return Model(
provider_id=self.__provider_id__, # type: ignore[attr-defined]
provider_resource_id=identifier,
identifier=identifier,
model_type=ModelType.rerank,
)
return super().construct_model_from_identifier(identifier)
async def rerank(
self,
model: str,
query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam],
max_num_results: int | None = None,
) -> RerankResponse:
provider_model_id = await self._get_provider_model_id(model)
ranking_url = self.get_base_url()
if _is_nvidia_hosted(self.config) and provider_model_id in self.config.rerank_model_to_url:
ranking_url = self.config.rerank_model_to_url[provider_model_id]
logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}")
# Convert query to text format
if isinstance(query, str):
query_text = query
elif isinstance(query, OpenAIChatCompletionContentPartTextParam):
query_text = query.text
else:
raise ValueError("Query must be a string or text content part")
# Convert items to text format
passages = []
for item in items:
if isinstance(item, str):
passages.append({"text": item})
elif isinstance(item, OpenAIChatCompletionContentPartTextParam):
passages.append({"text": item.text})
else:
raise ValueError("Items must be strings or text content parts")
payload = {
"model": provider_model_id,
"query": {"text": query_text},
"passages": passages,
}
headers = {
"Authorization": f"Bearer {self.get_api_key()}",
"Content-Type": "application/json",
}
try:
async with aiohttp.ClientSession() as session:
async with session.post(ranking_url, headers=headers, json=payload) as response:
if response.status != 200:
response_text = await response.text()
raise ConnectionError(
f"NVIDIA rerank API request failed with status {response.status}: {response_text}"
)
result = await response.json()
rankings = result.get("rankings", [])
# Convert to RerankData format
rerank_data = []
for ranking in rankings:
rerank_data.append(RerankData(index=ranking["index"], relevance_score=ranking["logit"]))
# Apply max_num_results limit
if max_num_results is not None:
rerank_data = rerank_data[:max_num_results]
return RerankResponse(data=rerank_data)
except aiohttp.ClientError as e:
raise ConnectionError(f"Failed to connect to NVIDIA rerank API at {ranking_url}: {e}") from e

View file

@ -51,7 +51,11 @@ async function proxyRequest(request: NextRequest, method: string) {
);
// Create response with same status and headers
const proxyResponse = new NextResponse(responseText, {
// Handle 204 No Content responses specially
const proxyResponse =
response.status === 204
? new NextResponse(null, { status: 204 })
: new NextResponse(responseText, {
status: response.status,
statusText: response.statusText,
});

View file

@ -0,0 +1,5 @@
import { PromptManagement } from "@/components/prompts";
export default function PromptsPage() {
return <PromptManagement />;
}

View file

@ -8,6 +8,7 @@ import {
MessageCircle,
Settings2,
Compass,
FileText,
} from "lucide-react"; } from "lucide-react";
import Link from "next/link"; import Link from "next/link";
import { usePathname } from "next/navigation"; import { usePathname } from "next/navigation";
@ -50,6 +51,11 @@ const manageItems = [
url: "/logs/vector-stores", url: "/logs/vector-stores",
icon: Database, icon: Database,
}, },
{
title: "Prompts",
url: "/prompts",
icon: FileText,
},
{
title: "Documentation",
url: "https://llama-stack.readthedocs.io/en/latest/references/api_reference/index.html",

View file

@ -0,0 +1,4 @@
export { PromptManagement } from "./prompt-management";
export { PromptList } from "./prompt-list";
export { PromptEditor } from "./prompt-editor";
export * from "./types";

View file

@ -0,0 +1,309 @@
import React from "react";
import { render, screen, fireEvent } from "@testing-library/react";
import "@testing-library/jest-dom";
import { PromptEditor } from "./prompt-editor";
import type { Prompt, PromptFormData } from "./types";
describe("PromptEditor", () => {
const mockOnSave = jest.fn();
const mockOnCancel = jest.fn();
const mockOnDelete = jest.fn();
const defaultProps = {
onSave: mockOnSave,
onCancel: mockOnCancel,
onDelete: mockOnDelete,
};
beforeEach(() => {
jest.clearAllMocks();
});
describe("Create Mode", () => {
test("renders create form correctly", () => {
render(<PromptEditor {...defaultProps} />);
expect(screen.getByLabelText("Prompt Content *")).toBeInTheDocument();
expect(screen.getByText("Variables")).toBeInTheDocument();
expect(screen.getByText("Preview")).toBeInTheDocument();
expect(screen.getByText("Create Prompt")).toBeInTheDocument();
expect(screen.getByText("Cancel")).toBeInTheDocument();
});
test("shows preview placeholder when no content", () => {
render(<PromptEditor {...defaultProps} />);
expect(
screen.getByText("Enter content to preview the compiled prompt")
).toBeInTheDocument();
});
test("submits form with correct data", () => {
render(<PromptEditor {...defaultProps} />);
const promptInput = screen.getByLabelText("Prompt Content *");
fireEvent.change(promptInput, {
target: { value: "Hello {{name}}, welcome!" },
});
fireEvent.click(screen.getByText("Create Prompt"));
expect(mockOnSave).toHaveBeenCalledWith({
prompt: "Hello {{name}}, welcome!",
variables: [],
});
});
test("prevents submission with empty prompt", () => {
render(<PromptEditor {...defaultProps} />);
fireEvent.click(screen.getByText("Create Prompt"));
expect(mockOnSave).not.toHaveBeenCalled();
});
});
describe("Edit Mode", () => {
const mockPrompt: Prompt = {
prompt_id: "prompt_123",
prompt: "Hello {{name}}, how is {{weather}}?",
version: 1,
variables: ["name", "weather"],
is_default: true,
};
test("renders edit form with existing data", () => {
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
expect(
screen.getByDisplayValue("Hello {{name}}, how is {{weather}}?")
).toBeInTheDocument();
expect(screen.getAllByText("name")).toHaveLength(2); // One in variables, one in preview
expect(screen.getAllByText("weather")).toHaveLength(2); // One in variables, one in preview
expect(screen.getByText("Update Prompt")).toBeInTheDocument();
expect(screen.getByText("Delete Prompt")).toBeInTheDocument();
});
test("submits updated data correctly", () => {
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
const promptInput = screen.getByLabelText("Prompt Content *");
fireEvent.change(promptInput, {
target: { value: "Updated: Hello {{name}}!" },
});
fireEvent.click(screen.getByText("Update Prompt"));
expect(mockOnSave).toHaveBeenCalledWith({
prompt: "Updated: Hello {{name}}!",
variables: ["name", "weather"],
});
});
});
describe("Variables Management", () => {
test("adds new variable", () => {
render(<PromptEditor {...defaultProps} />);
const variableInput = screen.getByPlaceholderText(
"Add variable name (e.g. user_name, topic)"
);
fireEvent.change(variableInput, { target: { value: "testVar" } });
fireEvent.click(screen.getByText("Add"));
expect(screen.getByText("testVar")).toBeInTheDocument();
});
test("prevents adding duplicate variables", () => {
render(<PromptEditor {...defaultProps} />);
const variableInput = screen.getByPlaceholderText(
"Add variable name (e.g. user_name, topic)"
);
// Add first variable
fireEvent.change(variableInput, { target: { value: "test" } });
fireEvent.click(screen.getByText("Add"));
// Try to add same variable again
fireEvent.change(variableInput, { target: { value: "test" } });
// Button should be disabled
expect(screen.getByText("Add")).toBeDisabled();
});
test("removes variable", () => {
const mockPrompt: Prompt = {
prompt_id: "prompt_123",
prompt: "Hello {{name}}",
version: 1,
variables: ["name", "location"],
is_default: true,
};
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
// Check that both variables are present initially
expect(screen.getAllByText("name").length).toBeGreaterThan(0);
expect(screen.getAllByText("location").length).toBeGreaterThan(0);
// Remove the location variable by clicking the X button with the specific title
const removeLocationButton = screen.getByTitle(
"Remove location variable"
);
fireEvent.click(removeLocationButton);
// Name should still be there, location should be gone from the variables section
expect(screen.getAllByText("name").length).toBeGreaterThan(0);
expect(
screen.queryByTitle("Remove location variable")
).not.toBeInTheDocument();
});
test("adds variable on Enter key", () => {
render(<PromptEditor {...defaultProps} />);
const variableInput = screen.getByPlaceholderText(
"Add variable name (e.g. user_name, topic)"
);
fireEvent.change(variableInput, { target: { value: "enterVar" } });
// Simulate Enter key press
fireEvent.keyPress(variableInput, {
key: "Enter",
code: "Enter",
charCode: 13,
preventDefault: jest.fn(),
});
// Check if the variable was added by looking for the badge
expect(screen.getAllByText("enterVar").length).toBeGreaterThan(0);
});
});
describe("Preview Functionality", () => {
test("shows live preview with variables", () => {
render(<PromptEditor {...defaultProps} />);
// Add prompt content
const promptInput = screen.getByLabelText("Prompt Content *");
fireEvent.change(promptInput, {
target: { value: "Hello {{name}}, welcome to {{place}}!" },
});
// Add variables
const variableInput = screen.getByPlaceholderText(
"Add variable name (e.g. user_name, topic)"
);
fireEvent.change(variableInput, { target: { value: "name" } });
fireEvent.click(screen.getByText("Add"));
fireEvent.change(variableInput, { target: { value: "place" } });
fireEvent.click(screen.getByText("Add"));
// Check that preview area shows the content
expect(screen.getByText("Compiled Prompt")).toBeInTheDocument();
});
test("shows variable value inputs in preview", () => {
const mockPrompt: Prompt = {
prompt_id: "prompt_123",
prompt: "Hello {{name}}",
version: 1,
variables: ["name"],
is_default: true,
};
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
expect(screen.getByText("Variable Values")).toBeInTheDocument();
expect(
screen.getByPlaceholderText("Enter value for name")
).toBeInTheDocument();
});
test("shows color legend for variable states", () => {
render(<PromptEditor {...defaultProps} />);
// Add content to show preview
const promptInput = screen.getByLabelText("Prompt Content *");
fireEvent.change(promptInput, {
target: { value: "Hello {{name}}" },
});
expect(screen.getByText("Used")).toBeInTheDocument();
expect(screen.getByText("Unused")).toBeInTheDocument();
expect(screen.getByText("Undefined")).toBeInTheDocument();
});
});
describe("Error Handling", () => {
test("displays error message", () => {
const errorMessage = "Prompt contains undeclared variables";
render(<PromptEditor {...defaultProps} error={errorMessage} />);
expect(screen.getByText(errorMessage)).toBeInTheDocument();
});
});
describe("Delete Functionality", () => {
const mockPrompt: Prompt = {
prompt_id: "prompt_123",
prompt: "Hello {{name}}",
version: 1,
variables: ["name"],
is_default: true,
};
test("shows delete button in edit mode", () => {
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
expect(screen.getByText("Delete Prompt")).toBeInTheDocument();
});
test("hides delete button in create mode", () => {
render(<PromptEditor {...defaultProps} />);
expect(screen.queryByText("Delete Prompt")).not.toBeInTheDocument();
});
test("calls onDelete with confirmation", () => {
const originalConfirm = window.confirm;
window.confirm = jest.fn(() => true);
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
fireEvent.click(screen.getByText("Delete Prompt"));
expect(window.confirm).toHaveBeenCalledWith(
"Are you sure you want to delete this prompt? This action cannot be undone."
);
expect(mockOnDelete).toHaveBeenCalledWith("prompt_123");
window.confirm = originalConfirm;
});
test("does not delete when confirmation is cancelled", () => {
const originalConfirm = window.confirm;
window.confirm = jest.fn(() => false);
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
fireEvent.click(screen.getByText("Delete Prompt"));
expect(mockOnDelete).not.toHaveBeenCalled();
window.confirm = originalConfirm;
});
});
describe("Cancel Functionality", () => {
test("calls onCancel when cancel button is clicked", () => {
render(<PromptEditor {...defaultProps} />);
fireEvent.click(screen.getByText("Cancel"));
expect(mockOnCancel).toHaveBeenCalled();
});
});
});

View file

@ -0,0 +1,346 @@
"use client";
import { useState, useEffect } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Textarea } from "@/components/ui/textarea";
import { Badge } from "@/components/ui/badge";
import {
Card,
CardContent,
CardDescription,
CardHeader,
CardTitle,
} from "@/components/ui/card";
import { Separator } from "@/components/ui/separator";
import { X, Plus, Save, Trash2 } from "lucide-react";
import { Prompt, PromptFormData } from "./types";
interface PromptEditorProps {
prompt?: Prompt;
onSave: (prompt: PromptFormData) => void;
onCancel: () => void;
onDelete?: (promptId: string) => void;
error?: string | null;
}
export function PromptEditor({
prompt,
onSave,
onCancel,
onDelete,
error,
}: PromptEditorProps) {
const [formData, setFormData] = useState<PromptFormData>({
prompt: "",
variables: [],
});
const [newVariable, setNewVariable] = useState("");
const [variableValues, setVariableValues] = useState<Record<string, string>>(
{}
);
useEffect(() => {
if (prompt) {
setFormData({
prompt: prompt.prompt || "",
variables: prompt.variables || [],
});
}
}, [prompt]);
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (!formData.prompt.trim()) {
return;
}
onSave(formData);
};
const addVariable = () => {
if (
newVariable.trim() &&
!formData.variables.includes(newVariable.trim())
) {
setFormData(prev => ({
...prev,
variables: [...prev.variables, newVariable.trim()],
}));
setNewVariable("");
}
};
const removeVariable = (variableToRemove: string) => {
setFormData(prev => ({
...prev,
variables: prev.variables.filter(
variable => variable !== variableToRemove
),
}));
};
const renderPreview = () => {
const text = formData.prompt;
if (!text) return text;
// Split text by variable patterns and process each part
const parts = text.split(/(\{\{\s*\w+\s*\}\})/g);
return parts.map((part, index) => {
const variableMatch = part.match(/\{\{\s*(\w+)\s*\}\}/);
if (variableMatch) {
const variableName = variableMatch[1];
const isDefined = formData.variables.includes(variableName);
const value = variableValues[variableName];
if (!isDefined) {
// Variable not in variables list - likely a typo/bug (RED)
return (
<span
key={index}
className="bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-200 px-1 rounded font-medium"
>
{part}
</span>
);
} else if (value && value.trim()) {
// Variable defined and has value - show the value (GREEN)
return (
<span
key={index}
className="bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200 px-1 rounded font-medium"
>
{value}
</span>
);
} else {
// Variable defined but empty (YELLOW)
return (
<span
key={index}
className="bg-yellow-100 text-yellow-800 dark:bg-yellow-900 dark:text-yellow-200 px-1 rounded font-medium"
>
{part}
</span>
);
}
}
return part;
});
};
const updateVariableValue = (variable: string, value: string) => {
setVariableValues(prev => ({
...prev,
[variable]: value,
}));
};
return (
<form onSubmit={handleSubmit} className="space-y-6">
{error && (
<div className="p-4 bg-destructive/10 border border-destructive/20 rounded-md">
<p className="text-destructive text-sm">{error}</p>
</div>
)}
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
{/* Form Section */}
<div className="space-y-4">
<div>
<Label htmlFor="prompt">Prompt Content *</Label>
<Textarea
id="prompt"
value={formData.prompt}
onChange={e =>
setFormData(prev => ({ ...prev, prompt: e.target.value }))
}
placeholder="Enter your prompt content here. Use {{variable_name}} for dynamic variables."
className="min-h-32 font-mono mt-2"
required
/>
<p className="text-xs text-muted-foreground mt-2">
Use double curly braces around variable names, e.g.,{" "}
{`{{user_name}}`} or {`{{topic}}`}
</p>
</div>
<div className="space-y-3">
<Label className="text-sm font-medium">Variables</Label>
<div className="flex gap-2 mt-2">
<Input
value={newVariable}
onChange={e => setNewVariable(e.target.value)}
placeholder="Add variable name (e.g. user_name, topic)"
onKeyPress={e =>
e.key === "Enter" && (e.preventDefault(), addVariable())
}
className="flex-1"
/>
<Button
type="button"
onClick={addVariable}
size="sm"
disabled={
!newVariable.trim() ||
formData.variables.includes(newVariable.trim())
}
>
<Plus className="h-4 w-4" />
Add
</Button>
</div>
{formData.variables.length > 0 && (
<div className="border rounded-lg p-3 bg-muted/20">
<div className="flex flex-wrap gap-2">
{formData.variables.map(variable => (
<Badge
key={variable}
variant="secondary"
className="text-sm px-2 py-1"
>
{variable}
<button
type="button"
onClick={() => removeVariable(variable)}
className="ml-2 hover:text-destructive transition-colors"
title={`Remove ${variable} variable`}
>
<X className="h-3 w-3" />
</button>
</Badge>
))}
</div>
</div>
)}
<p className="text-xs text-muted-foreground">
Variables that can be used in the prompt template. Each variable
should match a {`{{variable}}`} placeholder in the content above.
</p>
</div>
</div>
{/* Preview Section */}
<div className="space-y-4">
<Card>
<CardHeader>
<CardTitle className="text-lg">Preview</CardTitle>
<CardDescription>
Live preview of compiled prompt and variable substitution.
</CardDescription>
</CardHeader>
<CardContent className="space-y-4">
{formData.prompt ? (
<>
{/* Variable Values */}
{formData.variables.length > 0 && (
<div className="space-y-3">
<Label className="text-sm font-medium">
Variable Values
</Label>
<div className="space-y-2">
{formData.variables.map(variable => (
<div
key={variable}
className="grid grid-cols-2 gap-3 items-center"
>
<div className="text-sm font-mono text-muted-foreground">
{variable}
</div>
<Input
id={`var-${variable}`}
value={variableValues[variable] || ""}
onChange={e =>
updateVariableValue(variable, e.target.value)
}
placeholder={`Enter value for ${variable}`}
className="text-sm"
/>
</div>
))}
</div>
<Separator />
</div>
)}
{/* Live Preview */}
<div>
<Label className="text-sm font-medium mb-2 block">
Compiled Prompt
</Label>
<div className="bg-muted/50 p-4 rounded-lg border">
<div className="text-sm leading-relaxed whitespace-pre-wrap">
{renderPreview()}
</div>
</div>
<div className="flex flex-wrap gap-4 mt-2 text-xs">
<div className="flex items-center gap-1">
<div className="w-3 h-3 bg-green-500 dark:bg-green-400 border rounded"></div>
<span className="text-muted-foreground">Used</span>
</div>
<div className="flex items-center gap-1">
<div className="w-3 h-3 bg-yellow-500 dark:bg-yellow-400 border rounded"></div>
<span className="text-muted-foreground">Unused</span>
</div>
<div className="flex items-center gap-1">
<div className="w-3 h-3 bg-red-500 dark:bg-red-400 border rounded"></div>
<span className="text-muted-foreground">Undefined</span>
</div>
</div>
</div>
</>
) : (
<div className="text-center py-8">
<div className="text-muted-foreground text-sm">
Enter content to preview the compiled prompt
</div>
<div className="text-xs text-muted-foreground mt-2">
Use {`{{variable_name}}`} to add dynamic variables
</div>
</div>
)}
</CardContent>
</Card>
</div>
</div>
<Separator />
<div className="flex justify-between">
<div>
{prompt && onDelete && (
<Button
type="button"
variant="destructive"
onClick={() => {
if (
confirm(
`Are you sure you want to delete this prompt? This action cannot be undone.`
)
) {
onDelete(prompt.prompt_id);
}
}}
>
<Trash2 className="h-4 w-4 mr-2" />
Delete Prompt
</Button>
)}
</div>
<div className="flex gap-2">
<Button type="button" variant="outline" onClick={onCancel}>
Cancel
</Button>
<Button type="submit">
<Save className="h-4 w-4 mr-2" />
{prompt ? "Update" : "Create"} Prompt
</Button>
</div>
</div>
</form>
);
}

View file

@ -0,0 +1,259 @@
import React from "react";
import { render, screen, fireEvent } from "@testing-library/react";
import "@testing-library/jest-dom";
import { PromptList } from "./prompt-list";
import type { Prompt } from "./types";
describe("PromptList", () => {
const mockOnEdit = jest.fn();
const mockOnDelete = jest.fn();
const defaultProps = {
prompts: [],
onEdit: mockOnEdit,
onDelete: mockOnDelete,
};
beforeEach(() => {
jest.clearAllMocks();
});
describe("Empty State", () => {
test("renders empty message when no prompts", () => {
render(<PromptList {...defaultProps} />);
expect(screen.getByText("No prompts yet")).toBeInTheDocument();
});
test("shows filtered empty message when search has no results", () => {
const prompts: Prompt[] = [
{
prompt_id: "prompt_123",
prompt: "Hello world",
version: 1,
variables: [],
is_default: false,
},
];
render(<PromptList {...defaultProps} prompts={prompts} />);
// Search for something that doesn't exist
const searchInput = screen.getByPlaceholderText("Search prompts...");
fireEvent.change(searchInput, { target: { value: "nonexistent" } });
expect(
screen.getByText("No prompts match your filters")
).toBeInTheDocument();
});
});
describe("Prompts Display", () => {
const mockPrompts: Prompt[] = [
{
prompt_id: "prompt_123",
prompt: "Hello {{name}}, how are you?",
version: 1,
variables: ["name"],
is_default: true,
},
{
prompt_id: "prompt_456",
prompt: "Summarize this {{text}} in {{length}} words",
version: 2,
variables: ["text", "length"],
is_default: false,
},
{
prompt_id: "prompt_789",
prompt: "Simple prompt with no variables",
version: 1,
variables: [],
is_default: false,
},
];
test("renders prompts table with correct headers", () => {
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
expect(screen.getByText("ID")).toBeInTheDocument();
expect(screen.getByText("Content")).toBeInTheDocument();
expect(screen.getByText("Variables")).toBeInTheDocument();
expect(screen.getByText("Version")).toBeInTheDocument();
expect(screen.getByText("Actions")).toBeInTheDocument();
});
test("renders prompt data correctly", () => {
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
// Check prompt IDs
expect(screen.getByText("prompt_123")).toBeInTheDocument();
expect(screen.getByText("prompt_456")).toBeInTheDocument();
expect(screen.getByText("prompt_789")).toBeInTheDocument();
// Check content
expect(
screen.getByText("Hello {{name}}, how are you?")
).toBeInTheDocument();
expect(
screen.getByText("Summarize this {{text}} in {{length}} words")
).toBeInTheDocument();
expect(
screen.getByText("Simple prompt with no variables")
).toBeInTheDocument();
// Check versions
expect(screen.getAllByText("1")).toHaveLength(2); // Two prompts with version 1
expect(screen.getByText("2")).toBeInTheDocument();
// Check default badge
expect(screen.getByText("Default")).toBeInTheDocument();
});
test("renders variables correctly", () => {
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
// Check variables display
expect(screen.getByText("name")).toBeInTheDocument();
expect(screen.getByText("text")).toBeInTheDocument();
expect(screen.getByText("length")).toBeInTheDocument();
expect(screen.getByText("None")).toBeInTheDocument(); // For prompt with no variables
});
test("prompt ID links are clickable and call onEdit", () => {
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
// Click on the first prompt ID link
const promptLink = screen.getByRole("button", { name: "prompt_123" });
fireEvent.click(promptLink);
expect(mockOnEdit).toHaveBeenCalledWith(mockPrompts[0]);
});
test("edit buttons call onEdit", () => {
const { container } = render(
<PromptList {...defaultProps} prompts={mockPrompts} />
);
// Find the action buttons in the table - they should be in the last column
const actionCells = container.querySelectorAll("td:last-child");
const firstActionCell = actionCells[0];
const editButton = firstActionCell?.querySelector("button");
expect(editButton).toBeInTheDocument();
fireEvent.click(editButton!);
expect(mockOnEdit).toHaveBeenCalledWith(mockPrompts[0]);
});
test("delete buttons call onDelete with confirmation", () => {
const originalConfirm = window.confirm;
window.confirm = jest.fn(() => true);
const { container } = render(
<PromptList {...defaultProps} prompts={mockPrompts} />
);
// Find the delete button (second button in the first action cell)
const actionCells = container.querySelectorAll("td:last-child");
const firstActionCell = actionCells[0];
const buttons = firstActionCell?.querySelectorAll("button");
const deleteButton = buttons?.[1]; // Second button should be delete
expect(deleteButton).toBeInTheDocument();
fireEvent.click(deleteButton!);
expect(window.confirm).toHaveBeenCalledWith(
"Are you sure you want to delete this prompt? This action cannot be undone."
);
expect(mockOnDelete).toHaveBeenCalledWith("prompt_123");
window.confirm = originalConfirm;
});
test("delete does not execute when confirmation is cancelled", () => {
const originalConfirm = window.confirm;
window.confirm = jest.fn(() => false);
const { container } = render(
<PromptList {...defaultProps} prompts={mockPrompts} />
);
const actionCells = container.querySelectorAll("td:last-child");
const firstActionCell = actionCells[0];
const buttons = firstActionCell?.querySelectorAll("button");
const deleteButton = buttons?.[1]; // Second button should be delete
expect(deleteButton).toBeInTheDocument();
fireEvent.click(deleteButton!);
expect(mockOnDelete).not.toHaveBeenCalled();
window.confirm = originalConfirm;
});
});
describe("Search Functionality", () => {
const mockPrompts: Prompt[] = [
{
prompt_id: "user_greeting",
prompt: "Hello {{name}}, welcome!",
version: 1,
variables: ["name"],
is_default: true,
},
{
prompt_id: "system_summary",
prompt: "Summarize the following text",
version: 1,
variables: [],
is_default: false,
},
];
test("filters prompts by prompt ID", () => {
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
const searchInput = screen.getByPlaceholderText("Search prompts...");
fireEvent.change(searchInput, { target: { value: "user" } });
expect(screen.getByText("user_greeting")).toBeInTheDocument();
expect(screen.queryByText("system_summary")).not.toBeInTheDocument();
});
test("filters prompts by content", () => {
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
const searchInput = screen.getByPlaceholderText("Search prompts...");
fireEvent.change(searchInput, { target: { value: "welcome" } });
expect(screen.getByText("user_greeting")).toBeInTheDocument();
expect(screen.queryByText("system_summary")).not.toBeInTheDocument();
});
test("search is case insensitive", () => {
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
const searchInput = screen.getByPlaceholderText("Search prompts...");
fireEvent.change(searchInput, { target: { value: "HELLO" } });
expect(screen.getByText("user_greeting")).toBeInTheDocument();
expect(screen.queryByText("system_summary")).not.toBeInTheDocument();
});
test("clearing search shows all prompts", () => {
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
const searchInput = screen.getByPlaceholderText("Search prompts...");
// Filter first
fireEvent.change(searchInput, { target: { value: "user" } });
expect(screen.queryByText("system_summary")).not.toBeInTheDocument();
// Clear search
fireEvent.change(searchInput, { target: { value: "" } });
expect(screen.getByText("user_greeting")).toBeInTheDocument();
expect(screen.getByText("system_summary")).toBeInTheDocument();
});
});
});

View file

@ -0,0 +1,164 @@
"use client";
import { useState } from "react";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import {
Table,
TableBody,
TableCell,
TableHead,
TableHeader,
TableRow,
} from "@/components/ui/table";
import { Input } from "@/components/ui/input";
import { Edit, Search, Trash2 } from "lucide-react";
import { Prompt, PromptFilters } from "./types";
interface PromptListProps {
prompts: Prompt[];
onEdit: (prompt: Prompt) => void;
onDelete: (promptId: string) => void;
}
export function PromptList({ prompts, onEdit, onDelete }: PromptListProps) {
const [filters, setFilters] = useState<PromptFilters>({});
const filteredPrompts = prompts.filter(prompt => {
if (
filters.searchTerm &&
!(
prompt.prompt
?.toLowerCase()
.includes(filters.searchTerm.toLowerCase()) ||
prompt.prompt_id
.toLowerCase()
.includes(filters.searchTerm.toLowerCase())
)
) {
return false;
}
return true;
});
return (
<div className="space-y-4">
{/* Filters */}
<div className="flex flex-col sm:flex-row gap-4">
<div className="relative flex-1">
<Search className="absolute left-3 top-1/2 transform -translate-y-1/2 text-muted-foreground h-4 w-4" />
<Input
placeholder="Search prompts..."
value={filters.searchTerm || ""}
onChange={e =>
setFilters(prev => ({ ...prev, searchTerm: e.target.value }))
}
className="pl-10"
/>
</div>
</div>
{/* Prompts Table */}
<div className="overflow-auto">
<Table>
<TableHeader>
<TableRow>
<TableHead>ID</TableHead>
<TableHead>Content</TableHead>
<TableHead>Variables</TableHead>
<TableHead>Version</TableHead>
<TableHead>Actions</TableHead>
</TableRow>
</TableHeader>
<TableBody>
{filteredPrompts.map(prompt => (
<TableRow key={prompt.prompt_id}>
<TableCell className="max-w-48">
<Button
variant="link"
className="p-0 h-auto font-mono text-blue-600 hover:text-blue-800 dark:text-blue-400 dark:hover:text-blue-300 max-w-full justify-start"
onClick={() => onEdit(prompt)}
title={prompt.prompt_id}
>
<div className="truncate">{prompt.prompt_id}</div>
</Button>
</TableCell>
<TableCell className="max-w-64">
<div
className="font-mono text-xs text-muted-foreground truncate"
title={prompt.prompt || "No content"}
>
{prompt.prompt || "No content"}
</div>
</TableCell>
<TableCell>
{prompt.variables.length > 0 ? (
<div className="flex flex-wrap gap-1">
{prompt.variables.map(variable => (
<Badge
key={variable}
variant="outline"
className="text-xs"
>
{variable}
</Badge>
))}
</div>
) : (
<span className="text-muted-foreground text-sm">None</span>
)}
</TableCell>
<TableCell className="text-sm">
{prompt.version}
{prompt.is_default && (
<Badge variant="secondary" className="text-xs ml-2">
Default
</Badge>
)}
</TableCell>
<TableCell>
<div className="flex gap-1">
<Button
size="sm"
variant="outline"
onClick={() => onEdit(prompt)}
className="h-8 w-8 p-0"
>
<Edit className="h-3 w-3" />
</Button>
<Button
size="sm"
variant="outline"
onClick={() => {
if (
confirm(
`Are you sure you want to delete this prompt? This action cannot be undone.`
)
) {
onDelete(prompt.prompt_id);
}
}}
className="h-8 w-8 p-0 text-destructive hover:text-destructive"
>
<Trash2 className="h-3 w-3" />
</Button>
</div>
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</div>
{filteredPrompts.length === 0 && (
<div className="text-center py-12">
<div className="text-muted-foreground">
{prompts.length === 0
? "No prompts yet"
: "No prompts match your filters"}
</div>
</div>
)}
</div>
);
}

View file

@ -0,0 +1,304 @@
import React from "react";
import { render, screen, fireEvent, waitFor } from "@testing-library/react";
import "@testing-library/jest-dom";
import { PromptManagement } from "./prompt-management";
import type { Prompt } from "./types";
// Mock the auth client
const mockPromptsClient = {
list: jest.fn(),
create: jest.fn(),
update: jest.fn(),
delete: jest.fn(),
};
jest.mock("@/hooks/use-auth-client", () => ({
useAuthClient: () => ({
prompts: mockPromptsClient,
}),
}));
describe("PromptManagement", () => {
beforeEach(() => {
jest.clearAllMocks();
});
describe("Loading State", () => {
test("renders loading state initially", () => {
mockPromptsClient.list.mockReturnValue(new Promise(() => {})); // Never resolves
render(<PromptManagement />);
expect(screen.getByText("Loading prompts...")).toBeInTheDocument();
expect(screen.getByText("Prompts")).toBeInTheDocument();
});
});
describe("Empty State", () => {
test("renders empty state when no prompts", async () => {
mockPromptsClient.list.mockResolvedValue([]);
render(<PromptManagement />);
await waitFor(() => {
expect(screen.getByText("No prompts found.")).toBeInTheDocument();
});
expect(screen.getByText("Create Your First Prompt")).toBeInTheDocument();
});
test("opens modal when clicking 'Create Your First Prompt'", async () => {
mockPromptsClient.list.mockResolvedValue([]);
render(<PromptManagement />);
await waitFor(() => {
expect(
screen.getByText("Create Your First Prompt")
).toBeInTheDocument();
});
fireEvent.click(screen.getByText("Create Your First Prompt"));
expect(screen.getByText("Create New Prompt")).toBeInTheDocument();
});
});
describe("Error State", () => {
test("renders error state when API fails", async () => {
const error = new Error("API not found");
mockPromptsClient.list.mockRejectedValue(error);
render(<PromptManagement />);
await waitFor(() => {
expect(screen.getByText(/Error:/)).toBeInTheDocument();
});
});
test("renders specific error for 404", async () => {
const error = new Error("404 Not found");
mockPromptsClient.list.mockRejectedValue(error);
render(<PromptManagement />);
await waitFor(() => {
expect(
screen.getByText(/Prompts API endpoint not found/)
).toBeInTheDocument();
});
});
});
describe("Prompts List", () => {
const mockPrompts: Prompt[] = [
{
prompt_id: "prompt_123",
prompt: "Hello {{name}}, how are you?",
version: 1,
variables: ["name"],
is_default: true,
},
{
prompt_id: "prompt_456",
prompt: "Summarize this {{text}}",
version: 2,
variables: ["text"],
is_default: false,
},
];
test("renders prompts list correctly", async () => {
mockPromptsClient.list.mockResolvedValue(mockPrompts);
render(<PromptManagement />);
await waitFor(() => {
expect(screen.getByText("prompt_123")).toBeInTheDocument();
});
expect(screen.getByText("prompt_456")).toBeInTheDocument();
expect(
screen.getByText("Hello {{name}}, how are you?")
).toBeInTheDocument();
expect(screen.getByText("Summarize this {{text}}")).toBeInTheDocument();
});
test("opens modal when clicking 'New Prompt' button", async () => {
mockPromptsClient.list.mockResolvedValue(mockPrompts);
render(<PromptManagement />);
await waitFor(() => {
expect(screen.getByText("prompt_123")).toBeInTheDocument();
});
fireEvent.click(screen.getByText("New Prompt"));
expect(screen.getByText("Create New Prompt")).toBeInTheDocument();
});
});
describe("Modal Operations", () => {
const mockPrompts: Prompt[] = [
{
prompt_id: "prompt_123",
prompt: "Hello {{name}}",
version: 1,
variables: ["name"],
is_default: true,
},
];
test("closes modal when clicking cancel", async () => {
mockPromptsClient.list.mockResolvedValue(mockPrompts);
render(<PromptManagement />);
await waitFor(() => {
expect(screen.getByText("prompt_123")).toBeInTheDocument();
});
// Open modal
fireEvent.click(screen.getByText("New Prompt"));
expect(screen.getByText("Create New Prompt")).toBeInTheDocument();
// Close modal
fireEvent.click(screen.getByText("Cancel"));
expect(screen.queryByText("Create New Prompt")).not.toBeInTheDocument();
});
test("creates new prompt successfully", async () => {
const newPrompt: Prompt = {
prompt_id: "prompt_new",
prompt: "New prompt content",
version: 1,
variables: [],
is_default: false,
};
mockPromptsClient.list.mockResolvedValue(mockPrompts);
mockPromptsClient.create.mockResolvedValue(newPrompt);
render(<PromptManagement />);
await waitFor(() => {
expect(screen.getByText("prompt_123")).toBeInTheDocument();
});
// Open modal
fireEvent.click(screen.getByText("New Prompt"));
// Fill form
const promptInput = screen.getByLabelText("Prompt Content *");
fireEvent.change(promptInput, {
target: { value: "New prompt content" },
});
// Submit form
fireEvent.click(screen.getByText("Create Prompt"));
await waitFor(() => {
expect(mockPromptsClient.create).toHaveBeenCalledWith({
prompt: "New prompt content",
variables: [],
});
});
});
test("handles create error gracefully", async () => {
const error = {
detail: {
errors: [{ msg: "Prompt contains undeclared variables: ['test']" }],
},
};
mockPromptsClient.list.mockResolvedValue(mockPrompts);
mockPromptsClient.create.mockRejectedValue(error);
render(<PromptManagement />);
await waitFor(() => {
expect(screen.getByText("prompt_123")).toBeInTheDocument();
});
// Open modal
fireEvent.click(screen.getByText("New Prompt"));
// Fill form
const promptInput = screen.getByLabelText("Prompt Content *");
fireEvent.change(promptInput, { target: { value: "Hello {{test}}" } });
// Submit form
fireEvent.click(screen.getByText("Create Prompt"));
await waitFor(() => {
expect(
screen.getByText("Prompt contains undeclared variables: ['test']")
).toBeInTheDocument();
});
});
test("updates existing prompt successfully", async () => {
const updatedPrompt: Prompt = {
...mockPrompts[0],
prompt: "Updated content",
};
mockPromptsClient.list.mockResolvedValue(mockPrompts);
mockPromptsClient.update.mockResolvedValue(updatedPrompt);
const { container } = render(<PromptManagement />);
await waitFor(() => {
expect(screen.getByText("prompt_123")).toBeInTheDocument();
});
// Click edit button (first button in the action cell of the first row)
const actionCells = container.querySelectorAll("td:last-child");
const firstActionCell = actionCells[0];
const editButton = firstActionCell?.querySelector("button");
expect(editButton).toBeInTheDocument();
fireEvent.click(editButton!);
expect(screen.getByText("Edit Prompt")).toBeInTheDocument();
// Update content
const promptInput = screen.getByLabelText("Prompt Content *");
fireEvent.change(promptInput, { target: { value: "Updated content" } });
// Submit form
fireEvent.click(screen.getByText("Update Prompt"));
await waitFor(() => {
expect(mockPromptsClient.update).toHaveBeenCalledWith("prompt_123", {
prompt: "Updated content",
variables: ["name"],
version: 1,
set_as_default: true,
});
});
});
test("deletes prompt successfully", async () => {
mockPromptsClient.list.mockResolvedValue(mockPrompts);
mockPromptsClient.delete.mockResolvedValue(undefined);
// Mock window.confirm
const originalConfirm = window.confirm;
window.confirm = jest.fn(() => true);
const { container } = render(<PromptManagement />);
await waitFor(() => {
expect(screen.getByText("prompt_123")).toBeInTheDocument();
});
// Click delete button (second button in the action cell of the first row)
const actionCells = container.querySelectorAll("td:last-child");
const firstActionCell = actionCells[0];
const buttons = firstActionCell?.querySelectorAll("button");
const deleteButton = buttons?.[1]; // Second button should be delete
expect(deleteButton).toBeInTheDocument();
fireEvent.click(deleteButton!);
await waitFor(() => {
expect(mockPromptsClient.delete).toHaveBeenCalledWith("prompt_123");
});
// Restore window.confirm
window.confirm = originalConfirm;
});
});
});

View file

@ -0,0 +1,233 @@
"use client";
import { useState, useEffect } from "react";
import { Button } from "@/components/ui/button";
import { Plus } from "lucide-react";
import { PromptList } from "./prompt-list";
import { PromptEditor } from "./prompt-editor";
import { Prompt, PromptFormData } from "./types";
import { useAuthClient } from "@/hooks/use-auth-client";
export function PromptManagement() {
const [prompts, setPrompts] = useState<Prompt[]>([]);
const [showPromptModal, setShowPromptModal] = useState(false);
const [editingPrompt, setEditingPrompt] = useState<Prompt | undefined>();
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null); // For main page errors (loading, etc.)
const [modalError, setModalError] = useState<string | null>(null); // For form submission errors
const client = useAuthClient();
// Load prompts from API on component mount
useEffect(() => {
const fetchPrompts = async () => {
try {
setLoading(true);
setError(null);
const response = await client.prompts.list();
setPrompts(response || []);
} catch (err: unknown) {
console.error("Failed to load prompts:", err);
// Handle different types of errors
const error = err as Error & { status?: number };
if (error?.message?.includes("404") || error?.status === 404) {
setError(
"Prompts API endpoint not found. Please ensure your Llama Stack server supports the prompts API."
);
} else if (
error?.message?.includes("not implemented") ||
error?.message?.includes("not supported")
) {
setError(
"Prompts API is not yet implemented on this Llama Stack server."
);
} else {
setError(
`Failed to load prompts: ${error?.message || "Unknown error"}`
);
}
} finally {
setLoading(false);
}
};
fetchPrompts();
}, [client]);
const handleSavePrompt = async (formData: PromptFormData) => {
try {
setModalError(null);
if (editingPrompt) {
// Update existing prompt
const response = await client.prompts.update(editingPrompt.prompt_id, {
prompt: formData.prompt,
variables: formData.variables,
version: editingPrompt.version,
set_as_default: true,
});
// Update local state
setPrompts(prev =>
prev.map(p =>
p.prompt_id === editingPrompt.prompt_id ? response : p
)
);
} else {
// Create new prompt
const response = await client.prompts.create({
prompt: formData.prompt,
variables: formData.variables,
});
// Add to local state
setPrompts(prev => [response, ...prev]);
}
setShowPromptModal(false);
setEditingPrompt(undefined);
} catch (err) {
console.error("Failed to save prompt:", err);
// Extract specific error message from API response
const error = err as Error & {
message?: string;
detail?: { errors?: Array<{ msg?: string }> };
};
// Try to parse JSON from error message if it's a string
let parsedError = error;
if (typeof error?.message === "string" && error.message.includes("{")) {
try {
const jsonMatch = error.message.match(/\d+\s+(.+)/);
if (jsonMatch) {
parsedError = JSON.parse(jsonMatch[1]);
}
} catch {
// If parsing fails, use original error
}
}
// Try to get the specific validation error message
const validationError = parsedError?.detail?.errors?.[0]?.msg;
if (validationError) {
// Clean up validation error messages (remove "Value error, " prefix if present)
const cleanMessage = validationError.replace(/^Value error,\s*/i, "");
setModalError(cleanMessage);
} else {
// For other errors, format them nicely with line breaks
const statusMatch = error?.message?.match(/(\d+)\s+(.+)/);
if (statusMatch) {
const statusCode = statusMatch[1];
const response = statusMatch[2];
setModalError(
`Failed to save prompt: Status Code ${statusCode}\n\nResponse: ${response}`
);
} else {
const message = error?.message || error?.detail || "Unknown error";
setModalError(`Failed to save prompt: ${message}`);
}
}
}
};
const handleEditPrompt = (prompt: Prompt) => {
setEditingPrompt(prompt);
setShowPromptModal(true);
setModalError(null); // Clear any previous modal errors
};
const handleDeletePrompt = async (promptId: string) => {
try {
setError(null);
await client.prompts.delete(promptId);
setPrompts(prev => prev.filter(p => p.prompt_id !== promptId));
// If we're deleting the currently editing prompt, close the modal
if (editingPrompt && editingPrompt.prompt_id === promptId) {
setShowPromptModal(false);
setEditingPrompt(undefined);
}
} catch (err) {
console.error("Failed to delete prompt:", err);
setError("Failed to delete prompt");
}
};
const handleCreateNew = () => {
setEditingPrompt(undefined);
setShowPromptModal(true);
setModalError(null); // Clear any previous modal errors
};
const handleCancel = () => {
setShowPromptModal(false);
setEditingPrompt(undefined);
};
const renderContent = () => {
if (loading) {
return <div className="text-muted-foreground">Loading prompts...</div>;
}
if (error) {
return <div className="text-destructive">Error: {error}</div>;
}
if (!prompts || prompts.length === 0) {
return (
<div className="text-center py-12">
<p className="text-muted-foreground mb-4">No prompts found.</p>
<Button onClick={handleCreateNew}>
<Plus className="h-4 w-4 mr-2" />
Create Your First Prompt
</Button>
</div>
);
}
return (
<PromptList
prompts={prompts}
onEdit={handleEditPrompt}
onDelete={handleDeletePrompt}
/>
);
};
return (
<div className="space-y-4">
<div className="flex items-center justify-between">
<h1 className="text-2xl font-semibold">Prompts</h1>
<Button onClick={handleCreateNew} disabled={loading}>
<Plus className="h-4 w-4 mr-2" />
New Prompt
</Button>
</div>
{renderContent()}
{/* Create/Edit Prompt Modal */}
{showPromptModal && (
<div className="fixed inset-0 bg-black/50 flex items-center justify-center z-50">
<div className="bg-background border rounded-lg shadow-lg max-w-4xl w-full mx-4 max-h-[90vh] overflow-hidden">
<div className="p-6 border-b">
<h2 className="text-2xl font-bold">
{editingPrompt ? "Edit Prompt" : "Create New Prompt"}
</h2>
</div>
<div className="p-6 overflow-y-auto max-h-[calc(90vh-120px)]">
<PromptEditor
prompt={editingPrompt}
onSave={handleSavePrompt}
onCancel={handleCancel}
onDelete={handleDeletePrompt}
error={modalError}
/>
</div>
</div>
</div>
)}
</div>
);
}

View file

@ -0,0 +1,16 @@
export interface Prompt {
prompt_id: string;
prompt: string | null;
version: number;
variables: string[];
is_default: boolean;
}
export interface PromptFormData {
prompt: string;
variables: string[];
}
export interface PromptFilters {
searchTerm?: string;
}

View file

@ -0,0 +1,36 @@
import * as React from "react";
import { cva, type VariantProps } from "class-variance-authority";
import { cn } from "@/lib/utils";
const badgeVariants = cva(
"inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2",
{
variants: {
variant: {
default:
"border-transparent bg-primary text-primary-foreground hover:bg-primary/80",
secondary:
"border-transparent bg-secondary text-secondary-foreground hover:bg-secondary/80",
destructive:
"border-transparent bg-destructive text-destructive-foreground hover:bg-destructive/80",
outline: "text-foreground",
},
},
defaultVariants: {
variant: "default",
},
}
);
export interface BadgeProps
extends React.HTMLAttributes<HTMLDivElement>,
VariantProps<typeof badgeVariants> {}
function Badge({ className, variant, ...props }: BadgeProps) {
return (
<div className={cn(badgeVariants({ variant }), className)} {...props} />
);
}
export { Badge, badgeVariants };

View file

@ -0,0 +1,24 @@
import * as React from "react";
import * as LabelPrimitive from "@radix-ui/react-label";
import { cva, type VariantProps } from "class-variance-authority";
import { cn } from "@/lib/utils";
const labelVariants = cva(
"text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70"
);
const Label = React.forwardRef<
React.ElementRef<typeof LabelPrimitive.Root>,
React.ComponentPropsWithoutRef<typeof LabelPrimitive.Root> &
VariantProps<typeof labelVariants>
>(({ className, ...props }, ref) => (
<LabelPrimitive.Root
ref={ref}
className={cn(labelVariants(), className)}
{...props}
/>
));
Label.displayName = LabelPrimitive.Root.displayName;
export { Label };

View file

@ -0,0 +1,53 @@
import * as React from "react";
import * as TabsPrimitive from "@radix-ui/react-tabs";
import { cn } from "@/lib/utils";
const Tabs = TabsPrimitive.Root;
const TabsList = React.forwardRef<
React.ElementRef<typeof TabsPrimitive.List>,
React.ComponentPropsWithoutRef<typeof TabsPrimitive.List>
>(({ className, ...props }, ref) => (
<TabsPrimitive.List
ref={ref}
className={cn(
"inline-flex h-10 items-center justify-center rounded-md bg-muted p-1 text-muted-foreground",
className
)}
{...props}
/>
));
TabsList.displayName = TabsPrimitive.List.displayName;
const TabsTrigger = React.forwardRef<
React.ElementRef<typeof TabsPrimitive.Trigger>,
React.ComponentPropsWithoutRef<typeof TabsPrimitive.Trigger>
>(({ className, ...props }, ref) => (
<TabsPrimitive.Trigger
ref={ref}
className={cn(
"inline-flex items-center justify-center whitespace-nowrap rounded-sm px-3 py-1.5 text-sm font-medium ring-offset-background transition-all focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 data-[state=active]:bg-background data-[state=active]:text-foreground data-[state=active]:shadow-sm",
className
)}
{...props}
/>
));
TabsTrigger.displayName = TabsPrimitive.Trigger.displayName;
const TabsContent = React.forwardRef<
React.ElementRef<typeof TabsPrimitive.Content>,
React.ComponentPropsWithoutRef<typeof TabsPrimitive.Content>
>(({ className, ...props }, ref) => (
<TabsPrimitive.Content
ref={ref}
className={cn(
"mt-2 ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2",
className
)}
{...props}
/>
));
TabsContent.displayName = TabsPrimitive.Content.displayName;
export { Tabs, TabsList, TabsTrigger, TabsContent };

View file

@ -0,0 +1,23 @@
import * as React from "react";
import { cn } from "@/lib/utils";
export type TextareaProps = React.TextareaHTMLAttributes<HTMLTextAreaElement>;
const Textarea = React.forwardRef<HTMLTextAreaElement, TextareaProps>(
({ className, ...props }, ref) => {
return (
<textarea
className={cn(
"flex min-h-[80px] w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",
className
)}
ref={ref}
{...props}
/>
);
}
);
Textarea.displayName = "Textarea";
export { Textarea };

View file

@ -11,14 +11,16 @@
"@radix-ui/react-collapsible": "^1.1.12", "@radix-ui/react-collapsible": "^1.1.12",
"@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-dialog": "^1.1.15",
"@radix-ui/react-dropdown-menu": "^2.1.16", "@radix-ui/react-dropdown-menu": "^2.1.16",
"@radix-ui/react-label": "^2.1.7",
"@radix-ui/react-select": "^2.2.6", "@radix-ui/react-select": "^2.2.6",
"@radix-ui/react-separator": "^1.1.7", "@radix-ui/react-separator": "^1.1.7",
"@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-slot": "^1.2.3",
"@radix-ui/react-tabs": "^1.1.13",
"@radix-ui/react-tooltip": "^1.2.8", "@radix-ui/react-tooltip": "^1.2.8",
"class-variance-authority": "^0.7.1", "class-variance-authority": "^0.7.1",
"clsx": "^2.1.1", "clsx": "^2.1.1",
"framer-motion": "^12.23.24", "framer-motion": "^12.23.24",
"llama-stack-client": "^0.3.0", "llama-stack-client": "github:llamastack/llama-stack-client-typescript",
"lucide-react": "^0.545.0", "lucide-react": "^0.545.0",
"next": "15.5.4", "next": "15.5.4",
"next-auth": "^4.24.11", "next-auth": "^4.24.11",
@ -2597,6 +2599,29 @@
} }
} }
}, },
"node_modules/@radix-ui/react-label": {
"version": "2.1.7",
"resolved": "https://registry.npmjs.org/@radix-ui/react-label/-/react-label-2.1.7.tgz",
"integrity": "sha512-YT1GqPSL8kJn20djelMX7/cTRp/Y9w5IZHvfxQTVHrOqa2yMl7i/UfMqKRU5V7mEyKTrUVgJXhNQPVCG8PBLoQ==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-primitive": "2.1.3"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-menu": { "node_modules/@radix-ui/react-menu": {
"version": "2.1.16", "version": "2.1.16",
"resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.16.tgz", "resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.16.tgz",
@ -2855,6 +2880,36 @@
} }
} }
}, },
"node_modules/@radix-ui/react-tabs": {
"version": "1.1.13",
"resolved": "https://registry.npmjs.org/@radix-ui/react-tabs/-/react-tabs-1.1.13.tgz",
"integrity": "sha512-7xdcatg7/U+7+Udyoj2zodtI9H/IIopqo+YOIcZOq1nJwXWBZ9p8xiu5llXlekDbZkca79a/fozEYQXIA4sW6A==",
"license": "MIT",
"dependencies": {
"@radix-ui/primitive": "1.1.3",
"@radix-ui/react-context": "1.1.2",
"@radix-ui/react-direction": "1.1.1",
"@radix-ui/react-id": "1.1.1",
"@radix-ui/react-presence": "1.1.5",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-roving-focus": "1.1.11",
"@radix-ui/react-use-controllable-state": "1.2.2"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-tooltip": { "node_modules/@radix-ui/react-tooltip": {
"version": "1.2.8", "version": "1.2.8",
"resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz", "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz",
@ -9629,9 +9684,8 @@
"license": "MIT" "license": "MIT"
}, },
"node_modules/llama-stack-client": { "node_modules/llama-stack-client": {
"version": "0.3.0", "version": "0.4.0-alpha.1",
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.3.0.tgz", "resolved": "git+ssh://git@github.com/llamastack/llama-stack-client-typescript.git#78de4862c4b7d77939ac210fa9f9bde77a2c5c5f",
"integrity": "sha512-76K/t1doaGmlBbDxCADaral9Vccvys9P8pqAMIhwBhMAqWudCEORrMMhUSg+pjhamWmEKj3wa++d4zeOGbfN/w==",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@types/node": "^18.11.18", "@types/node": "^18.11.18",

View file

@ -16,14 +16,16 @@
"@radix-ui/react-collapsible": "^1.1.12", "@radix-ui/react-collapsible": "^1.1.12",
"@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-dialog": "^1.1.15",
"@radix-ui/react-dropdown-menu": "^2.1.16", "@radix-ui/react-dropdown-menu": "^2.1.16",
"@radix-ui/react-label": "^2.1.7",
"@radix-ui/react-select": "^2.2.6", "@radix-ui/react-select": "^2.2.6",
"@radix-ui/react-separator": "^1.1.7", "@radix-ui/react-separator": "^1.1.7",
"@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-slot": "^1.2.3",
"@radix-ui/react-tabs": "^1.1.13",
"@radix-ui/react-tooltip": "^1.2.8", "@radix-ui/react-tooltip": "^1.2.8",
"class-variance-authority": "^0.7.1", "class-variance-authority": "^0.7.1",
"clsx": "^2.1.1", "clsx": "^2.1.1",
"framer-motion": "^12.23.24", "framer-motion": "^12.23.24",
"llama-stack-client": "^0.3.0", "llama-stack-client": "github:llamastack/llama-stack-client-typescript",
"lucide-react": "^0.545.0", "lucide-react": "^0.545.0",
"next": "15.5.4", "next": "15.5.4",
"next-auth": "^4.24.11", "next-auth": "^4.24.11",

View file

@ -84,5 +84,6 @@
} }
], ],
"is_streaming": false "is_streaming": false
} },
"id_normalization_mapping": {}
} }

View file

@ -171,6 +171,10 @@ def pytest_addoption(parser):
"--embedding-model", "--embedding-model",
help="comma-separated list of embedding models. Fixture name: embedding_model_id", help="comma-separated list of embedding models. Fixture name: embedding_model_id",
) )
parser.addoption(
"--rerank-model",
help="comma-separated list of rerank models. Fixture name: rerank_model_id",
)
parser.addoption( parser.addoption(
"--safety-shield", "--safety-shield",
help="comma-separated list of safety shields. Fixture name: shield_id", help="comma-separated list of safety shields. Fixture name: shield_id",
@ -249,6 +253,7 @@ def pytest_generate_tests(metafunc):
"shield_id": ("--safety-shield", "shield"), "shield_id": ("--safety-shield", "shield"),
"judge_model_id": ("--judge-model", "judge"), "judge_model_id": ("--judge-model", "judge"),
"embedding_dimension": ("--embedding-dimension", "dim"), "embedding_dimension": ("--embedding-dimension", "dim"),
"rerank_model_id": ("--rerank-model", "rerank"),
} }
# Collect all parameters and their values # Collect all parameters and their values
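For orientation (not part of this diff): the new --rerank-model option is fanned out into the rerank_model_id fixture by pytest_generate_tests above, so a run exercising the rerank tests might look like the sketch below. The test path, the --stack-config flag, and the model id are assumptions, not values introduced by this change.

# Hypothetical invocation of the new --rerank-model option; the test path,
# the --stack-config flag, and the model id are assumptions.
import sys

import pytest

sys.exit(
    pytest.main(
        [
            "tests/integration/inference/test_rerank.py",
            "--stack-config=server:starter",
            "--rerank-model=nvidia/llama-3.2-nv-rerankqa-1b-v2",
        ]
    )
)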

View file

@ -153,6 +153,7 @@ def client_with_models(
vision_model_id, vision_model_id,
embedding_model_id, embedding_model_id,
judge_model_id, judge_model_id,
rerank_model_id,
): ):
client = llama_stack_client client = llama_stack_client
@ -170,6 +171,9 @@ def client_with_models(
if embedding_model_id and embedding_model_id not in model_ids: if embedding_model_id and embedding_model_id not in model_ids:
raise ValueError(f"embedding_model_id {embedding_model_id} not found") raise ValueError(f"embedding_model_id {embedding_model_id} not found")
if rerank_model_id and rerank_model_id not in model_ids:
raise ValueError(f"rerank_model_id {rerank_model_id} not found")
return client return client
@ -185,7 +189,14 @@ def model_providers(llama_stack_client):
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def skip_if_no_model(request): def skip_if_no_model(request):
model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id", "shield_id"] model_fixtures = [
"text_model_id",
"vision_model_id",
"embedding_model_id",
"judge_model_id",
"shield_id",
"rerank_model_id",
]
test_func = request.node.function test_func = request.node.function
actual_params = inspect.signature(test_func).parameters.keys() actual_params = inspect.signature(test_func).parameters.keys()

View file

@ -721,6 +721,6 @@ def test_openai_chat_completion_structured_output(openai_client, text_model_id,
print(response.choices[0].message.content) print(response.choices[0].message.content)
answer = AnswerFormat.model_validate_json(response.choices[0].message.content) answer = AnswerFormat.model_validate_json(response.choices[0].message.content)
expected = tc["expected"] expected = tc["expected"]
assert answer.first_name == expected["first_name"] assert expected["first_name"].lower() in answer.first_name.lower()
assert answer.last_name == expected["last_name"] assert expected["last_name"].lower() in answer.last_name.lower()
assert answer.year_of_birth == expected["year_of_birth"] assert answer.year_of_birth == expected["year_of_birth"]

View file

@ -0,0 +1,214 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
from llama_stack_client import BadRequestError as LlamaStackBadRequestError
from llama_stack_client.types.alpha import InferenceRerankResponse
from llama_stack_client.types.shared.interleaved_content import (
ImageContentItem,
ImageContentItemImage,
ImageContentItemImageURL,
TextContentItem,
)
from llama_stack.core.library_client import LlamaStackAsLibraryClient
# Test data
DUMMY_STRING = "string_1"
DUMMY_STRING2 = "string_2"
DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text")
DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text")
DUMMY_IMAGE_URL = ImageContentItem(
image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
)
DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
PROVIDERS_SUPPORTING_MEDIA: set[str] = set()  # Provider types that support media input for rerank models
def skip_if_provider_doesnt_support_rerank(inference_provider_type):
supported_providers = {"remote::nvidia"}
if inference_provider_type not in supported_providers:
pytest.skip(f"{inference_provider_type} doesn't support rerank models")
def _validate_rerank_response(response: InferenceRerankResponse, items: list) -> None:
"""
Validate that a rerank response has the correct structure and ordering.
Args:
response: The InferenceRerankResponse to validate
items: The original items list that was ranked
Raises:
AssertionError: If any validation fails
"""
seen = set()
last_score = float("inf")
for d in response:
assert 0 <= d.index < len(items), f"Index {d.index} out of bounds for {len(items)} items"
assert d.index not in seen, f"Duplicate index {d.index} found"
seen.add(d.index)
assert isinstance(d.relevance_score, float), f"Score must be float, got {type(d.relevance_score)}"
assert d.relevance_score <= last_score, f"Scores not in descending order: {d.relevance_score} > {last_score}"
last_score = d.relevance_score
def _validate_semantic_ranking(response: InferenceRerankResponse, items: list, expected_first_item: str) -> None:
"""
Validate that the expected most relevant item ranks first.
Args:
response: The InferenceRerankResponse to validate
items: The original items list that was ranked
expected_first_item: The expected first item in the ranking
Raises:
AssertionError: If any validation fails
"""
if not response:
raise AssertionError("No ranking data returned in response")
actual_first_index = response[0].index
actual_first_item = items[actual_first_index]
assert actual_first_item == expected_first_item, (
f"Expected '{expected_first_item}' to rank first, but '{actual_first_item}' ranked first instead."
)
@pytest.mark.parametrize(
"query,items",
[
(DUMMY_STRING, [DUMMY_STRING, DUMMY_STRING2]),
(DUMMY_TEXT, [DUMMY_TEXT, DUMMY_TEXT2]),
(DUMMY_STRING, [DUMMY_STRING2, DUMMY_TEXT]),
(DUMMY_TEXT, [DUMMY_STRING, DUMMY_TEXT2]),
],
ids=[
"string-query-string-items",
"text-query-text-items",
"mixed-content-1",
"mixed-content-2",
],
)
def test_rerank_text(client_with_models, rerank_model_id, query, items, inference_provider_type):
skip_if_provider_doesnt_support_rerank(inference_provider_type)
response = client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
assert isinstance(response, list)
# TODO: Add type validation for response items once InferenceRerankResponseItem is exported from llama stack client.
assert len(response) <= len(items)
_validate_rerank_response(response, items)
@pytest.mark.parametrize(
"query,items",
[
(DUMMY_IMAGE_URL, [DUMMY_STRING]),
(DUMMY_IMAGE_BASE64, [DUMMY_TEXT]),
(DUMMY_TEXT, [DUMMY_IMAGE_URL]),
(DUMMY_IMAGE_BASE64, [DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT]),
(DUMMY_TEXT, [DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT]),
],
ids=[
"image-query-url",
"image-query-base64",
"text-query-image-item",
"mixed-content-1",
"mixed-content-2",
],
)
def test_rerank_image(client_with_models, rerank_model_id, query, items, inference_provider_type):
skip_if_provider_doesnt_support_rerank(inference_provider_type)
if inference_provider_type not in PROVIDERS_SUPPORTING_MEDIA:  # keyed by provider type, matching the set above
error_type = (
ValueError if isinstance(client_with_models, LlamaStackAsLibraryClient) else LlamaStackBadRequestError
)
with pytest.raises(error_type):
client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
else:
response = client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
assert isinstance(response, list)
assert len(response) <= len(items)
_validate_rerank_response(response, items)
def test_rerank_max_results(client_with_models, rerank_model_id, inference_provider_type):
skip_if_provider_doesnt_support_rerank(inference_provider_type)
items = [DUMMY_STRING, DUMMY_STRING2, DUMMY_TEXT, DUMMY_TEXT2]
max_num_results = 2
response = client_with_models.alpha.inference.rerank(
model=rerank_model_id,
query=DUMMY_STRING,
items=items,
max_num_results=max_num_results,
)
assert isinstance(response, list)
assert len(response) == max_num_results
_validate_rerank_response(response, items)
def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_id, inference_provider_type):
skip_if_provider_doesnt_support_rerank(inference_provider_type)
items = [DUMMY_STRING, DUMMY_STRING2]
response = client_with_models.alpha.inference.rerank(
model=rerank_model_id,
query=DUMMY_STRING,
items=items,
max_num_results=10, # Larger than items length
)
assert isinstance(response, list)
assert len(response) <= len(items) # Should return at most len(items)
@pytest.mark.parametrize(
"query,items,expected_first_item",
[
(
"What is a reranking model? ",
[
"A reranking model reranks a list of items based on the query. ",
"Machine learning algorithms learn patterns from data. ",
"Python is a programming language. ",
],
"A reranking model reranks a list of items based on the query. ",
),
(
"What is C++?",
[
"Learning new things is interesting. ",
"C++ is a programming language. ",
"Books provide knowledge and entertainment. ",
],
"C++ is a programming language. ",
),
(
"What are good learning habits? ",
[
"Cooking pasta is a fun activity. ",
"Plants need water and sunlight. ",
"Good learning habits include reading daily and taking notes. ",
],
"Good learning habits include reading daily and taking notes. ",
),
],
)
def test_rerank_semantic_correctness(
client_with_models, rerank_model_id, query, items, expected_first_item, inference_provider_type
):
skip_if_provider_doesnt_support_rerank(inference_provider_type)
response = client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
_validate_rerank_response(response, items)
_validate_semantic_ranking(response, items, expected_first_item)
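For reference, a minimal standalone sketch of the alpha rerank call these tests exercise; the base URL and model id below are placeholders, not values defined in this PR.

# Minimal sketch of the rerank API used by the tests above; the base URL
# and model id are placeholders.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

response = client.alpha.inference.rerank(
    model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
    query="What is a reranking model?",
    items=[
        "A reranking model reranks a list of items based on the query.",
        "Python is a programming language.",
    ],
    max_num_results=1,
)

# Results are ordered by descending relevance_score; each entry carries the
# index of the corresponding item in the original list.
for item in response:
    print(item.index, item.relevance_score)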

View file

@ -4,18 +4,75 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import pytest
from llama_stack_client import LlamaStackClient from llama_stack_client import LlamaStackClient
from llama_stack import LlamaStackAsLibraryClient from llama_stack import LlamaStackAsLibraryClient
class TestInspect: class TestInspect:
@pytest.mark.skip(reason="inspect tests disabled")
def test_health(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient): def test_health(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
health = llama_stack_client.inspect.health() health = llama_stack_client.inspect.health()
assert health is not None assert health is not None
assert health.status == "OK" assert health.status == "OK"
@pytest.mark.skip(reason="inspect tests disabled")
def test_version(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient): def test_version(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
version = llama_stack_client.inspect.version() version = llama_stack_client.inspect.version()
assert version is not None assert version is not None
assert version.version is not None assert version.version is not None
@pytest.mark.skip(reason="inspect tests disabled")
def test_list_routes_default(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
"""Test list_routes with default filter (non-deprecated v1 routes)."""
response = llama_stack_client.routes.list()
assert response is not None
assert hasattr(response, "data")
routes = response.data
assert len(routes) > 0
# All routes should be non-deprecated
# Check that we don't see any /openai/ routes (which are deprecated)
openai_routes = [r for r in routes if "/openai/" in r.route]
assert len(openai_routes) == 0, "Default filter should not include deprecated /openai/ routes"
# Should see standard v1 routes like /inspect/routes, /health, /version
paths = [r.route for r in routes]
assert "/inspect/routes" in paths or "/v1/inspect/routes" in paths
assert "/health" in paths or "/v1/health" in paths
@pytest.mark.skip(reason="inspect tests disabled")
def test_list_routes_filter_by_deprecated(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
"""Test list_routes with deprecated filter."""
response = llama_stack_client.routes.list(api_filter="deprecated")
assert response is not None
assert hasattr(response, "data")
routes = response.data
# When filtering for deprecated, we should get deprecated routes
# At minimum, we should see some /openai/ routes which are deprecated
if len(routes) > 0:
# If there are any deprecated routes, they should include openai routes
openai_routes = [r for r in routes if "/openai/" in r.route]
assert len(openai_routes) > 0, "Deprecated filter should include /openai/ routes"
@pytest.mark.skip(reason="inspect tests disabled")
def test_list_routes_filter_by_v1(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
"""Test list_routes with v1 filter."""
response = llama_stack_client.routes.list(api_filter="v1")
assert response is not None
assert hasattr(response, "data")
routes = response.data
assert len(routes) > 0
# Should not include deprecated routes
openai_routes = [r for r in routes if "/openai/" in r.route]
assert len(openai_routes) == 0
# Should include v1 routes
paths = [r.route for r in routes]
assert any(
"/v1/" in p or p.startswith("/inspect/") or p.startswith("/health") or p.startswith("/version")
for p in paths
)
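Although these tests are skipped for now, the route-listing calls they cover look roughly like this against a running stack; the base URL is a placeholder.

# Rough sketch of the routes API covered by the (currently skipped) tests above.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

default_routes = client.routes.list().data  # non-deprecated v1 routes
deprecated_routes = client.routes.list(api_filter="deprecated").data
v1_routes = client.routes.list(api_filter="v1").data

print(len(default_routes), len(deprecated_routes), len(v1_routes))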

View file

@ -6,20 +6,88 @@
"""Shared helpers for telemetry test collectors.""" """Shared helpers for telemetry test collectors."""
import time
from collections.abc import Iterable from collections.abc import Iterable
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any from typing import Any
@dataclass @dataclass
class SpanStub: class MetricStub:
"""Unified metric interface for both in-memory and OTLP collectors."""
name: str name: str
attributes: dict[str, Any] value: Any
attributes: dict[str, Any] | None = None
@dataclass
class SpanStub:
"""Unified span interface for both in-memory and OTLP collectors."""
name: str
attributes: dict[str, Any] | None = None
resource_attributes: dict[str, Any] | None = None resource_attributes: dict[str, Any] | None = None
events: list[dict[str, Any]] | None = None events: list[dict[str, Any]] | None = None
trace_id: str | None = None trace_id: str | None = None
span_id: str | None = None span_id: str | None = None
@property
def context(self):
"""Provide context-like interface for trace_id compatibility."""
if self.trace_id is None:
return None
return type("Context", (), {"trace_id": int(self.trace_id, 16)})()
def get_trace_id(self) -> str | None:
"""Get trace ID in hex format.
Tries context.trace_id first, then falls back to direct trace_id.
"""
context = getattr(self, "context", None)
if context and getattr(context, "trace_id", None) is not None:
return f"{context.trace_id:032x}"
return getattr(self, "trace_id", None)
def has_message(self, text: str) -> bool:
"""Check if span contains a specific message in its args."""
if self.attributes is None:
return False
args = self.attributes.get("__args__")
if not args or not isinstance(args, str):
return False
return text in args
def is_root_span(self) -> bool:
"""Check if this is a root span."""
if self.attributes is None:
return False
return self.attributes.get("__root__") is True
def is_autotraced(self) -> bool:
"""Check if this span was automatically traced."""
if self.attributes is None:
return False
return self.attributes.get("__autotraced__") is True
def get_span_type(self) -> str | None:
"""Get the span type (async, sync, async_generator)."""
if self.attributes is None:
return None
return self.attributes.get("__type__")
def get_class_method(self) -> tuple[str | None, str | None]:
"""Get the class and method names for autotraced spans."""
if self.attributes is None:
return None, None
return (self.attributes.get("__class__"), self.attributes.get("__method__"))
def get_location(self) -> str | None:
"""Get the location (library_client, server) for root spans."""
if self.attributes is None:
return None
return self.attributes.get("__location__")
def _value_to_python(value: Any) -> Any: def _value_to_python(value: Any) -> Any:
kind = value.WhichOneof("value") kind = value.WhichOneof("value")
@ -56,14 +124,18 @@ def events_to_list(events: Iterable[Any]) -> list[dict[str, Any]]:
class BaseTelemetryCollector: class BaseTelemetryCollector:
"""Base class for telemetry collectors that ensures consistent return types.
All collectors must return SpanStub objects to ensure test compatibility
across both library-client and server modes.
"""
def get_spans( def get_spans(
self, self,
expected_count: int | None = None, expected_count: int | None = None,
timeout: float = 5.0, timeout: float = 5.0,
poll_interval: float = 0.05, poll_interval: float = 0.05,
) -> tuple[Any, ...]: ) -> tuple[SpanStub, ...]:
import time
deadline = time.time() + timeout deadline = time.time() + timeout
min_count = expected_count if expected_count is not None else 1 min_count = expected_count if expected_count is not None else 1
last_len: int | None = None last_len: int | None = None
@ -91,16 +163,206 @@ class BaseTelemetryCollector:
last_len = len(spans) last_len = len(spans)
time.sleep(poll_interval) time.sleep(poll_interval)
def get_metrics(self) -> Any | None: def get_metrics(
return self._snapshot_metrics() self,
expected_count: int | None = None,
timeout: float = 5.0,
poll_interval: float = 0.05,
expect_model_id: str | None = None,
) -> dict[str, MetricStub]:
"""Get metrics with polling until metrics are available or timeout is reached."""
# Accumulate metrics across polls, since each snapshot read drains the stored metrics
deadline = time.time() + timeout
min_count = expected_count if expected_count is not None else 1
accumulated_metrics = {}
count_metrics_with_model_id = 0
while time.time() < deadline:
current_metrics = self._snapshot_metrics()
if current_metrics:
for metric in current_metrics:
metric_name = metric.name
if metric_name not in accumulated_metrics:
accumulated_metrics[metric_name] = metric
if (
expect_model_id
and metric.attributes
and metric.attributes.get("model_id") == expect_model_id
):
count_metrics_with_model_id += 1
else:
accumulated_metrics[metric_name] = metric
# Check if we have enough metrics
if len(accumulated_metrics) >= min_count:
if not expect_model_id:
return accumulated_metrics
if count_metrics_with_model_id >= min_count:
return accumulated_metrics
time.sleep(poll_interval)
return accumulated_metrics
@staticmethod
def _convert_attributes_to_dict(attrs: Any) -> dict[str, Any]:
"""Convert various attribute types to a consistent dictionary format.
Handles mappingproxy, dict, and other attribute types.
"""
if attrs is None:
return {}
try:
return dict(attrs.items()) # type: ignore[attr-defined]
except AttributeError:
try:
return dict(attrs)
except TypeError:
return {}  # attrs is not mapping-like; fall back to an empty dict instead of re-raising
@staticmethod
def _extract_trace_span_ids(span: Any) -> tuple[str | None, str | None]:
"""Extract trace_id and span_id from OpenTelemetry span object.
Handles both context-based and direct attribute access.
"""
trace_id = None
span_id = None
context = getattr(span, "context", None)
if context:
trace_id = f"{context.trace_id:032x}"
span_id = f"{context.span_id:016x}"
else:
trace_id = getattr(span, "trace_id", None)
span_id = getattr(span, "span_id", None)
return trace_id, span_id
@staticmethod
def _create_span_stub_from_opentelemetry(span: Any) -> SpanStub:
"""Create SpanStub from OpenTelemetry span object.
This helper reduces code duplication between collectors.
"""
trace_id, span_id = BaseTelemetryCollector._extract_trace_span_ids(span)
attributes = BaseTelemetryCollector._convert_attributes_to_dict(span.attributes) or {}
return SpanStub(
name=span.name,
attributes=attributes,
trace_id=trace_id,
span_id=span_id,
)
@staticmethod
def _create_span_stub_from_protobuf(span: Any, resource_attrs: dict[str, Any] | None = None) -> SpanStub:
"""Create SpanStub from protobuf span object.
This helper handles the different structure of protobuf spans.
"""
attributes = attributes_to_dict(span.attributes) or {}
events = events_to_list(span.events) if span.events else None
trace_id = span.trace_id.hex() if span.trace_id else None
span_id = span.span_id.hex() if span.span_id else None
return SpanStub(
name=span.name,
attributes=attributes,
resource_attributes=resource_attrs,
events=events,
trace_id=trace_id,
span_id=span_id,
)
@staticmethod
def _extract_metric_from_opentelemetry(metric: Any) -> MetricStub | None:
"""Extract MetricStub from OpenTelemetry metric object.
This helper reduces code duplication between collectors.
"""
if not (hasattr(metric, "name") and hasattr(metric, "data") and hasattr(metric.data, "data_points")):
return None
if not (metric.data.data_points and len(metric.data.data_points) > 0):
return None
# Get the value from the first data point
data_point = metric.data.data_points[0]
# Handle different metric types
if hasattr(data_point, "value"):
# Counter or Gauge
value = data_point.value
elif hasattr(data_point, "sum"):
# Histogram - use the sum of all recorded values
value = data_point.sum
else:
return None
# Extract attributes if available
attributes = {}
if hasattr(data_point, "attributes"):
attrs = data_point.attributes
if attrs is not None and hasattr(attrs, "items"):
attributes = dict(attrs.items())
elif attrs is not None and not isinstance(attrs, dict):
attributes = dict(attrs)
return MetricStub(
name=metric.name,
value=value,
attributes=attributes or {},
)
@staticmethod
def _create_metric_stub_from_protobuf(metric: Any) -> MetricStub | None:
"""Create MetricStub from protobuf metric object.
Protobuf metrics have a different structure than OpenTelemetry metrics.
They can have sum, gauge, or histogram data.
"""
if not hasattr(metric, "name"):
return None
# Try to extract value from different metric types
for metric_type in ["sum", "gauge", "histogram"]:
if hasattr(metric, metric_type):
metric_data = getattr(metric, metric_type)
if metric_data and hasattr(metric_data, "data_points"):
data_points = metric_data.data_points
if data_points and len(data_points) > 0:
data_point = data_points[0]
# Extract attributes first (needed for all metric types)
attributes = (
attributes_to_dict(data_point.attributes) if hasattr(data_point, "attributes") else {}
)
# Extract value based on metric type
if metric_type == "sum":
value = data_point.as_int
elif metric_type == "gauge":
value = data_point.as_double
else: # histogram
value = data_point.sum
return MetricStub(
name=metric.name,
value=value,
attributes=attributes,
)
return None
def clear(self) -> None: def clear(self) -> None:
self._clear_impl() self._clear_impl()
def _snapshot_spans(self) -> tuple[Any, ...]: # pragma: no cover - interface hook def _snapshot_spans(self) -> tuple[SpanStub, ...]: # pragma: no cover - interface hook
raise NotImplementedError raise NotImplementedError
def _snapshot_metrics(self) -> Any | None: # pragma: no cover - interface hook def _snapshot_metrics(self) -> tuple[MetricStub, ...] | None: # pragma: no cover - interface hook
raise NotImplementedError raise NotImplementedError
def _clear_impl(self) -> None: # pragma: no cover - interface hook def _clear_impl(self) -> None: # pragma: no cover - interface hook
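A quick standalone sketch of how the stub helpers above behave; the import path is assumed (the collectors themselves use a relative "from .base import ...").

# Standalone sketch of SpanStub / MetricStub; the import path is a guess.
from base import MetricStub, SpanStub

span = SpanStub(
    name="InferenceRouter.chat_completion",
    attributes={
        "__autotraced__": True,
        "__type__": "async",
        "__class__": "InferenceRouter",
        "__method__": "chat_completion",
        "__args__": '{"model_id": "my-model"}',
    },
    trace_id="0123456789abcdef0123456789abcdef",
)

assert span.is_autotraced()
assert span.get_span_type() == "async"
assert span.get_class_method() == ("InferenceRouter", "chat_completion")
assert span.get_trace_id() == "0123456789abcdef0123456789abcdef"
assert span.has_message("my-model")

metric = MetricStub(name="total_tokens", value=42, attributes={"model_id": "my-model"})
assert metric.value == 42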

View file

@ -6,8 +6,6 @@
"""In-memory telemetry collector for library-client tests.""" """In-memory telemetry collector for library-client tests."""
from typing import Any
import opentelemetry.metrics as otel_metrics import opentelemetry.metrics as otel_metrics
import opentelemetry.trace as otel_trace import opentelemetry.trace as otel_trace
from opentelemetry import metrics, trace from opentelemetry import metrics, trace
@ -19,47 +17,42 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanE
import llama_stack.core.telemetry.telemetry as telemetry_module import llama_stack.core.telemetry.telemetry as telemetry_module
from .base import BaseTelemetryCollector, SpanStub from .base import BaseTelemetryCollector, MetricStub, SpanStub
class InMemoryTelemetryCollector(BaseTelemetryCollector): class InMemoryTelemetryCollector(BaseTelemetryCollector):
"""In-memory telemetry collector for library-client tests.
Converts OpenTelemetry span objects to SpanStub objects to ensure
consistent interface with OTLP collector used in server mode.
"""
def __init__(self, span_exporter: InMemorySpanExporter, metric_reader: InMemoryMetricReader) -> None: def __init__(self, span_exporter: InMemorySpanExporter, metric_reader: InMemoryMetricReader) -> None:
self._span_exporter = span_exporter self._span_exporter = span_exporter
self._metric_reader = metric_reader self._metric_reader = metric_reader
def _snapshot_spans(self) -> tuple[Any, ...]: def _snapshot_spans(self) -> tuple[SpanStub, ...]:
spans = [] spans = []
for span in self._span_exporter.get_finished_spans(): for span in self._span_exporter.get_finished_spans():
trace_id = None spans.append(self._create_span_stub_from_opentelemetry(span))
span_id = None
context = getattr(span, "context", None)
if context:
trace_id = f"{context.trace_id:032x}"
span_id = f"{context.span_id:016x}"
else:
trace_id = getattr(span, "trace_id", None)
span_id = getattr(span, "span_id", None)
stub = SpanStub(
span.name,
span.attributes,
getattr(span, "resource", None),
getattr(span, "events", None),
trace_id,
span_id,
)
spans.append(stub)
return tuple(spans) return tuple(spans)
def _snapshot_metrics(self) -> Any | None: def _snapshot_metrics(self) -> tuple[MetricStub, ...] | None:
data = self._metric_reader.get_metrics_data() data = self._metric_reader.get_metrics_data()
if data and data.resource_metrics: if not data or not data.resource_metrics:
resource_metric = data.resource_metrics[0]
if resource_metric.scope_metrics:
return resource_metric.scope_metrics[0].metrics
return None return None
metric_stubs = []
for resource_metric in data.resource_metrics:
if resource_metric.scope_metrics:
for scope_metric in resource_metric.scope_metrics:
for metric in scope_metric.metrics:
metric_stub = self._extract_metric_from_opentelemetry(metric)
if metric_stub:
metric_stubs.append(metric_stub)
return tuple(metric_stubs) if metric_stubs else None
def _clear_impl(self) -> None: def _clear_impl(self) -> None:
self._span_exporter.clear() self._span_exporter.clear()
self._metric_reader.get_metrics_data() self._metric_reader.get_metrics_data()

View file

@ -9,20 +9,20 @@
import gzip import gzip
import os import os
import threading import threading
import time
from http.server import BaseHTTPRequestHandler, HTTPServer from http.server import BaseHTTPRequestHandler, HTTPServer
from socketserver import ThreadingMixIn from socketserver import ThreadingMixIn
from typing import Any
from opentelemetry.proto.collector.metrics.v1.metrics_service_pb2 import ExportMetricsServiceRequest from opentelemetry.proto.collector.metrics.v1.metrics_service_pb2 import ExportMetricsServiceRequest
from opentelemetry.proto.collector.trace.v1.trace_service_pb2 import ExportTraceServiceRequest from opentelemetry.proto.collector.trace.v1.trace_service_pb2 import ExportTraceServiceRequest
from .base import BaseTelemetryCollector, SpanStub, attributes_to_dict, events_to_list from .base import BaseTelemetryCollector, MetricStub, SpanStub, attributes_to_dict
class OtlpHttpTestCollector(BaseTelemetryCollector): class OtlpHttpTestCollector(BaseTelemetryCollector):
def __init__(self) -> None: def __init__(self) -> None:
self._spans: list[SpanStub] = [] self._spans: list[SpanStub] = []
self._metrics: list[Any] = [] self._metrics: list[MetricStub] = []
self._lock = threading.Lock() self._lock = threading.Lock()
class _ThreadingHTTPServer(ThreadingMixIn, HTTPServer): class _ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
@ -47,11 +47,7 @@ class OtlpHttpTestCollector(BaseTelemetryCollector):
for scope_spans in resource_spans.scope_spans: for scope_spans in resource_spans.scope_spans:
for span in scope_spans.spans: for span in scope_spans.spans:
attributes = attributes_to_dict(span.attributes) new_spans.append(self._create_span_stub_from_protobuf(span, resource_attrs or None))
events = events_to_list(span.events) if span.events else None
trace_id = span.trace_id.hex() if span.trace_id else None
span_id = span.span_id.hex() if span.span_id else None
new_spans.append(SpanStub(span.name, attributes, resource_attrs or None, events, trace_id, span_id))
if not new_spans: if not new_spans:
return return
@ -60,10 +56,13 @@ class OtlpHttpTestCollector(BaseTelemetryCollector):
self._spans.extend(new_spans) self._spans.extend(new_spans)
def _handle_metrics(self, request: ExportMetricsServiceRequest) -> None: def _handle_metrics(self, request: ExportMetricsServiceRequest) -> None:
new_metrics: list[Any] = [] new_metrics: list[MetricStub] = []
for resource_metrics in request.resource_metrics: for resource_metrics in request.resource_metrics:
for scope_metrics in resource_metrics.scope_metrics: for scope_metrics in resource_metrics.scope_metrics:
new_metrics.extend(scope_metrics.metrics) for metric in scope_metrics.metrics:
metric_stub = self._create_metric_stub_from_protobuf(metric)
if metric_stub:
new_metrics.append(metric_stub)
if not new_metrics: if not new_metrics:
return return
@ -75,11 +74,40 @@ class OtlpHttpTestCollector(BaseTelemetryCollector):
with self._lock: with self._lock:
return tuple(self._spans) return tuple(self._spans)
def _snapshot_metrics(self) -> Any | None: def _snapshot_metrics(self) -> tuple[MetricStub, ...] | None:
with self._lock: with self._lock:
return list(self._metrics) if self._metrics else None return tuple(self._metrics) if self._metrics else None
def _clear_impl(self) -> None: def _clear_impl(self) -> None:
"""Clear telemetry over a period of time to prevent race conditions between tests."""
with self._lock:
self._spans.clear()
self._metrics.clear()
# Prevent race conditions where telemetry arrives after clear() but before
# the test starts, causing contamination between tests
deadline = time.time() + 2.0 # Maximum wait time
last_span_count = 0
last_metric_count = 0
stable_iterations = 0
while time.time() < deadline:
with self._lock:
current_span_count = len(self._spans)
current_metric_count = len(self._metrics)
if current_span_count == last_span_count and current_metric_count == last_metric_count:
stable_iterations += 1
if stable_iterations >= 4: # 4 * 50ms = 200ms of stability
break
else:
stable_iterations = 0
last_span_count = current_span_count
last_metric_count = current_metric_count
time.sleep(0.05)
# Final clear to remove any telemetry that arrived during stabilization
with self._lock: with self._lock:
self._spans.clear() self._spans.clear()
self._metrics.clear() self._metrics.clear()

View file

@ -30,7 +30,7 @@
"index": 0, "index": 0,
"logprobs": null, "logprobs": null,
"message": { "message": {
"content": "import torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Load the pre-trained model and tokenizer\nmodel_name = \"CompVis/transformers-base-uncased\"\nmodel = AutoModelForCausalLM.from_pretrained(model_name)\ntokenizer = AutoTokenizer.from_pretrained(model_name)\n\n# Set the temperature to 0.7\ntemperature = 0.7\n\n# Define a function to generate text\ndef generate_text(prompt, max_length=100):\n input", "content": "To test the trace function from OpenAI's API with a temperature of 0.7, you can use the following Python code:\n\n```python\nimport json\n\n# Import the required libraries\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Set the API endpoint and model name\nmodel_name = \"dalle-mini\"\n\n# Initialize the model and tokenizer\nmodel = AutoModelForCausalLM.from_pretrained(model_name)\ntokenizer = AutoTokenizer.from_pretrained(model_name)\n\n",
"refusal": null, "refusal": null,
"role": "assistant", "role": "assistant",
"annotations": null, "annotations": null,
@ -55,5 +55,6 @@
} }
}, },
"is_streaming": false "is_streaming": false
} },
"id_normalization_mapping": {}
} }

View file

@ -4,48 +4,17 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
"""Telemetry tests verifying @trace_protocol decorator format across stack modes.""" """Telemetry tests verifying @trace_protocol decorator format across stack modes.
Note: The mock_otlp_collector fixture automatically clears telemetry data
before and after each test, ensuring test isolation.
"""
import json import json
def _span_attributes(span):
attrs = getattr(span, "attributes", None)
if attrs is None:
return {}
# ReadableSpan.attributes acts like a mapping
try:
return dict(attrs.items()) # type: ignore[attr-defined]
except AttributeError:
try:
return dict(attrs)
except TypeError:
return attrs
def _span_attr(span, key):
attrs = _span_attributes(span)
return attrs.get(key)
def _span_trace_id(span):
context = getattr(span, "context", None)
if context and getattr(context, "trace_id", None) is not None:
return f"{context.trace_id:032x}"
return getattr(span, "trace_id", None)
def _span_has_message(span, text: str) -> bool:
args = _span_attr(span, "__args__")
if not args or not isinstance(args, str):
return False
return text in args
def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id): def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id):
"""Verify streaming adds chunk_count and __type__=async_generator.""" """Verify streaming adds chunk_count and __type__=async_generator."""
mock_otlp_collector.clear()
stream = llama_stack_client.chat.completions.create( stream = llama_stack_client.chat.completions.create(
model=text_model_id, model=text_model_id,
messages=[{"role": "user", "content": "Test trace openai 1"}], messages=[{"role": "user", "content": "Test trace openai 1"}],
@ -62,16 +31,16 @@ def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_mod
( (
span span
for span in reversed(spans) for span in reversed(spans)
if _span_attr(span, "__type__") == "async_generator" if span.get_span_type() == "async_generator"
and _span_attr(span, "chunk_count") and span.attributes.get("chunk_count")
and _span_has_message(span, "Test trace openai 1") and span.has_message("Test trace openai 1")
), ),
None, None,
) )
assert async_generator_span is not None assert async_generator_span is not None
raw_chunk_count = _span_attr(async_generator_span, "chunk_count") raw_chunk_count = async_generator_span.attributes.get("chunk_count")
assert raw_chunk_count is not None assert raw_chunk_count is not None
chunk_count = int(raw_chunk_count) chunk_count = int(raw_chunk_count)
@ -80,7 +49,6 @@ def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_mod
def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client, text_model_id): def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client, text_model_id):
"""Comprehensive validation of telemetry data format including spans and metrics.""" """Comprehensive validation of telemetry data format including spans and metrics."""
mock_otlp_collector.clear()
response = llama_stack_client.chat.completions.create( response = llama_stack_client.chat.completions.create(
model=text_model_id, model=text_model_id,
@ -101,37 +69,36 @@ def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client,
# Verify spans # Verify spans
spans = mock_otlp_collector.get_spans(expected_count=7) spans = mock_otlp_collector.get_spans(expected_count=7)
target_span = next( target_span = next(
(span for span in reversed(spans) if _span_has_message(span, "Test trace openai with temperature 0.7")), (span for span in reversed(spans) if span.has_message("Test trace openai with temperature 0.7")),
None, None,
) )
assert target_span is not None assert target_span is not None
trace_id = _span_trace_id(target_span) trace_id = target_span.get_trace_id()
assert trace_id is not None assert trace_id is not None
spans = [span for span in spans if _span_trace_id(span) == trace_id] spans = [span for span in spans if span.get_trace_id() == trace_id]
spans = [span for span in spans if _span_attr(span, "__root__") or _span_attr(span, "__autotraced__")] spans = [span for span in spans if span.is_root_span() or span.is_autotraced()]
assert len(spans) >= 4 assert len(spans) >= 4
# Collect all model_ids found in spans # Collect all model_ids found in spans
logged_model_ids = [] logged_model_ids = []
for span in spans: for span in spans:
attrs = _span_attributes(span) attrs = span.attributes
assert attrs is not None assert attrs is not None
# Root span is created manually by tracing middleware, not by @trace_protocol decorator # Root span is created manually by tracing middleware, not by @trace_protocol decorator
is_root_span = attrs.get("__root__") is True if span.is_root_span():
assert span.get_location() in ["library_client", "server"]
if is_root_span:
assert attrs.get("__location__") in ["library_client", "server"]
continue continue
assert attrs.get("__autotraced__") assert span.is_autotraced()
assert attrs.get("__class__") and attrs.get("__method__") class_name, method_name = span.get_class_method()
assert attrs.get("__type__") in ["async", "sync", "async_generator"] assert class_name and method_name
assert span.get_span_type() in ["async", "sync", "async_generator"]
args_field = attrs.get("__args__") args_field = span.attributes.get("__args__")
if args_field: if args_field:
args = json.loads(args_field) args = json.loads(args_field)
if "model_id" in args: if "model_id" in args:
@ -140,21 +107,40 @@ def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client,
# At least one span should capture the fully qualified model ID # At least one span should capture the fully qualified model ID
assert text_model_id in logged_model_ids, f"Expected to find {text_model_id} in spans, but got {logged_model_ids}" assert text_model_id in logged_model_ids, f"Expected to find {text_model_id} in spans, but got {logged_model_ids}"
# TODO: re-enable this once metrics get fixed # Verify token usage metrics in response using polling
""" expected_metrics = ["completion_tokens", "total_tokens", "prompt_tokens"]
# Verify token usage metrics in response metrics = mock_otlp_collector.get_metrics(expected_count=len(expected_metrics), expect_model_id=text_model_id)
metrics = mock_otlp_collector.get_metrics() assert len(metrics) > 0, "No metrics found within timeout"
assert metrics # Filter metrics to only those from the specific model used in the request
for metric in metrics: # This prevents issues when multiple metrics with the same name exist from different models
assert metric.name in ["completion_tokens", "total_tokens", "prompt_tokens"] # (e.g., when safety models like llama-guard are also called)
assert metric.unit == "tokens" inference_model_metrics = {}
assert metric.data.data_points and len(metric.data.data_points) == 1 all_model_ids = set()
match metric.name:
case "completion_tokens": for name, metric in metrics.items():
assert metric.data.data_points[0].value == usage["completion_tokens"] if name in expected_metrics:
case "total_tokens": model_id = metric.attributes.get("model_id")
assert metric.data.data_points[0].value == usage["total_tokens"] all_model_ids.add(model_id)
case "prompt_tokens": # Only include metrics from the specific model used in the test request
assert metric.data.data_points[0].value == usage["prompt_tokens" if model_id == text_model_id:
""" inference_model_metrics[name] = metric
# Verify expected metrics are present for our specific model
for metric_name in expected_metrics:
assert metric_name in inference_model_metrics, (
f"Expected metric {metric_name} for model {text_model_id} not found. "
f"Available models: {sorted(all_model_ids)}, "
f"Available metrics for {text_model_id}: {list(inference_model_metrics.keys())}"
)
# Verify metric values match usage data
assert inference_model_metrics["completion_tokens"].value == usage["completion_tokens"], (
f"Expected {usage['completion_tokens']} for completion_tokens, but got {inference_model_metrics['completion_tokens'].value}"
)
assert inference_model_metrics["total_tokens"].value == usage["total_tokens"], (
f"Expected {usage['total_tokens']} for total_tokens, but got {inference_model_metrics['total_tokens'].value}"
)
assert inference_model_metrics["prompt_tokens"].value == usage["prompt_tokens"], (
f"Expected {usage['prompt_tokens']} for prompt_tokens, but got {inference_model_metrics['prompt_tokens'].value}"
)

View file

@ -206,3 +206,65 @@ def test_parse_and_maybe_upgrade_config_invalid(invalid_config):
def test_parse_and_maybe_upgrade_config_image_name_int(config_with_image_name_int): def test_parse_and_maybe_upgrade_config_image_name_int(config_with_image_name_int):
result = parse_and_maybe_upgrade_config(config_with_image_name_int) result = parse_and_maybe_upgrade_config(config_with_image_name_int)
assert isinstance(result.image_name, str) assert isinstance(result.image_name, str)
def test_parse_and_maybe_upgrade_config_sets_external_providers_dir(up_to_date_config):
"""Test that external_providers_dir is None when not specified (deprecated field)."""
# Ensure the config doesn't have external_providers_dir set
assert "external_providers_dir" not in up_to_date_config
result = parse_and_maybe_upgrade_config(up_to_date_config)
# Verify external_providers_dir is None (not set to default)
# This aligns with the deprecation of external_providers_dir
assert result.external_providers_dir is None
def test_parse_and_maybe_upgrade_config_preserves_custom_external_providers_dir(up_to_date_config):
"""Test that custom external_providers_dir values are preserved."""
custom_dir = "/custom/providers/dir"
up_to_date_config["external_providers_dir"] = custom_dir
result = parse_and_maybe_upgrade_config(up_to_date_config)
# Verify the custom value was preserved
assert str(result.external_providers_dir) == custom_dir
def test_generate_run_config_from_providers():
"""Test that _generate_run_config_from_providers creates a valid config"""
import argparse
from llama_stack.cli.stack.run import StackRun
from llama_stack.core.datatypes import Provider
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
stack_run = StackRun(subparsers)
providers = {
"inference": [
Provider(
provider_type="inline::meta-reference",
provider_id="meta-reference",
)
]
}
config = stack_run._generate_run_config_from_providers(providers=providers)
config_dict = config.model_dump(mode="json")
# Verify basic structure
assert config_dict["image_name"] == "providers-run"
assert "inference" in config_dict["apis"]
assert "inference" in config_dict["providers"]
# Verify storage has all required stores including prompts
assert "storage" in config_dict
stores = config_dict["storage"]["stores"]
assert "prompts" in stores
assert stores["prompts"]["namespace"] == "prompts"
# Verify config can be parsed back
parsed = parse_and_maybe_upgrade_config(config_dict)
assert parsed.image_name == "providers-run"

View file

@ -0,0 +1,251 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from unittest.mock import AsyncMock, MagicMock, patch
import aiohttp
import pytest
from llama_stack.apis.models import ModelType
from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig
from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
class MockResponse:
def __init__(self, status=200, json_data=None, text_data="OK"):
self.status = status
self._json_data = json_data or {"rankings": []}
self._text_data = text_data
async def json(self):
return self._json_data
async def text(self):
return self._text_data
class MockSession:
def __init__(self, response):
self.response = response
self.post_calls = []
async def __aenter__(self):
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
return False
def post(self, url, **kwargs):
self.post_calls.append((url, kwargs))
class PostContext:
def __init__(self, response):
self.response = response
async def __aenter__(self):
return self.response
async def __aexit__(self, exc_type, exc_val, exc_tb):
return False
return PostContext(self.response)
def create_adapter(config=None, rerank_endpoints=None):
if config is None:
config = NVIDIAConfig(api_key="test-key")
adapter = NVIDIAInferenceAdapter(config=config)
class MockModel:
provider_resource_id = "test-model"
metadata = {}
adapter.model_store = AsyncMock()
adapter.model_store.get_model = AsyncMock(return_value=MockModel())
if rerank_endpoints is not None:
adapter.config.rerank_model_to_url = rerank_endpoints
return adapter
async def test_rerank_basic_functionality():
adapter = create_adapter()
mock_response = MockResponse(json_data={"rankings": [{"index": 0, "logit": 0.5}]})
mock_session = MockSession(mock_response)
with patch("aiohttp.ClientSession", return_value=mock_session):
result = await adapter.rerank(model="test-model", query="test query", items=["item1", "item2"])
assert len(result.data) == 1
assert result.data[0].index == 0
assert result.data[0].relevance_score == 0.5
url, kwargs = mock_session.post_calls[0]
payload = kwargs["json"]
assert payload["model"] == "test-model"
assert payload["query"] == {"text": "test query"}
assert payload["passages"] == [{"text": "item1"}, {"text": "item2"}]
async def test_missing_rankings_key():
    adapter = create_adapter()
    mock_session = MockSession(MockResponse(json_data={}))
    with patch("aiohttp.ClientSession", return_value=mock_session):
        result = await adapter.rerank(model="test-model", query="q", items=["a"])

    assert len(result.data) == 0


async def test_hosted_with_endpoint():
    adapter = create_adapter(
        config=NVIDIAConfig(api_key="key"), rerank_endpoints={"test-model": "https://model.endpoint/rerank"}
    )
    mock_session = MockSession(MockResponse())
    with patch("aiohttp.ClientSession", return_value=mock_session):
        await adapter.rerank(model="test-model", query="q", items=["a"])

    url, _ = mock_session.post_calls[0]
    assert url == "https://model.endpoint/rerank"


async def test_hosted_without_endpoint():
    adapter = create_adapter(
        config=NVIDIAConfig(api_key="key"),  # This creates a hosted config (integrate.api.nvidia.com).
        rerank_endpoints={},  # No endpoint mapping for test-model.
    )
    mock_session = MockSession(MockResponse())
    with patch("aiohttp.ClientSession", return_value=mock_session):
        await adapter.rerank(model="test-model", query="q", items=["a"])

    url, _ = mock_session.post_calls[0]
    assert "https://integrate.api.nvidia.com" in url


async def test_hosted_model_not_in_endpoint_mapping():
    adapter = create_adapter(
        config=NVIDIAConfig(api_key="key"), rerank_endpoints={"other-model": "https://other.endpoint/rerank"}
    )
    mock_session = MockSession(MockResponse())
    with patch("aiohttp.ClientSession", return_value=mock_session):
        await adapter.rerank(model="test-model", query="q", items=["a"])

    url, _ = mock_session.post_calls[0]
    assert "https://integrate.api.nvidia.com" in url
    assert url != "https://other.endpoint/rerank"


async def test_self_hosted_ignores_endpoint():
    adapter = create_adapter(
        config=NVIDIAConfig(url="http://localhost:8000", api_key=None),
        rerank_endpoints={"test-model": "https://model.endpoint/rerank"},  # This should be ignored for self-hosted.
    )
    mock_session = MockSession(MockResponse())
    with patch("aiohttp.ClientSession", return_value=mock_session):
        await adapter.rerank(model="test-model", query="q", items=["a"])

    url, _ = mock_session.post_calls[0]
    assert "http://localhost:8000" in url
    assert "model.endpoint/rerank" not in url


async def test_max_num_results():
    adapter = create_adapter()
    rankings = [{"index": 0, "logit": 0.8}, {"index": 1, "logit": 0.6}]
    mock_session = MockSession(MockResponse(json_data={"rankings": rankings}))
    with patch("aiohttp.ClientSession", return_value=mock_session):
        result = await adapter.rerank(model="test-model", query="q", items=["a", "b"], max_num_results=1)

    assert len(result.data) == 1
    assert result.data[0].index == 0
    assert result.data[0].relevance_score == 0.8


async def test_http_error():
    adapter = create_adapter()
    mock_session = MockSession(MockResponse(status=500, text_data="Server Error"))
    with patch("aiohttp.ClientSession", return_value=mock_session):
        with pytest.raises(ConnectionError, match="status 500.*Server Error"):
            await adapter.rerank(model="test-model", query="q", items=["a"])


async def test_client_error():
    adapter = create_adapter()
    mock_session = AsyncMock()
    mock_session.__aenter__.side_effect = aiohttp.ClientError("Network error")
    with patch("aiohttp.ClientSession", return_value=mock_session):
        with pytest.raises(ConnectionError, match="Failed to connect.*Network error"):
            await adapter.rerank(model="test-model", query="q", items=["a"])
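
# Model listing: the configured rerank models should be merged with the dynamically discovered IDs.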
async def test_list_models_includes_configured_rerank_models():
    """Test that list_models adds rerank models to the dynamic model list."""
    adapter = create_adapter()
    adapter.__provider_id__ = "nvidia"
    adapter.__provider_spec__ = MagicMock()
    dynamic_ids = ["llm-1", "embedding-1"]
    with patch.object(OpenAIMixin, "list_provider_model_ids", new=AsyncMock(return_value=dynamic_ids)):
        result = await adapter.list_models()

    assert result is not None
    # Check that the rerank models are added
    model_ids = [m.identifier for m in result]
    assert "nv-rerank-qa-mistral-4b:1" in model_ids
    assert "nvidia/nv-rerankqa-mistral-4b-v3" in model_ids
    assert "nvidia/llama-3.2-nv-rerankqa-1b-v2" in model_ids

    rerank_models = [m for m in result if m.model_type == ModelType.rerank]
    assert len(rerank_models) == 3
    for m in rerank_models:
        assert m.provider_id == "nvidia"
        assert m.model_type == ModelType.rerank
        assert m.metadata == {}
        assert m.identifier in adapter._model_cache


async def test_list_provider_model_ids_has_no_duplicates():
    adapter = create_adapter()
    dynamic_ids = [
        "llm-1",
        "nvidia/nv-rerankqa-mistral-4b-v3",  # overlaps configured rerank ids
        "embedding-1",
        "llm-1",
    ]
    with patch.object(OpenAIMixin, "list_provider_model_ids", new=AsyncMock(return_value=dynamic_ids)):
        ids = list(await adapter.list_provider_model_ids())

    assert len(ids) == len(set(ids))
    assert ids.count("nvidia/nv-rerankqa-mistral-4b-v3") == 1
    assert "nv-rerank-qa-mistral-4b:1" in ids
    assert "nvidia/llama-3.2-nv-rerankqa-1b-v2" in ids


async def test_list_provider_model_ids_uses_configured_on_dynamic_failure():
    adapter = create_adapter()
    # Simulate dynamic listing failure
    with patch.object(OpenAIMixin, "list_provider_model_ids", new=AsyncMock(side_effect=Exception)):
        ids = list(await adapter.list_provider_model_ids())

    # Should still return configured rerank ids
    configured_ids = list(adapter.config.rerank_model_to_url.keys())
    assert set(ids) == set(configured_ids)

uv.lock (generated, 10 lines changed)
View file

@@ -1,5 +1,5 @@
version = 1
-revision = 2
+revision = 3
requires-python = ">=3.12"
resolution-markers = [
    "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
@@ -1933,7 +1933,7 @@ wheels = [
[[package]]
name = "llama-stack"
-version = "0.3.0"
+version = "0.4.0.dev0"
source = { editable = "." }
dependencies = [
    { name = "aiohttp" },
@@ -3530,8 +3530,10 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/2d/75/364847b879eb630b3ac8293798e380e441a957c53657995053c5ec39a316/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ab8905b5dcb05bf3fb22e0cf90e10f469563486ffb6a96569e51f897c750a76a", size = 4411159, upload-time = "2025-10-10T11:12:00.49Z" },
    { url = "https://files.pythonhosted.org/packages/6f/a0/567f7ea38b6e1c62aafd58375665a547c00c608a471620c0edc364733e13/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:bf940cd7e7fec19181fdbc29d76911741153d51cab52e5c21165f3262125685e", size = 4468234, upload-time = "2025-10-10T11:12:04.892Z" },
    { url = "https://files.pythonhosted.org/packages/30/da/4e42788fb811bbbfd7b7f045570c062f49e350e1d1f3df056c3fb5763353/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa0f693d3c68ae925966f0b14b8edda71696608039f4ed61b1fe9ffa468d16db", size = 4166236, upload-time = "2025-10-10T11:12:11.674Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/94/c1777c355bc560992af848d98216148be5f1be001af06e06fc49cbded578/psycopg2_binary-2.9.11-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a1cf393f1cdaf6a9b57c0a719a1068ba1069f022a59b8b1fe44b006745b59757", size = 3983083, upload-time = "2025-10-30T02:55:15.73Z" },
    { url = "https://files.pythonhosted.org/packages/bd/42/c9a21edf0e3daa7825ed04a4a8588686c6c14904344344a039556d78aa58/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ef7a6beb4beaa62f88592ccc65df20328029d721db309cb3250b0aae0fa146c3", size = 3652281, upload-time = "2025-10-10T11:12:17.713Z" },
    { url = "https://files.pythonhosted.org/packages/12/22/dedfbcfa97917982301496b6b5e5e6c5531d1f35dd2b488b08d1ebc52482/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:31b32c457a6025e74d233957cc9736742ac5a6cb196c6b68499f6bb51390bd6a", size = 3298010, upload-time = "2025-10-10T11:12:22.671Z" },
+    { url = "https://files.pythonhosted.org/packages/66/ea/d3390e6696276078bd01b2ece417deac954dfdd552d2edc3d03204416c0c/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:edcb3aeb11cb4bf13a2af3c53a15b3d612edeb6409047ea0b5d6a21a9d744b34", size = 3044641, upload-time = "2025-10-30T02:55:19.929Z" },
    { url = "https://files.pythonhosted.org/packages/12/9a/0402ded6cbd321da0c0ba7d34dc12b29b14f5764c2fc10750daa38e825fc/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:62b6d93d7c0b61a1dd6197d208ab613eb7dcfdcca0a49c42ceb082257991de9d", size = 3347940, upload-time = "2025-10-10T11:12:26.529Z" },
    { url = "https://files.pythonhosted.org/packages/b1/d2/99b55e85832ccde77b211738ff3925a5d73ad183c0b37bcbbe5a8ff04978/psycopg2_binary-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:b33fabeb1fde21180479b2d4667e994de7bbf0eec22832ba5d9b5e4cf65b6c6d", size = 2714147, upload-time = "2025-10-10T11:12:29.535Z" },
    { url = "https://files.pythonhosted.org/packages/ff/a8/a2709681b3ac11b0b1786def10006b8995125ba268c9a54bea6f5ae8bd3e/psycopg2_binary-2.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b8fb3db325435d34235b044b199e56cdf9ff41223a4b9752e8576465170bb38c", size = 3756572, upload-time = "2025-10-10T11:12:32.873Z" },
@@ -3539,8 +3541,10 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/11/32/b2ffe8f3853c181e88f0a157c5fb4e383102238d73c52ac6d93a5c8bffe6/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c55b385daa2f92cb64b12ec4536c66954ac53654c7f15a203578da4e78105c0", size = 4411242, upload-time = "2025-10-10T11:12:42.388Z" },
    { url = "https://files.pythonhosted.org/packages/10/04/6ca7477e6160ae258dc96f67c371157776564679aefd247b66f4661501a2/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c0377174bf1dd416993d16edc15357f6eb17ac998244cca19bc67cdc0e2e5766", size = 4468258, upload-time = "2025-10-10T11:12:48.654Z" },
    { url = "https://files.pythonhosted.org/packages/3c/7e/6a1a38f86412df101435809f225d57c1a021307dd0689f7a5e7fe83588b1/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c6ff3335ce08c75afaed19e08699e8aacf95d4a260b495a4a8545244fe2ceb3", size = 4166295, upload-time = "2025-10-10T11:12:52.525Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/7d/c07374c501b45f3579a9eb761cbf2604ddef3d96ad48679112c2c5aa9c25/psycopg2_binary-2.9.11-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:84011ba3109e06ac412f95399b704d3d6950e386b7994475b231cf61eec2fc1f", size = 3983133, upload-time = "2025-10-30T02:55:24.329Z" },
    { url = "https://files.pythonhosted.org/packages/82/56/993b7104cb8345ad7d4516538ccf8f0d0ac640b1ebd8c754a7b024e76878/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba34475ceb08cccbdd98f6b46916917ae6eeb92b5ae111df10b544c3a4621dc4", size = 3652383, upload-time = "2025-10-10T11:12:56.387Z" },
    { url = "https://files.pythonhosted.org/packages/2d/ac/eaeb6029362fd8d454a27374d84c6866c82c33bfc24587b4face5a8e43ef/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b31e90fdd0f968c2de3b26ab014314fe814225b6c324f770952f7d38abf17e3c", size = 3298168, upload-time = "2025-10-10T11:13:00.403Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/39/50c3facc66bded9ada5cbc0de867499a703dc6bca6be03070b4e3b65da6c/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:d526864e0f67f74937a8fce859bd56c979f5e2ec57ca7c627f5f1071ef7fee60", size = 3044712, upload-time = "2025-10-30T02:55:27.975Z" },
    { url = "https://files.pythonhosted.org/packages/9c/8e/b7de019a1f562f72ada81081a12823d3c1590bedc48d7d2559410a2763fe/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04195548662fa544626c8ea0f06561eb6203f1984ba5b4562764fbeb4c3d14b1", size = 3347549, upload-time = "2025-10-10T11:13:03.971Z" },
    { url = "https://files.pythonhosted.org/packages/80/2d/1bb683f64737bbb1f86c82b7359db1eb2be4e2c0c13b947f80efefa7d3e5/psycopg2_binary-2.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:efff12b432179443f54e230fdf60de1f6cc726b6c832db8701227d089310e8aa", size = 2714215, upload-time = "2025-10-10T11:13:07.14Z" },
    { url = "https://files.pythonhosted.org/packages/64/12/93ef0098590cf51d9732b4f139533732565704f45bdc1ffa741b7c95fb54/psycopg2_binary-2.9.11-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:92e3b669236327083a2e33ccfa0d320dd01b9803b3e14dd986a4fc54aa00f4e1", size = 3756567, upload-time = "2025-10-10T11:13:11.885Z" },
@@ -3548,8 +3552,10 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/13/1e/98874ce72fd29cbde93209977b196a2edae03f8490d1bd8158e7f1daf3a0/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b52a3f9bb540a3e4ec0f6ba6d31339727b2950c9772850d6545b7eae0b9d7c5", size = 4411646, upload-time = "2025-10-10T11:13:24.432Z" },
    { url = "https://files.pythonhosted.org/packages/5a/bd/a335ce6645334fb8d758cc358810defca14a1d19ffbc8a10bd38a2328565/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:db4fd476874ccfdbb630a54426964959e58da4c61c9feba73e6094d51303d7d8", size = 4468701, upload-time = "2025-10-10T11:13:29.266Z" },
    { url = "https://files.pythonhosted.org/packages/44/d6/c8b4f53f34e295e45709b7568bf9b9407a612ea30387d35eb9fa84f269b4/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:47f212c1d3be608a12937cc131bd85502954398aaa1320cb4c14421a0ffccf4c", size = 4166293, upload-time = "2025-10-10T11:13:33.336Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/e0/f8cc36eadd1b716ab36bb290618a3292e009867e5c97ce4aba908cb99644/psycopg2_binary-2.9.11-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e35b7abae2b0adab776add56111df1735ccc71406e56203515e228a8dc07089f", size = 3983184, upload-time = "2025-10-30T02:55:32.483Z" },
    { url = "https://files.pythonhosted.org/packages/53/3e/2a8fe18a4e61cfb3417da67b6318e12691772c0696d79434184a511906dc/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fcf21be3ce5f5659daefd2b3b3b6e4727b028221ddc94e6c1523425579664747", size = 3652650, upload-time = "2025-10-10T11:13:38.181Z" },
    { url = "https://files.pythonhosted.org/packages/76/36/03801461b31b29fe58d228c24388f999fe814dfc302856e0d17f97d7c54d/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:9bd81e64e8de111237737b29d68039b9c813bdf520156af36d26819c9a979e5f", size = 3298663, upload-time = "2025-10-10T11:13:44.878Z" },
+    { url = "https://files.pythonhosted.org/packages/97/77/21b0ea2e1a73aa5fa9222b2a6b8ba325c43c3a8d54272839c991f2345656/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:32770a4d666fbdafab017086655bcddab791d7cb260a16679cc5a7338b64343b", size = 3044737, upload-time = "2025-10-30T02:55:35.69Z" },
    { url = "https://files.pythonhosted.org/packages/67/69/f36abe5f118c1dca6d3726ceae164b9356985805480731ac6712a63f24f0/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3cb3a676873d7506825221045bd70e0427c905b9c8ee8d6acd70cfcbd6e576d", size = 3347643, upload-time = "2025-10-10T11:13:53.499Z" },
    { url = "https://files.pythonhosted.org/packages/e1/36/9c0c326fe3a4227953dfb29f5d0c8ae3b8eb8c1cd2967aa569f50cb3c61f/psycopg2_binary-2.9.11-cp314-cp314-win_amd64.whl", hash = "sha256:4012c9c954dfaccd28f94e84ab9f94e12df76b4afb22331b1f0d3154893a6316", size = 2803913, upload-time = "2025-10-10T11:13:57.058Z" },
]