diff --git a/.github/actions/install-llama-stack-client/action.yml b/.github/actions/install-llama-stack-client/action.yml new file mode 100644 index 000000000..3c1c77d9c --- /dev/null +++ b/.github/actions/install-llama-stack-client/action.yml @@ -0,0 +1,60 @@ +name: Install llama-stack-client +description: Install llama-stack-client based on branch context and client-version input + +inputs: + client-version: + description: 'Client version to install on non-release branches (latest or published). Ignored on release branches.' + required: false + default: "" + +outputs: + uv-extra-index-url: + description: 'UV_EXTRA_INDEX_URL to use (set for release branches)' + value: ${{ steps.configure.outputs.uv-extra-index-url }} + install-after-sync: + description: 'Whether to install client after uv sync' + value: ${{ steps.configure.outputs.install-after-sync }} + install-source: + description: 'Where to install client from after sync' + value: ${{ steps.configure.outputs.install-source }} + +runs: + using: "composite" + steps: + - name: Configure client installation + id: configure + shell: bash + run: | + # Determine the branch we're working with + BRANCH="${{ github.base_ref || github.ref }}" + BRANCH="${BRANCH#refs/heads/}" + + echo "Working with branch: $BRANCH" + + # On release branches: use test.pypi for uv sync, then install from git + # On non-release branches: install based on client-version after sync + if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then + echo "Detected release branch: $BRANCH" + + # Check if matching branch exists in client repo + if ! git ls-remote --exit-code --heads https://github.com/llamastack/llama-stack-client-python.git "$BRANCH" > /dev/null 2>&1; then + echo "::error::Branch $BRANCH not found in llama-stack-client-python repository" + echo "::error::Please create the matching release branch in llama-stack-client-python before testing" + exit 1 + fi + + # Configure to use test.pypi as extra index (PyPI is primary) + echo "uv-extra-index-url=https://test.pypi.org/simple/" >> $GITHUB_OUTPUT + echo "install-after-sync=true" >> $GITHUB_OUTPUT + echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@$BRANCH" >> $GITHUB_OUTPUT + elif [ "${{ inputs.client-version }}" = "latest" ]; then + # Install from main git after sync + echo "install-after-sync=true" >> $GITHUB_OUTPUT + echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@main" >> $GITHUB_OUTPUT + elif [ "${{ inputs.client-version }}" = "published" ]; then + # Use published version from PyPI (installed by sync) + echo "install-after-sync=false" >> $GITHUB_OUTPUT + elif [ -n "${{ inputs.client-version }}" ]; then + echo "::error::Invalid client-version: ${{ inputs.client-version }}" + exit 1 + fi diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml index ac600d570..ec4d7f977 100644 --- a/.github/actions/run-and-record-tests/action.yml +++ b/.github/actions/run-and-record-tests/action.yml @@ -94,7 +94,7 @@ runs: if: ${{ always() }} uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: - name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }} + name: logs-${{ github.run_id }}-${{ github.run_attempt || '1' }}-${{ strategy.job-index || github.job }}-${{ github.action }} path: | *.log retention-days: 1 diff --git a/.github/actions/setup-runner/action.yml b/.github/actions/setup-runner/action.yml index 905d6b73a..3237abb67 100644 --- 
a/.github/actions/setup-runner/action.yml +++ b/.github/actions/setup-runner/action.yml @@ -18,25 +18,35 @@ runs: python-version: ${{ inputs.python-version }} version: 0.7.6 + - name: Configure client installation + id: client-config + uses: ./.github/actions/install-llama-stack-client + with: + client-version: ${{ inputs.client-version }} + - name: Install dependencies shell: bash + env: + UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }} run: | + # Export UV env vars for current step and persist to GITHUB_ENV for subsequent steps + if [ -n "$UV_EXTRA_INDEX_URL" ]; then + export UV_INDEX_STRATEGY=unsafe-best-match + echo "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" >> $GITHUB_ENV + echo "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY" >> $GITHUB_ENV + echo "Exported UV environment variables for current and subsequent steps" + fi + echo "Updating project dependencies via uv sync" uv sync --all-groups echo "Installing ad-hoc dependencies" uv pip install faiss-cpu - # Install llama-stack-client-python based on the client-version input - if [ "${{ inputs.client-version }}" = "latest" ]; then - echo "Installing latest llama-stack-client-python from main branch" - uv pip install git+https://github.com/llamastack/llama-stack-client-python.git@main - elif [ "${{ inputs.client-version }}" = "published" ]; then - echo "Installing published llama-stack-client-python from PyPI" - uv pip install llama-stack-client - else - echo "Invalid client-version: ${{ inputs.client-version }}" - exit 1 + # Install specific client version after sync if needed + if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then + echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}" + uv pip install ${{ steps.client-config.outputs.install-source }} fi echo "Installed llama packages" diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml index ee9011ed8..7b306fef5 100644 --- a/.github/actions/setup-test-environment/action.yml +++ b/.github/actions/setup-test-environment/action.yml @@ -42,18 +42,7 @@ runs: - name: Build Llama Stack shell: bash run: | - # Install llama-stack-client-python based on the client-version input - if [ "${{ inputs.client-version }}" = "latest" ]; then - echo "Installing latest llama-stack-client-python from main branch" - export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@main - elif [ "${{ inputs.client-version }}" = "published" ]; then - echo "Installing published llama-stack-client-python from PyPI" - unset LLAMA_STACK_CLIENT_DIR - else - echo "Invalid client-version: ${{ inputs.client-version }}" - exit 1 - fi - + # Client is already installed by setup-runner (handles both main and release branches) echo "Building Llama Stack" LLAMA_STACK_DIR=. \ diff --git a/.github/workflows/README.md b/.github/workflows/README.md index ef6a8bb3c..88b2d5106 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -13,7 +13,6 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). 
Below is a tabl | Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suites from tests/integration in replay mode | | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers | | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks | -| Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR | | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build | | Test llama stack list-deps | [providers-list-deps.yml](providers-list-deps.yml) | Test llama stack list-deps | | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project | diff --git a/.github/workflows/backward-compat.yml b/.github/workflows/backward-compat.yml index 72d2b0c27..cf91b851e 100644 --- a/.github/workflows/backward-compat.yml +++ b/.github/workflows/backward-compat.yml @@ -4,7 +4,11 @@ run-name: Check backward compatibility for run.yaml configs on: pull_request: - branches: [main] + branches: + - main + - 'release-[0-9]+.[0-9]+.[0-9]+.[0-9]+' + - 'release-[0-9]+.[0-9]+.[0-9]+' + - 'release-[0-9]+.[0-9]+' paths: - 'src/llama_stack/core/datatypes.py' - 'src/llama_stack/providers/datatypes.py' diff --git a/.github/workflows/install-script-ci.yml b/.github/workflows/install-script-ci.yml index 82aa56482..bbdaefb50 100644 --- a/.github/workflows/install-script-ci.yml +++ b/.github/workflows/install-script-ci.yml @@ -30,10 +30,16 @@ jobs: - name: Build a single provider run: | + BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=starter" + if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then + BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" + fi + if [ -n "${UV_INDEX_STRATEGY:-}" ]; then + BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY" + fi docker build . 
\ -f containers/Containerfile \ - --build-arg INSTALL_MODE=editable \ - --build-arg DISTRO_NAME=starter \ + $BUILD_ARGS \ --tag llama-stack:starter-ci - name: Run installer end-to-end diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index 2de3fe9df..560ab4293 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -4,9 +4,13 @@ run-name: Run the integration test suite with Kubernetes authentication on: push: - branches: [ main ] + branches: + - main + - 'release-[0-9]+.[0-9]+.x' pull_request: - branches: [ main ] + branches: + - main + - 'release-[0-9]+.[0-9]+.x' paths: - 'distributions/**' - 'src/llama_stack/**' diff --git a/.github/workflows/integration-sql-store-tests.yml b/.github/workflows/integration-sql-store-tests.yml index 0653b3fa8..8c3e51dd4 100644 --- a/.github/workflows/integration-sql-store-tests.yml +++ b/.github/workflows/integration-sql-store-tests.yml @@ -4,9 +4,13 @@ run-name: Run the integration test suite with SqlStore on: push: - branches: [ main ] + branches: + - main + - 'release-[0-9]+.[0-9]+.x' pull_request: - branches: [ main ] + branches: + - main + - 'release-[0-9]+.[0-9]+.x' paths: - 'src/llama_stack/providers/utils/sqlstore/**' - 'tests/integration/sqlstore/**' diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 2b8965aad..ac70f0960 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -4,9 +4,13 @@ run-name: Run the integration test suites from tests/integration in replay mode on: push: - branches: [ main ] + branches: + - main + - 'release-[0-9]+.[0-9]+.x' pull_request: - branches: [ main ] + branches: + - main + - 'release-[0-9]+.[0-9]+.x' types: [opened, synchronize, reopened] paths: - 'src/llama_stack/**' @@ -47,7 +51,7 @@ jobs: strategy: fail-fast: false matrix: - client-type: [library, docker] + client-type: [library, docker, server] # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12 python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }} client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }} diff --git a/.github/workflows/integration-vector-io-tests.yml b/.github/workflows/integration-vector-io-tests.yml index 0b4e174bc..952141f3b 100644 --- a/.github/workflows/integration-vector-io-tests.yml +++ b/.github/workflows/integration-vector-io-tests.yml @@ -4,9 +4,13 @@ run-name: Run the integration test suite with various VectorIO providers on: push: - branches: [ main ] + branches: + - main + - 'release-[0-9]+.[0-9]+.x' pull_request: - branches: [ main ] + branches: + - main + - 'release-[0-9]+.[0-9]+.x' paths: - 'src/llama_stack/**' - '!src/llama_stack/ui/**' diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 485009578..6aca8d106 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -5,7 +5,9 @@ run-name: Run pre-commit checks on: pull_request: push: - branches: [main] + branches: + - main + - 'release-[0-9]+.[0-9]+.x' concurrency: group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }} @@ -50,19 +52,34 @@ jobs: run: npm ci working-directory: src/llama_stack/ui + - name: Install pre-commit + run: python -m pip 
install pre-commit + + - name: Cache pre-commit + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 + with: + path: ~/.cache/pre-commit + key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }} + - name: Run pre-commit id: precommit - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 - continue-on-error: true + run: | + set +e + pre-commit run --show-diff-on-failure --color=always --all-files 2>&1 | tee /tmp/precommit.log + status=${PIPESTATUS[0]} + echo "status=$status" >> $GITHUB_OUTPUT + exit 0 env: SKIP: no-commit-to-branch,mypy RUFF_OUTPUT_FORMAT: github - name: Check pre-commit results - if: steps.precommit.outcome == 'failure' + if: steps.precommit.outputs.status != '0' run: | echo "::error::Pre-commit hooks failed. Please run 'pre-commit run --all-files' locally and commit the fixes." - echo "::warning::Some pre-commit hooks failed. Check the output above for details." + echo "" + echo "Failed hooks output:" + cat /tmp/precommit.log exit 1 - name: Debug @@ -113,11 +130,34 @@ jobs: exit 1 fi + - name: Configure client installation + id: client-config + uses: ./.github/actions/install-llama-stack-client + - name: Sync dev + type_checking dependencies - run: uv sync --group dev --group type_checking + env: + UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }} + run: | + if [ -n "$UV_EXTRA_INDEX_URL" ]; then + export UV_INDEX_STRATEGY="unsafe-best-match" + fi + + uv sync --group dev --group type_checking + + # Install specific client version after sync if needed + if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then + echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}" + uv pip install ${{ steps.client-config.outputs.install-source }} + fi - name: Run mypy (full type_checking) + env: + UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }} run: | + if [ -n "$UV_EXTRA_INDEX_URL" ]; then + export UV_INDEX_STRATEGY="unsafe-best-match" + fi + set +e uv run --group dev --group type_checking mypy status=$? 
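Note on the `pre-commit.yml` hunk above: replacing the `pre-commit/action` wrapper with a manual invocation lets the workflow pipe hook output through `tee` (so it lands in a log file and the console) while still capturing the hooks' real exit code from `PIPESTATUS[0]`, deferring the failure to a follow-up step that prints the saved log. A minimal standalone sketch of that capture-and-defer pattern, assuming `pre-commit` is installed; the log path is illustrative and simply mirrors the workflow step:

```bash
#!/usr/bin/env bash
# Sketch of the capture-and-defer pattern used in the pre-commit.yml step above.
# Without `pipefail`, a pipeline's exit status is that of its last command
# (`tee`, which almost always succeeds), so the hooks' status must be read
# from PIPESTATUS[0] immediately after the pipeline.
set +e  # do not abort on the first failure; we report it ourselves

LOG=/tmp/precommit.log  # illustrative path, matching the workflow step

pre-commit run --show-diff-on-failure --color=always --all-files 2>&1 | tee "$LOG"
status=${PIPESTATUS[0]}

if [ "$status" -ne 0 ]; then
  echo "Pre-commit hooks failed; full output below:" >&2
  cat "$LOG" >&2
  exit 1
fi
echo "Pre-commit hooks passed."
```

The point of the deferral is that `tee` exits 0 even when the command feeding it fails, so checking `$?` after the pipeline would always report success; `PIPESTATUS[0]` preserves the status of the first pipeline stage so a later step can decide whether to fail the job.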
diff --git a/.github/workflows/precommit-trigger.yml b/.github/workflows/precommit-trigger.yml deleted file mode 100644 index 502230448..000000000 --- a/.github/workflows/precommit-trigger.yml +++ /dev/null @@ -1,227 +0,0 @@ -name: Pre-commit Bot - -run-name: Pre-commit bot for PR #${{ github.event.issue.number }} - -on: - issue_comment: - types: [created] - -jobs: - pre-commit: - # Only run on pull request comments - if: github.event.issue.pull_request && contains(github.event.comment.body, '@github-actions run precommit') - runs-on: ubuntu-latest - permissions: - contents: write - pull-requests: write - - steps: - - name: Check comment author and get PR details - id: check_author - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - // Get PR details - const pr = await github.rest.pulls.get({ - owner: context.repo.owner, - repo: context.repo.repo, - pull_number: context.issue.number - }); - - // Check if commenter has write access or is the PR author - const commenter = context.payload.comment.user.login; - const prAuthor = pr.data.user.login; - - let hasPermission = false; - - // Check if commenter is PR author - if (commenter === prAuthor) { - hasPermission = true; - console.log(`Comment author ${commenter} is the PR author`); - } else { - // Check if commenter has write/admin access - try { - const permission = await github.rest.repos.getCollaboratorPermissionLevel({ - owner: context.repo.owner, - repo: context.repo.repo, - username: commenter - }); - - const level = permission.data.permission; - hasPermission = ['write', 'admin', 'maintain'].includes(level); - console.log(`Comment author ${commenter} has permission: ${level}`); - } catch (error) { - console.log(`Could not check permissions for ${commenter}: ${error.message}`); - } - } - - if (!hasPermission) { - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.issue.number, - body: `āŒ @${commenter} You don't have permission to trigger pre-commit. 
Only PR authors or repository collaborators can run this command.` - }); - core.setFailed(`User ${commenter} does not have permission`); - return; - } - - // Save PR info for later steps - core.setOutput('pr_number', context.issue.number); - core.setOutput('pr_head_ref', pr.data.head.ref); - core.setOutput('pr_head_sha', pr.data.head.sha); - core.setOutput('pr_head_repo', pr.data.head.repo.full_name); - core.setOutput('pr_base_ref', pr.data.base.ref); - core.setOutput('is_fork', pr.data.head.repo.full_name !== context.payload.repository.full_name); - core.setOutput('authorized', 'true'); - - - name: React to comment - if: steps.check_author.outputs.authorized == 'true' - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - await github.rest.reactions.createForIssueComment({ - owner: context.repo.owner, - repo: context.repo.repo, - comment_id: context.payload.comment.id, - content: 'rocket' - }); - - - name: Comment starting - if: steps.check_author.outputs.authorized == 'true' - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: ${{ steps.check_author.outputs.pr_number }}, - body: `ā³ Running [pre-commit hooks](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) on PR #${{ steps.check_author.outputs.pr_number }}...` - }); - - - name: Checkout PR branch (same-repo) - if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'false' - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - ref: ${{ steps.check_author.outputs.pr_head_ref }} - fetch-depth: 0 - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Checkout PR branch (fork) - if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'true' - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - repository: ${{ steps.check_author.outputs.pr_head_repo }} - ref: ${{ steps.check_author.outputs.pr_head_ref }} - fetch-depth: 0 - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Verify checkout - if: steps.check_author.outputs.authorized == 'true' - run: | - echo "Current SHA: $(git rev-parse HEAD)" - echo "Expected SHA: ${{ steps.check_author.outputs.pr_head_sha }}" - if [[ "$(git rev-parse HEAD)" != "${{ steps.check_author.outputs.pr_head_sha }}" ]]; then - echo "::error::Checked out SHA does not match expected SHA" - exit 1 - fi - - - name: Set up Python - if: steps.check_author.outputs.authorized == 'true' - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 - with: - python-version: '3.12' - cache: pip - cache-dependency-path: | - **/requirements*.txt - .pre-commit-config.yaml - - - name: Set up Node.js - if: steps.check_author.outputs.authorized == 'true' - uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 - with: - node-version: '20' - cache: 'npm' - cache-dependency-path: 'src/llama_stack/ui/' - - - name: Install npm dependencies - if: steps.check_author.outputs.authorized == 'true' - run: npm ci - working-directory: src/llama_stack/ui - - - name: Run pre-commit - if: steps.check_author.outputs.authorized == 'true' - id: precommit - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 - continue-on-error: true 
- env: - SKIP: no-commit-to-branch - RUFF_OUTPUT_FORMAT: github - - - name: Check for changes - if: steps.check_author.outputs.authorized == 'true' - id: changes - run: | - if ! git diff --exit-code || [ -n "$(git ls-files --others --exclude-standard)" ]; then - echo "has_changes=true" >> $GITHUB_OUTPUT - echo "Changes detected after pre-commit" - else - echo "has_changes=false" >> $GITHUB_OUTPUT - echo "No changes after pre-commit" - fi - - - name: Commit and push changes - if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true' - run: | - git config --local user.email "github-actions[bot]@users.noreply.github.com" - git config --local user.name "github-actions[bot]" - - git add -A - git commit -m "style: apply pre-commit fixes - - šŸ¤– Applied by @github-actions bot via pre-commit workflow" - - # Push changes - git push origin HEAD:${{ steps.check_author.outputs.pr_head_ref }} - - - name: Comment success with changes - if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true' - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: ${{ steps.check_author.outputs.pr_number }}, - body: `āœ… Pre-commit hooks completed successfully!\n\nšŸ”§ Changes have been committed and pushed to the PR branch.` - }); - - - name: Comment success without changes - if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'false' && steps.precommit.outcome == 'success' - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: ${{ steps.check_author.outputs.pr_number }}, - body: `āœ… Pre-commit hooks passed!\n\n✨ No changes needed - your code is already formatted correctly.` - }); - - - name: Comment failure - if: failure() - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: ${{ steps.check_author.outputs.pr_number }}, - body: `āŒ Pre-commit workflow failed!\n\nPlease check the [workflow logs](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) for details.` - }); diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index 2b2ca6330..f2559a258 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -72,10 +72,16 @@ jobs: - name: Build container image if: matrix.image-type == 'container' run: | + BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=${{ matrix.distro }}" + if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then + BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" + fi + if [ -n "${UV_INDEX_STRATEGY:-}" ]; then + BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY" + fi docker build . 
\ -f containers/Containerfile \ - --build-arg INSTALL_MODE=editable \ - --build-arg DISTRO_NAME=${{ matrix.distro }} \ + $BUILD_ARGS \ --tag llama-stack:${{ matrix.distro }}-ci - name: Print dependencies in the image @@ -108,12 +114,18 @@ jobs: - name: Build container image run: | BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml) + BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests" + BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE" + BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml" + if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then + BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" + fi + if [ -n "${UV_INDEX_STRATEGY:-}" ]; then + BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY" + fi docker build . \ -f containers/Containerfile \ - --build-arg INSTALL_MODE=editable \ - --build-arg DISTRO_NAME=ci-tests \ - --build-arg BASE_IMAGE="$BASE_IMAGE" \ - --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \ + $BUILD_ARGS \ -t llama-stack:ci-tests - name: Inspect the container image entrypoint @@ -148,12 +160,18 @@ jobs: - name: Build UBI9 container image run: | BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml) + BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests" + BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE" + BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml" + if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then + BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" + fi + if [ -n "${UV_INDEX_STRATEGY:-}" ]; then + BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY" + fi docker build . 
\ -f containers/Containerfile \ - --build-arg INSTALL_MODE=editable \ - --build-arg DISTRO_NAME=ci-tests \ - --build-arg BASE_IMAGE="$BASE_IMAGE" \ - --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \ + $BUILD_ARGS \ -t llama-stack:ci-tests-ubi9 - name: Inspect UBI9 image diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 182643721..92c0a6a19 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -4,9 +4,13 @@ run-name: Run the unit test suite on: push: - branches: [ main ] + branches: + - main + - 'release-[0-9]+.[0-9]+.x' pull_request: - branches: [ main ] + branches: + - main + - 'release-[0-9]+.[0-9]+.x' paths: - 'src/llama_stack/**' - '!src/llama_stack/ui/**' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9990b6342..ce0d79b21 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,10 +52,6 @@ repos: additional_dependencies: - black==24.3.0 -- repo: https://github.com/astral-sh/uv-pre-commit - rev: 0.7.20 - hooks: - - id: uv-lock - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.18.2 @@ -63,22 +59,13 @@ repos: - id: mypy additional_dependencies: - uv==0.6.2 + - mypy - pytest - rich - types-requests - pydantic - - httpx pass_filenames: false -- repo: local - hooks: - - id: mypy-full - name: mypy (full type_checking) - entry: uv run --group dev --group type_checking mypy - language: system - pass_filenames: false - stages: [manual] - # - repo: https://github.com/tcort/markdown-link-check # rev: v3.11.2 # hooks: @@ -87,11 +74,26 @@ repos: - repo: local hooks: + - id: uv-lock + name: uv-lock + additional_dependencies: + - uv==0.7.20 + entry: ./scripts/uv-run-with-index.sh lock + language: python + pass_filenames: false + require_serial: true + files: ^(pyproject\.toml|uv\.lock)$ + - id: mypy-full + name: mypy (full type_checking) + entry: ./scripts/uv-run-with-index.sh run --group dev --group type_checking mypy + language: system + pass_filenames: false + stages: [manual] - id: distro-codegen name: Distribution Template Codegen additional_dependencies: - uv==0.7.8 - entry: uv run --group codegen ./scripts/distro_codegen.py + entry: ./scripts/uv-run-with-index.sh run --group codegen ./scripts/distro_codegen.py language: python pass_filenames: false require_serial: true @@ -100,7 +102,7 @@ repos: name: Provider Codegen additional_dependencies: - uv==0.7.8 - entry: uv run --group codegen ./scripts/provider_codegen.py + entry: ./scripts/uv-run-with-index.sh run --group codegen ./scripts/provider_codegen.py language: python pass_filenames: false require_serial: true @@ -109,7 +111,7 @@ repos: name: API Spec Codegen additional_dependencies: - uv==0.7.8 - entry: sh -c 'uv run ./docs/openapi_generator/run_openapi_generator.sh > /dev/null' + entry: sh -c './scripts/uv-run-with-index.sh run ./docs/openapi_generator/run_openapi_generator.sh > /dev/null' language: python pass_filenames: false require_serial: true @@ -150,7 +152,7 @@ repos: name: Generate CI documentation additional_dependencies: - uv==0.7.8 - entry: uv run ./scripts/gen-ci-docs.py + entry: ./scripts/uv-run-with-index.sh run ./scripts/gen-ci-docs.py language: python pass_filenames: false require_serial: true @@ -162,6 +164,7 @@ repos: files: ^src/llama_stack/ui/.*\.(ts|tsx)$ pass_filenames: false require_serial: true + - id: check-log-usage name: Ensure 'llama_stack.log' usage for logging entry: bash @@ -197,6 +200,7 @@ repos: echo; exit 1; } || true + ci: autofix_commit_msg: šŸŽØ 
[pre-commit.ci] Auto format from pre-commit.com hooks autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index f531542c5..a1085c9eb 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -956,7 +956,22 @@ paths: List routes. List all available API routes with their methods and implementing providers. - parameters: [] + parameters: + - name: api_filter + in: query + description: >- + Optional filter to control which routes are returned. Can be an API level + ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, + or 'deprecated' to show deprecated routes across all levels. If not specified, + returns only non-deprecated v1 routes. + required: false + schema: + type: string + enum: + - v1 + - v1alpha + - v1beta + - deprecated deprecated: false /v1/models: get: diff --git a/containers/Containerfile b/containers/Containerfile index 1c878ea9b..d2d066845 100644 --- a/containers/Containerfile +++ b/containers/Containerfile @@ -19,6 +19,8 @@ ARG KEEP_WORKSPACE="" ARG DISTRO_NAME="starter" ARG RUN_CONFIG_PATH="" ARG UV_HTTP_TIMEOUT=500 +ARG UV_EXTRA_INDEX_URL="" +ARG UV_INDEX_STRATEGY="" ENV UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT} ENV PYTHONDONTWRITEBYTECODE=1 ENV PIP_DISABLE_PIP_VERSION_CHECK=1 @@ -45,7 +47,7 @@ RUN set -eux; \ exit 1; \ fi -RUN pip install --no-cache uv +RUN pip install --no-cache-dir uv ENV UV_SYSTEM_PYTHON=1 ENV INSTALL_MODE=${INSTALL_MODE} @@ -62,47 +64,60 @@ COPY . /workspace # Install the client package if it is provided # NOTE: this is installed before llama-stack since llama-stack depends on llama-stack-client-python +# Unset UV index env vars to ensure we only use PyPI for the client RUN set -eux; \ + unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \ if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \ if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \ echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \ exit 1; \ fi; \ - uv pip install --no-cache -e "$LLAMA_STACK_CLIENT_DIR"; \ + uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \ fi; # Install llama-stack +# Use UV_EXTRA_INDEX_URL inline only for editable install with RC dependencies RUN set -eux; \ + SAVED_UV_EXTRA_INDEX_URL="${UV_EXTRA_INDEX_URL:-}"; \ + SAVED_UV_INDEX_STRATEGY="${UV_INDEX_STRATEGY:-}"; \ + unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \ if [ "$INSTALL_MODE" = "editable" ]; then \ if [ ! 
-d "$LLAMA_STACK_DIR" ]; then \ echo "INSTALL_MODE=editable requires LLAMA_STACK_DIR to point to a directory inside the build context" >&2; \ exit 1; \ fi; \ - uv pip install --no-cache -e "$LLAMA_STACK_DIR"; \ - elif [ "$INSTALL_MODE" = "test-pypi" ]; then \ - uv pip install --no-cache fastapi libcst; \ - if [ -n "$TEST_PYPI_VERSION" ]; then \ - uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \ + if [ -n "$SAVED_UV_EXTRA_INDEX_URL" ] && [ -n "$SAVED_UV_INDEX_STRATEGY" ]; then \ + UV_EXTRA_INDEX_URL="$SAVED_UV_EXTRA_INDEX_URL" UV_INDEX_STRATEGY="$SAVED_UV_INDEX_STRATEGY" \ + uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \ else \ - uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \ + uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \ + fi; \ + elif [ "$INSTALL_MODE" = "test-pypi" ]; then \ + uv pip install --no-cache-dir fastapi libcst; \ + if [ -n "$TEST_PYPI_VERSION" ]; then \ + uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \ + else \ + uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \ fi; \ else \ if [ -n "$PYPI_VERSION" ]; then \ - uv pip install --no-cache "llama-stack==$PYPI_VERSION"; \ + uv pip install --no-cache-dir "llama-stack==$PYPI_VERSION"; \ else \ - uv pip install --no-cache llama-stack; \ + uv pip install --no-cache-dir llama-stack; \ fi; \ fi; # Install the dependencies for the distribution +# Explicitly unset UV index env vars to ensure we only use PyPI for distribution deps RUN set -eux; \ + unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \ if [ -z "$DISTRO_NAME" ]; then \ echo "DISTRO_NAME must be provided" >&2; \ exit 1; \ fi; \ deps="$(llama stack list-deps "$DISTRO_NAME")"; \ if [ -n "$deps" ]; then \ - printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache; \ + printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache-dir; \ fi # Cleanup diff --git a/docs/docs/providers/inference/remote_nvidia.mdx b/docs/docs/providers/inference/remote_nvidia.mdx index b4e04176c..57c64ab46 100644 --- a/docs/docs/providers/inference/remote_nvidia.mdx +++ b/docs/docs/providers/inference/remote_nvidia.mdx @@ -20,6 +20,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services. | `url` | `` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | | `timeout` | `` | No | 60 | Timeout for the HTTP requests | | `append_api_version` | `` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. | +| `rerank_model_to_url` | `dict[str, str` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. 
| ## Sample Configuration diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index c1d3658f4..c376e4b80 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -1258,7 +1258,23 @@ ], "summary": "List routes.", "description": "List routes.\nList all available API routes with their methods and implementing providers.", - "parameters": [], + "parameters": [ + { + "name": "api_filter", + "in": "query", + "description": "Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns only non-deprecated v1 routes.", + "required": false, + "schema": { + "type": "string", + "enum": [ + "v1", + "v1alpha", + "v1beta", + "deprecated" + ] + } + } + ], "deprecated": false } }, diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 5b629a474..e35287952 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -953,7 +953,22 @@ paths: List routes. List all available API routes with their methods and implementing providers. - parameters: [] + parameters: + - name: api_filter + in: query + description: >- + Optional filter to control which routes are returned. Can be an API level + ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, + or 'deprecated' to show deprecated routes across all levels. If not specified, + returns only non-deprecated v1 routes. + required: false + schema: + type: string + enum: + - v1 + - v1alpha + - v1beta + - deprecated deprecated: false /v1/models: get: diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 59b6c28e7..a6208bf6f 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -1258,7 +1258,23 @@ ], "summary": "List routes.", "description": "List routes.\nList all available API routes with their methods and implementing providers.", - "parameters": [], + "parameters": [ + { + "name": "api_filter", + "in": "query", + "description": "Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns only non-deprecated v1 routes.", + "required": false, + "schema": { + "type": "string", + "enum": [ + "v1", + "v1alpha", + "v1beta", + "deprecated" + ] + } + } + ], "deprecated": false } }, diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index f531542c5..a1085c9eb 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -956,7 +956,22 @@ paths: List routes. List all available API routes with their methods and implementing providers. - parameters: [] + parameters: + - name: api_filter + in: query + description: >- + Optional filter to control which routes are returned. Can be an API level + ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, + or 'deprecated' to show deprecated routes across all levels. If not specified, + returns only non-deprecated v1 routes. 
+ required: false + schema: + type: string + enum: + - v1 + - v1alpha + - v1beta + - deprecated deprecated: false /v1/models: get: diff --git a/scripts/docker.sh b/scripts/docker.sh index a0690c8a9..b56df8c03 100755 --- a/scripts/docker.sh +++ b/scripts/docker.sh @@ -215,6 +215,16 @@ build_image() { --build-arg "LLAMA_STACK_DIR=/workspace" ) + # Pass UV index configuration for release branches + if [[ -n "${UV_EXTRA_INDEX_URL:-}" ]]; then + echo "Adding UV_EXTRA_INDEX_URL to docker build: $UV_EXTRA_INDEX_URL" + build_cmd+=(--build-arg "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL") + fi + if [[ -n "${UV_INDEX_STRATEGY:-}" ]]; then + echo "Adding UV_INDEX_STRATEGY to docker build: $UV_INDEX_STRATEGY" + build_cmd+=(--build-arg "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY") + fi + if ! "${build_cmd[@]}"; then echo "āŒ Failed to build Docker image" exit 1 diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index a09dc8621..985952167 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -23,7 +23,7 @@ COLLECT_ONLY=false # Function to display usage usage() { - cat << EOF + cat < /dev/null; then +if [[ "$COLLECT_ONLY" == false ]] && ! command -v llama &>/dev/null; then echo "llama could not be found, ensure llama-stack is installed" exit 1 fi -if ! command -v pytest &> /dev/null; then +if ! command -v pytest &>/dev/null; then echo "pytest could not be found, ensure pytest is installed" exit 1 fi @@ -208,9 +207,18 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then echo "=== Starting Llama Stack Server ===" export LLAMA_STACK_LOG_WIDTH=120 + # Configure telemetry collector for server mode + # Use a fixed port for the OTEL collector so the server can connect to it + COLLECTOR_PORT=4317 + export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}" + export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}" + export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf" + export OTEL_BSP_SCHEDULE_DELAY="200" + export OTEL_BSP_EXPORT_TIMEOUT="2000" + # remove "server:" from STACK_CONFIG stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://') - nohup llama stack run $stack_config > server.log 2>&1 & + nohup llama stack run $stack_config >server.log 2>&1 & echo "Waiting for Llama Stack Server to start..." for i in {1..30}; do @@ -239,7 +247,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then container_name="llama-stack-test-$DISTRO" if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then echo "Dumping container logs before stopping..." - docker logs "$container_name" > "docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true + docker logs "$container_name" >"docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true echo "Stopping and removing container: $container_name" docker stop "$container_name" 2>/dev/null || true docker rm "$container_name" 2>/dev/null || true @@ -271,6 +279,16 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then --build-arg "LLAMA_STACK_DIR=/workspace" ) + # Pass UV index configuration for release branches + if [[ -n "${UV_EXTRA_INDEX_URL:-}" ]]; then + echo "Adding UV_EXTRA_INDEX_URL to docker build: $UV_EXTRA_INDEX_URL" + build_cmd+=(--build-arg "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL") + fi + if [[ -n "${UV_INDEX_STRATEGY:-}" ]]; then + echo "Adding UV_INDEX_STRATEGY to docker build: $UV_INDEX_STRATEGY" + build_cmd+=(--build-arg "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY") + fi + if ! 
"${build_cmd[@]}"; then echo "āŒ Failed to build Docker image" exit 1 @@ -428,17 +446,13 @@ elif [ $exit_code -eq 5 ]; then else echo "āŒ Tests failed" echo "" - echo "=== Dumping last 100 lines of logs for debugging ===" - # Output server or container logs based on stack config if [[ "$STACK_CONFIG" == *"server:"* && -f "server.log" ]]; then - echo "--- Last 100 lines of server.log ---" - tail -100 server.log + echo "--- Server side failures can be located inside server.log (available from artifacts on CI) ---" elif [[ "$STACK_CONFIG" == *"docker:"* ]]; then docker_log_file="docker-${DISTRO}-${INFERENCE_MODE}.log" if [[ -f "$docker_log_file" ]]; then - echo "--- Last 100 lines of $docker_log_file ---" - tail -100 "$docker_log_file" + echo "--- Server side failures can be located inside $docker_log_file (available from artifacts on CI) ---" fi fi diff --git a/scripts/uv-run-with-index.sh b/scripts/uv-run-with-index.sh new file mode 100755 index 000000000..18d0a0e9c --- /dev/null +++ b/scripts/uv-run-with-index.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +set -euo pipefail + +# Detect current branch and target branch +# In GitHub Actions, use GITHUB_REF/GITHUB_BASE_REF +if [[ -n "${GITHUB_REF:-}" ]]; then + BRANCH="${GITHUB_REF#refs/heads/}" +else + BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "") +fi + +# For PRs, check the target branch +if [[ -n "${GITHUB_BASE_REF:-}" ]]; then + TARGET_BRANCH="${GITHUB_BASE_REF}" +else + TARGET_BRANCH=$(git rev-parse --abbrev-ref HEAD@{upstream} 2>/dev/null | sed 's|origin/||' || echo "") +fi + +# Check if on a release branch or targeting one, or LLAMA_STACK_RELEASE_MODE is set +IS_RELEASE=false +if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then + IS_RELEASE=true +elif [[ "$TARGET_BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then + IS_RELEASE=true +elif [[ "${LLAMA_STACK_RELEASE_MODE:-}" == "true" ]]; then + IS_RELEASE=true +fi + +# On release branches, use test.pypi as extra index for RC versions +if [[ "$IS_RELEASE" == "true" ]]; then + export UV_EXTRA_INDEX_URL="https://test.pypi.org/simple/" + export UV_INDEX_STRATEGY="unsafe-best-match" +fi + +# Run uv with all arguments passed through +exec uv "$@" diff --git a/src/llama_stack/apis/inspect/inspect.py b/src/llama_stack/apis/inspect/inspect.py index 8b0996e69..4e0e2548b 100644 --- a/src/llama_stack/apis/inspect/inspect.py +++ b/src/llama_stack/apis/inspect/inspect.py @@ -4,14 +4,21 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Protocol, runtime_checkable +from typing import Literal, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.version import LLAMA_STACK_API_V1 +from llama_stack.apis.version import ( + LLAMA_STACK_API_V1, +) from llama_stack.providers.datatypes import HealthStatus from llama_stack.schema_utils import json_schema_type, webmethod +# Valid values for the route filter parameter. 
+# Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated) +# Special filter value: "deprecated" (shows deprecated routes regardless of level) +ApiFilter = Literal["v1", "v1alpha", "v1beta", "deprecated"] + @json_schema_type class RouteInfo(BaseModel): @@ -64,11 +71,12 @@ class Inspect(Protocol): """ @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1) - async def list_routes(self) -> ListRoutesResponse: + async def list_routes(self, api_filter: ApiFilter | None = None) -> ListRoutesResponse: """List routes. List all available API routes with their methods and implementing providers. + :param api_filter: Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns only non-deprecated v1 routes. :returns: Response containing information about all available routes. """ ... diff --git a/src/llama_stack/cli/stack/run.py b/src/llama_stack/cli/stack/run.py index dbf531297..fc3287192 100644 --- a/src/llama_stack/cli/stack/run.py +++ b/src/llama_stack/cli/stack/run.py @@ -13,11 +13,23 @@ from pathlib import Path import uvicorn import yaml +from termcolor import cprint from llama_stack.cli.stack.utils import ImageType from llama_stack.cli.subcommand import Subcommand -from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.datatypes import Api, Provider, StackRunConfig +from llama_stack.core.distribution import get_provider_registry from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) +from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro from llama_stack.log import LoggingConfig, get_logger @@ -69,6 +81,12 @@ class StackRun(Subcommand): action="store_true", help="Start the UI server", ) + self.parser.add_argument( + "--providers", + type=str, + default=None, + help="Run a stack with only a list of providers. This list is formatted like: api1=provider1,api1=provider2,api2=provider3. Where there can be multiple providers per API.", + ) def _run_stack_run_cmd(self, args: argparse.Namespace) -> None: import yaml @@ -94,6 +112,49 @@ class StackRun(Subcommand): config_file = resolve_config_or_distro(args.config, Mode.RUN) except ValueError as e: self.parser.error(str(e)) + elif args.providers: + provider_list: dict[str, list[Provider]] = dict() + for api_provider in args.providers.split(","): + if "=" not in api_provider: + cprint( + "Could not parse `--providers`. 
Please ensure the list is in the format api1=provider1,api2=provider2", + color="red", + file=sys.stderr, + ) + sys.exit(1) + api, provider_type = api_provider.split("=") + providers_for_api = get_provider_registry().get(Api(api), None) + if providers_for_api is None: + cprint( + f"{api} is not a valid API.", + color="red", + file=sys.stderr, + ) + sys.exit(1) + if provider_type in providers_for_api: + provider = Provider( + provider_type=provider_type, + provider_id=provider_type.split("::")[1], + ) + provider_list.setdefault(api, []).append(provider) + else: + cprint( + f"{provider} is not a valid provider for the {api} API.", + color="red", + file=sys.stderr, + ) + sys.exit(1) + run_config = self._generate_run_config_from_providers(providers=provider_list) + config_dict = run_config.model_dump(mode="json") + + # Write config to disk in providers-run directory + distro_dir = DISTRIBS_BASE_DIR / "providers-run" + config_file = distro_dir / "run.yaml" + + logger.info(f"Writing generated config to: {config_file}") + with open(config_file, "w") as f: + yaml.dump(config_dict, f, default_flow_style=False, sort_keys=False) + else: config_file = None @@ -107,7 +168,8 @@ class StackRun(Subcommand): try: config = parse_and_maybe_upgrade_config(config_dict) - if not os.path.exists(str(config.external_providers_dir)): + # Create external_providers_dir if it's specified and doesn't exist + if config.external_providers_dir and not os.path.exists(str(config.external_providers_dir)): os.makedirs(str(config.external_providers_dir), exist_ok=True) except AttributeError as e: self.parser.error(f"failed to parse config file '{config_file}':\n {e}") @@ -128,7 +190,7 @@ class StackRun(Subcommand): config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents))) port = args.port or config.server.port - host = config.server.host or ["::", "0.0.0.0"] + host = config.server.host or "0.0.0.0" # Set the config file in environment so create_app can find it os.environ["LLAMA_STACK_CONFIG"] = str(config_file) @@ -140,6 +202,7 @@ class StackRun(Subcommand): "lifespan": "on", "log_level": logger.getEffectiveLevel(), "log_config": logger_config, + "workers": config.server.workers, } keyfile = config.server.tls_keyfile @@ -340,3 +403,44 @@ class StackRun(Subcommand): ) except Exception as e: logger.error(f"Failed to start UI development server in {ui_dir}: {e}") + + def _generate_run_config_from_providers(self, providers: dict[str, list[Provider]]): + apis = list(providers.keys()) + distro_dir = DISTRIBS_BASE_DIR / "providers-run" + # need somewhere to put the storage. 
+ os.makedirs(distro_dir, exist_ok=True) + storage = StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig( + db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db", + ), + "sql_default": SqliteSqlStoreConfig( + db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db", + ), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference( + backend="kv_default", + namespace="registry", + ), + inference=InferenceStoreReference( + backend="sql_default", + table_name="inference_store", + ), + conversations=SqlStoreReference( + backend="sql_default", + table_name="openai_conversations", + ), + prompts=KVStoreReference( + backend="kv_default", + namespace="prompts", + ), + ), + ) + + return StackRunConfig( + image_name="providers-run", + apis=apis, + providers=providers, + storage=storage, + ) diff --git a/src/llama_stack/core/configure.py b/src/llama_stack/core/configure.py index 734839ea9..5d4a54184 100644 --- a/src/llama_stack/core/configure.py +++ b/src/llama_stack/core/configure.py @@ -17,7 +17,6 @@ from llama_stack.core.distribution import ( get_provider_registry, ) from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars -from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.prompt_for_config import prompt_for_config from llama_stack.log import get_logger @@ -194,19 +193,11 @@ def upgrade_from_routing_table( def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig: - version = config_dict.get("version", None) - if version == LLAMA_STACK_RUN_CONFIG_VERSION: - processed_config_dict = replace_env_vars(config_dict) - return StackRunConfig(**cast_image_name_to_string(processed_config_dict)) - if "routing_table" in config_dict: logger.info("Upgrading config...") config_dict = upgrade_from_routing_table(config_dict) config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION - if not config_dict.get("external_providers_dir", None): - config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR - processed_config_dict = replace_env_vars(config_dict) return StackRunConfig(**cast_image_name_to_string(processed_config_dict)) diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 95907adcf..2182ea4e5 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -473,6 +473,10 @@ class ServerConfig(BaseModel): "- true: Enable localhost CORS for development\n" "- {allow_origins: [...], allow_methods: [...], ...}: Full configuration", ) + workers: int = Field( + default=1, + description="Number of workers to use for the server", + ) class StackRunConfig(BaseModel): diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py index 37dab4199..6352af00f 100644 --- a/src/llama_stack/core/inspect.py +++ b/src/llama_stack/core/inspect.py @@ -15,6 +15,7 @@ from llama_stack.apis.inspect import ( RouteInfo, VersionInfo, ) +from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.core.datatypes import StackRunConfig from llama_stack.core.external import load_external_apis from llama_stack.core.server.routes import get_all_api_routes @@ -39,9 +40,21 @@ class DistributionInspectImpl(Inspect): async def initialize(self) -> None: pass - async def list_routes(self) -> ListRoutesResponse: + async def list_routes(self, api_filter: str | None = None) -> ListRoutesResponse: run_config: StackRunConfig = self.config.run_config + # Helper function to 
determine if a route should be included based on api_filter + def should_include_route(webmethod) -> bool: + if api_filter is None: + # Default: only non-deprecated v1 APIs + return not webmethod.deprecated and webmethod.level == LLAMA_STACK_API_V1 + elif api_filter == "deprecated": + # Special filter: show deprecated routes regardless of their actual level + return bool(webmethod.deprecated) + else: + # Filter by API level (non-deprecated routes only) + return not webmethod.deprecated and webmethod.level == api_filter + ret = [] external_apis = load_external_apis(run_config) all_endpoints = get_all_api_routes(external_apis) @@ -55,8 +68,8 @@ class DistributionInspectImpl(Inspect): method=next(iter([m for m in e.methods if m != "HEAD"])), provider_types=[], # These APIs don't have "real" providers - they're internal to the stack ) - for e, _ in endpoints - if e.methods is not None + for e, webmethod in endpoints + if e.methods is not None and should_include_route(webmethod) ] ) else: @@ -69,8 +82,8 @@ class DistributionInspectImpl(Inspect): method=next(iter([m for m in e.methods if m != "HEAD"])), provider_types=[p.provider_type for p in providers], ) - for e, _ in endpoints - if e.methods is not None + for e, webmethod in endpoints + if e.methods is not None and should_include_route(webmethod) ] ) diff --git a/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index f1a828413..97fa95a1f 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -181,3 +181,22 @@ vlm_response = client.chat.completions.create( print(f"VLM Response: {vlm_response.choices[0].message.content}") ``` + +### Rerank Example + +The following example shows how to rerank documents using an NVIDIA NIM. + +```python +rerank_response = client.alpha.inference.rerank( + model="nvidia/nvidia/llama-3.2-nv-rerankqa-1b-v2", + query="query", + items=[ + "item_1", + "item_2", + "item_3", + ], +) + +for i, result in enumerate(rerank_response): + print(f"{i+1}. [Index: {result.index}, " f"Score: {(result.relevance_score):.3f}]") +``` \ No newline at end of file diff --git a/src/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py index 3545d2b11..618bbe078 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/config.py +++ b/src/llama_stack/providers/remote/inference/nvidia/config.py @@ -28,6 +28,7 @@ class NVIDIAConfig(RemoteInferenceProviderConfig): Attributes: url (str): A base url for accessing the NVIDIA NIM, e.g. http://localhost:8000 api_key (str): The access key for the hosted NIM endpoints + rerank_model_to_url (dict[str, str]): Mapping of rerank model identifiers to their API endpoints There are two ways to access NVIDIA NIMs - 0. Hosted: Preview APIs hosted at https://integrate.api.nvidia.com @@ -55,6 +56,14 @@ class NVIDIAConfig(RemoteInferenceProviderConfig): default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false", description="When set to false, the API version will not be appended to the base_url. 
By default, it is true.", ) + rerank_model_to_url: dict[str, str] = Field( + default_factory=lambda: { + "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", + "nvidia/nv-rerankqa-mistral-4b-v3": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking", + "nvidia/llama-3.2-nv-rerankqa-1b-v2": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking", + }, + description="Mapping of rerank model identifiers to their API endpoints. ", + ) @classmethod def sample_run_config( diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py index ea11b49cd..bc5aa7953 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -5,6 +5,19 @@ # the root directory of this source tree. +from collections.abc import Iterable + +import aiohttp + +from llama_stack.apis.inference import ( + RerankData, + RerankResponse, +) +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, +) +from llama_stack.apis.models import Model, ModelType from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -61,3 +74,101 @@ class NVIDIAInferenceAdapter(OpenAIMixin): :return: The NVIDIA API base URL """ return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url + + async def list_provider_model_ids(self) -> Iterable[str]: + """ + Return both dynamic model IDs and statically configured rerank model IDs. + """ + dynamic_ids: Iterable[str] = [] + try: + dynamic_ids = await super().list_provider_model_ids() + except Exception: + # If the dynamic listing fails, proceed with just configured rerank IDs + dynamic_ids = [] + + configured_rerank_ids = list(self.config.rerank_model_to_url.keys()) + return list(dict.fromkeys(list(dynamic_ids) + configured_rerank_ids)) # remove duplicates + + def construct_model_from_identifier(self, identifier: str) -> Model: + """ + Classify rerank models from config; otherwise use the base behavior. 
+ """ + if identifier in self.config.rerank_model_to_url: + return Model( + provider_id=self.__provider_id__, # type: ignore[attr-defined] + provider_resource_id=identifier, + identifier=identifier, + model_type=ModelType.rerank, + ) + return super().construct_model_from_identifier(identifier) + + async def rerank( + self, + model: str, + query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, + items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], + max_num_results: int | None = None, + ) -> RerankResponse: + provider_model_id = await self._get_provider_model_id(model) + + ranking_url = self.get_base_url() + + if _is_nvidia_hosted(self.config) and provider_model_id in self.config.rerank_model_to_url: + ranking_url = self.config.rerank_model_to_url[provider_model_id] + + logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}") + + # Convert query to text format + if isinstance(query, str): + query_text = query + elif isinstance(query, OpenAIChatCompletionContentPartTextParam): + query_text = query.text + else: + raise ValueError("Query must be a string or text content part") + + # Convert items to text format + passages = [] + for item in items: + if isinstance(item, str): + passages.append({"text": item}) + elif isinstance(item, OpenAIChatCompletionContentPartTextParam): + passages.append({"text": item.text}) + else: + raise ValueError("Items must be strings or text content parts") + + payload = { + "model": provider_model_id, + "query": {"text": query_text}, + "passages": passages, + } + + headers = { + "Authorization": f"Bearer {self.get_api_key()}", + "Content-Type": "application/json", + } + + try: + async with aiohttp.ClientSession() as session: + async with session.post(ranking_url, headers=headers, json=payload) as response: + if response.status != 200: + response_text = await response.text() + raise ConnectionError( + f"NVIDIA rerank API request failed with status {response.status}: {response_text}" + ) + + result = await response.json() + rankings = result.get("rankings", []) + + # Convert to RerankData format + rerank_data = [] + for ranking in rankings: + rerank_data.append(RerankData(index=ranking["index"], relevance_score=ranking["logit"])) + + # Apply max_num_results limit + if max_num_results is not None: + rerank_data = rerank_data[:max_num_results] + + return RerankResponse(data=rerank_data) + + except aiohttp.ClientError as e: + raise ConnectionError(f"Failed to connect to NVIDIA rerank API at {ranking_url}: {e}") from e diff --git a/src/llama_stack/ui/app/api/v1/[...path]/route.ts b/src/llama_stack/ui/app/api/v1/[...path]/route.ts index 51c1f8004..d1aa31014 100644 --- a/src/llama_stack/ui/app/api/v1/[...path]/route.ts +++ b/src/llama_stack/ui/app/api/v1/[...path]/route.ts @@ -51,10 +51,14 @@ async function proxyRequest(request: NextRequest, method: string) { ); // Create response with same status and headers - const proxyResponse = new NextResponse(responseText, { - status: response.status, - statusText: response.statusText, - }); + // Handle 204 No Content responses specially + const proxyResponse = + response.status === 204 + ? 
new NextResponse(null, { status: 204 }) + : new NextResponse(responseText, { + status: response.status, + statusText: response.statusText, + }); // Copy response headers (except problematic ones) response.headers.forEach((value, key) => { diff --git a/src/llama_stack/ui/app/prompts/page.tsx b/src/llama_stack/ui/app/prompts/page.tsx new file mode 100644 index 000000000..30106a056 --- /dev/null +++ b/src/llama_stack/ui/app/prompts/page.tsx @@ -0,0 +1,5 @@ +import { PromptManagement } from "@/components/prompts"; + +export default function PromptsPage() { + return <PromptManagement />; +} diff --git a/src/llama_stack/ui/components/layout/app-sidebar.tsx b/src/llama_stack/ui/components/layout/app-sidebar.tsx index 373f0c5ae..a5df60aef 100644 --- a/src/llama_stack/ui/components/layout/app-sidebar.tsx +++ b/src/llama_stack/ui/components/layout/app-sidebar.tsx @@ -8,6 +8,7 @@ import { MessageCircle, Settings2, Compass, + FileText, } from "lucide-react"; import Link from "next/link"; import { usePathname } from "next/navigation"; @@ -50,6 +51,11 @@ const manageItems = [ url: "/logs/vector-stores", icon: Database, }, + { + title: "Prompts", + url: "/prompts", + icon: FileText, + }, { title: "Documentation", url: "https://llama-stack.readthedocs.io/en/latest/references/api_reference/index.html", diff --git a/src/llama_stack/ui/components/prompts/index.ts b/src/llama_stack/ui/components/prompts/index.ts new file mode 100644 index 000000000..d190c5eb6 --- /dev/null +++ b/src/llama_stack/ui/components/prompts/index.ts @@ -0,0 +1,4 @@ +export { PromptManagement } from "./prompt-management"; +export { PromptList } from "./prompt-list"; +export { PromptEditor } from "./prompt-editor"; +export * from "./types"; diff --git a/src/llama_stack/ui/components/prompts/prompt-editor.test.tsx b/src/llama_stack/ui/components/prompts/prompt-editor.test.tsx new file mode 100644 index 000000000..458a5f942 --- /dev/null +++ b/src/llama_stack/ui/components/prompts/prompt-editor.test.tsx @@ -0,0 +1,309 @@ +import React from "react"; +import { render, screen, fireEvent } from "@testing-library/react"; +import "@testing-library/jest-dom"; +import { PromptEditor } from "./prompt-editor"; +import type { Prompt, PromptFormData } from "./types"; + +describe("PromptEditor", () => { + const mockOnSave = jest.fn(); + const mockOnCancel = jest.fn(); + const mockOnDelete = jest.fn(); + + const defaultProps = { + onSave: mockOnSave, + onCancel: mockOnCancel, + onDelete: mockOnDelete, + }; + + beforeEach(() => { + jest.clearAllMocks(); + }); + + describe("Create Mode", () => { + test("renders create form correctly", () => { + render(<PromptEditor {...defaultProps} />); + + expect(screen.getByLabelText("Prompt Content *")).toBeInTheDocument(); + expect(screen.getByText("Variables")).toBeInTheDocument(); + expect(screen.getByText("Preview")).toBeInTheDocument(); + expect(screen.getByText("Create Prompt")).toBeInTheDocument(); + expect(screen.getByText("Cancel")).toBeInTheDocument(); + }); + + test("shows preview placeholder when no content", () => { + render(<PromptEditor {...defaultProps} />); + + expect( + screen.getByText("Enter content to preview the compiled prompt") + ).toBeInTheDocument(); + }); + + test("submits form with correct data", () => { + render(<PromptEditor {...defaultProps} />); + + const promptInput = screen.getByLabelText("Prompt Content *"); + fireEvent.change(promptInput, { + target: { value: "Hello {{name}}, welcome!" 
}, + }); + + fireEvent.click(screen.getByText("Create Prompt")); + + expect(mockOnSave).toHaveBeenCalledWith({ + prompt: "Hello {{name}}, welcome!", + variables: [], + }); + }); + + test("prevents submission with empty prompt", () => { + render(<PromptEditor {...defaultProps} />); + + fireEvent.click(screen.getByText("Create Prompt")); + + expect(mockOnSave).not.toHaveBeenCalled(); + }); + }); + + describe("Edit Mode", () => { + const mockPrompt: Prompt = { + prompt_id: "prompt_123", + prompt: "Hello {{name}}, how is {{weather}}?", + version: 1, + variables: ["name", "weather"], + is_default: true, + }; + + test("renders edit form with existing data", () => { + render(<PromptEditor {...defaultProps} prompt={mockPrompt} />); + + expect( + screen.getByDisplayValue("Hello {{name}}, how is {{weather}}?") + ).toBeInTheDocument(); + expect(screen.getAllByText("name")).toHaveLength(2); // One in variables, one in preview + expect(screen.getAllByText("weather")).toHaveLength(2); // One in variables, one in preview + expect(screen.getByText("Update Prompt")).toBeInTheDocument(); + expect(screen.getByText("Delete Prompt")).toBeInTheDocument(); + }); + + test("submits updated data correctly", () => { + render(<PromptEditor {...defaultProps} prompt={mockPrompt} />); + + const promptInput = screen.getByLabelText("Prompt Content *"); + fireEvent.change(promptInput, { + target: { value: "Updated: Hello {{name}}!" }, + }); + + fireEvent.click(screen.getByText("Update Prompt")); + + expect(mockOnSave).toHaveBeenCalledWith({ + prompt: "Updated: Hello {{name}}!", + variables: ["name", "weather"], + }); + }); + }); + + describe("Variables Management", () => { + test("adds new variable", () => { + render(<PromptEditor {...defaultProps} />); + + const variableInput = screen.getByPlaceholderText( + "Add variable name (e.g. user_name, topic)" + ); + fireEvent.change(variableInput, { target: { value: "testVar" } }); + fireEvent.click(screen.getByText("Add")); + + expect(screen.getByText("testVar")).toBeInTheDocument(); + }); + + test("prevents adding duplicate variables", () => { + render(<PromptEditor {...defaultProps} />); + + const variableInput = screen.getByPlaceholderText( + "Add variable name (e.g. user_name, topic)" + ); + + // Add first variable + fireEvent.change(variableInput, { target: { value: "test" } }); + fireEvent.click(screen.getByText("Add")); + + // Try to add same variable again + fireEvent.change(variableInput, { target: { value: "test" } }); + + // Button should be disabled + expect(screen.getByText("Add")).toBeDisabled(); + }); + + test("removes variable", () => { + const mockPrompt: Prompt = { + prompt_id: "prompt_123", + prompt: "Hello {{name}}", + version: 1, + variables: ["name", "location"], + is_default: true, + }; + + render(<PromptEditor {...defaultProps} prompt={mockPrompt} />); + + // Check that both variables are present initially + expect(screen.getAllByText("name").length).toBeGreaterThan(0); + expect(screen.getAllByText("location").length).toBeGreaterThan(0); + + // Remove the location variable by clicking the X button with the specific title + const removeLocationButton = screen.getByTitle( + "Remove location variable" + ); + fireEvent.click(removeLocationButton); + + // Name should still be there, location should be gone from the variables section + expect(screen.getAllByText("name").length).toBeGreaterThan(0); + expect( + screen.queryByTitle("Remove location variable") + ).not.toBeInTheDocument(); + }); + + test("adds variable on Enter key", () => { + render(<PromptEditor {...defaultProps} />); + + const variableInput = screen.getByPlaceholderText( + "Add variable name (e.g. 
user_name, topic)" + ); + fireEvent.change(variableInput, { target: { value: "enterVar" } }); + + // Simulate Enter key press + fireEvent.keyPress(variableInput, { + key: "Enter", + code: "Enter", + charCode: 13, + preventDefault: jest.fn(), + }); + + // Check if the variable was added by looking for the badge + expect(screen.getAllByText("enterVar").length).toBeGreaterThan(0); + }); + }); + + describe("Preview Functionality", () => { + test("shows live preview with variables", () => { + render(); + + // Add prompt content + const promptInput = screen.getByLabelText("Prompt Content *"); + fireEvent.change(promptInput, { + target: { value: "Hello {{name}}, welcome to {{place}}!" }, + }); + + // Add variables + const variableInput = screen.getByPlaceholderText( + "Add variable name (e.g. user_name, topic)" + ); + fireEvent.change(variableInput, { target: { value: "name" } }); + fireEvent.click(screen.getByText("Add")); + + fireEvent.change(variableInput, { target: { value: "place" } }); + fireEvent.click(screen.getByText("Add")); + + // Check that preview area shows the content + expect(screen.getByText("Compiled Prompt")).toBeInTheDocument(); + }); + + test("shows variable value inputs in preview", () => { + const mockPrompt: Prompt = { + prompt_id: "prompt_123", + prompt: "Hello {{name}}", + version: 1, + variables: ["name"], + is_default: true, + }; + + render(); + + expect(screen.getByText("Variable Values")).toBeInTheDocument(); + expect( + screen.getByPlaceholderText("Enter value for name") + ).toBeInTheDocument(); + }); + + test("shows color legend for variable states", () => { + render(); + + // Add content to show preview + const promptInput = screen.getByLabelText("Prompt Content *"); + fireEvent.change(promptInput, { + target: { value: "Hello {{name}}" }, + }); + + expect(screen.getByText("Used")).toBeInTheDocument(); + expect(screen.getByText("Unused")).toBeInTheDocument(); + expect(screen.getByText("Undefined")).toBeInTheDocument(); + }); + }); + + describe("Error Handling", () => { + test("displays error message", () => { + const errorMessage = "Prompt contains undeclared variables"; + render(); + + expect(screen.getByText(errorMessage)).toBeInTheDocument(); + }); + }); + + describe("Delete Functionality", () => { + const mockPrompt: Prompt = { + prompt_id: "prompt_123", + prompt: "Hello {{name}}", + version: 1, + variables: ["name"], + is_default: true, + }; + + test("shows delete button in edit mode", () => { + render(); + + expect(screen.getByText("Delete Prompt")).toBeInTheDocument(); + }); + + test("hides delete button in create mode", () => { + render(); + + expect(screen.queryByText("Delete Prompt")).not.toBeInTheDocument(); + }); + + test("calls onDelete with confirmation", () => { + const originalConfirm = window.confirm; + window.confirm = jest.fn(() => true); + + render(); + + fireEvent.click(screen.getByText("Delete Prompt")); + + expect(window.confirm).toHaveBeenCalledWith( + "Are you sure you want to delete this prompt? This action cannot be undone." 
+ ); + expect(mockOnDelete).toHaveBeenCalledWith("prompt_123"); + + window.confirm = originalConfirm; + }); + + test("does not delete when confirmation is cancelled", () => { + const originalConfirm = window.confirm; + window.confirm = jest.fn(() => false); + + render(<PromptEditor {...defaultProps} prompt={mockPrompt} />); + + fireEvent.click(screen.getByText("Delete Prompt")); + + expect(mockOnDelete).not.toHaveBeenCalled(); + + window.confirm = originalConfirm; + }); + }); + + describe("Cancel Functionality", () => { + test("calls onCancel when cancel button is clicked", () => { + render(<PromptEditor {...defaultProps} />); + + fireEvent.click(screen.getByText("Cancel")); + + expect(mockOnCancel).toHaveBeenCalled(); + }); + }); +}); diff --git a/src/llama_stack/ui/components/prompts/prompt-editor.tsx b/src/llama_stack/ui/components/prompts/prompt-editor.tsx new file mode 100644 index 000000000..efa76f757 --- /dev/null +++ b/src/llama_stack/ui/components/prompts/prompt-editor.tsx @@ -0,0 +1,346 @@ +"use client"; + +import { useState, useEffect } from "react"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { Textarea } from "@/components/ui/textarea"; +import { Badge } from "@/components/ui/badge"; +import { + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, +} from "@/components/ui/card"; +import { Separator } from "@/components/ui/separator"; +import { X, Plus, Save, Trash2 } from "lucide-react"; +import { Prompt, PromptFormData } from "./types"; + +interface PromptEditorProps { + prompt?: Prompt; + onSave: (prompt: PromptFormData) => void; + onCancel: () => void; + onDelete?: (promptId: string) => void; + error?: string | null; +} + +export function PromptEditor({ + prompt, + onSave, + onCancel, + onDelete, + error, +}: PromptEditorProps) { + const [formData, setFormData] = useState<PromptFormData>({ + prompt: "", + variables: [], + }); + + const [newVariable, setNewVariable] = useState(""); + const [variableValues, setVariableValues] = useState<Record<string, string>>( + {} + ); + + useEffect(() => { + if (prompt) { + setFormData({ + prompt: prompt.prompt || "", + variables: prompt.variables || [], + }); + } + }, [prompt]); + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault(); + if (!formData.prompt.trim()) { + return; + } + onSave(formData); + }; + + const addVariable = () => { + if ( + newVariable.trim() && + !formData.variables.includes(newVariable.trim()) + ) { + setFormData(prev => ({ + ...prev, + variables: [...prev.variables, newVariable.trim()], + })); + setNewVariable(""); + } + }; + + const removeVariable = (variableToRemove: string) => { + setFormData(prev => ({ + ...prev, + variables: prev.variables.filter( + variable => variable !== variableToRemove + ), + })); + }; + + const renderPreview = () => { + const text = formData.prompt; + if (!text) return text; + + // Split text by variable patterns and process each part + const parts = text.split(/(\{\{\s*\w+\s*\}\})/g); + + return parts.map((part, index) => { + const variableMatch = part.match(/\{\{\s*(\w+)\s*\}\}/); + if (variableMatch) { + const variableName = variableMatch[1]; + const isDefined = formData.variables.includes(variableName); + const value = variableValues[variableName]; + + if (!isDefined) { + // Variable not in variables list - likely a typo/bug (RED) + return ( + <span key={index} className="rounded bg-red-100 px-1 text-red-800"> + {part} + </span> + ); + } else if (value && value.trim()) { + // Variable defined and has value - show the value (GREEN) + return ( + <span key={index} className="rounded bg-green-100 px-1 text-green-800"> + {value} + </span> + ); + } else { + // Variable defined but empty (YELLOW) + return ( + <span key={index} className="rounded bg-yellow-100 px-1 text-yellow-800"> + {part} + </span> + ); + } 
+ } + return part; + }); + }; + + const updateVariableValue = (variable: string, value: string) => { + setVariableValues(prev => ({ + ...prev, + [variable]: value, + })); + }; + + return ( + <div>
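+ {/* Error banner */}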
+ {error && ( + <div className="rounded-md border border-destructive/50 bg-destructive/10 p-3 text-sm text-destructive"> + {error} + </div> + )} + {/* Form Section */} + <form onSubmit={handleSubmit}> + <div>
+ +