mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-04 10:10:36 +00:00
Merge 7a19488787 into sapling-pr-archive-ehhuang
This commit is contained in:
commit
202a28f8ca
71 changed files with 3537 additions and 39048 deletions
60
.github/actions/install-llama-stack-client/action.yml
vendored
Normal file
60
.github/actions/install-llama-stack-client/action.yml
vendored
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
name: Install llama-stack-client
|
||||
description: Install llama-stack-client based on branch context and client-version input
|
||||
|
||||
inputs:
|
||||
client-version:
|
||||
description: 'Client version to install on non-release branches (latest or published). Ignored on release branches.'
|
||||
required: false
|
||||
default: ""
|
||||
|
||||
outputs:
|
||||
uv-extra-index-url:
|
||||
description: 'UV_EXTRA_INDEX_URL to use (set for release branches)'
|
||||
value: ${{ steps.configure.outputs.uv-extra-index-url }}
|
||||
install-after-sync:
|
||||
description: 'Whether to install client after uv sync'
|
||||
value: ${{ steps.configure.outputs.install-after-sync }}
|
||||
install-source:
|
||||
description: 'Where to install client from after sync'
|
||||
value: ${{ steps.configure.outputs.install-source }}
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Configure client installation
|
||||
id: configure
|
||||
shell: bash
|
||||
run: |
|
||||
# Determine the branch we're working with
|
||||
BRANCH="${{ github.base_ref || github.ref }}"
|
||||
BRANCH="${BRANCH#refs/heads/}"
|
||||
|
||||
echo "Working with branch: $BRANCH"
|
||||
|
||||
# On release branches: use test.pypi for uv sync, then install from git
|
||||
# On non-release branches: install based on client-version after sync
|
||||
if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
|
||||
echo "Detected release branch: $BRANCH"
|
||||
|
||||
# Check if matching branch exists in client repo
|
||||
if ! git ls-remote --exit-code --heads https://github.com/llamastack/llama-stack-client-python.git "$BRANCH" > /dev/null 2>&1; then
|
||||
echo "::error::Branch $BRANCH not found in llama-stack-client-python repository"
|
||||
echo "::error::Please create the matching release branch in llama-stack-client-python before testing"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Configure to use test.pypi as extra index (PyPI is primary)
|
||||
echo "uv-extra-index-url=https://test.pypi.org/simple/" >> $GITHUB_OUTPUT
|
||||
echo "install-after-sync=true" >> $GITHUB_OUTPUT
|
||||
echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@$BRANCH" >> $GITHUB_OUTPUT
|
||||
elif [ "${{ inputs.client-version }}" = "latest" ]; then
|
||||
# Install from main git after sync
|
||||
echo "install-after-sync=true" >> $GITHUB_OUTPUT
|
||||
echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@main" >> $GITHUB_OUTPUT
|
||||
elif [ "${{ inputs.client-version }}" = "published" ]; then
|
||||
# Use published version from PyPI (installed by sync)
|
||||
echo "install-after-sync=false" >> $GITHUB_OUTPUT
|
||||
elif [ -n "${{ inputs.client-version }}" ]; then
|
||||
echo "::error::Invalid client-version: ${{ inputs.client-version }}"
|
||||
exit 1
|
||||
fi
|
||||
30
.github/actions/setup-runner/action.yml
vendored
30
.github/actions/setup-runner/action.yml
vendored
|
|
@ -18,25 +18,35 @@ runs:
|
|||
python-version: ${{ inputs.python-version }}
|
||||
version: 0.7.6
|
||||
|
||||
- name: Configure client installation
|
||||
id: client-config
|
||||
uses: ./.github/actions/install-llama-stack-client
|
||||
with:
|
||||
client-version: ${{ inputs.client-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
env:
|
||||
UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
|
||||
run: |
|
||||
# Export UV env vars for current step and persist to GITHUB_ENV for subsequent steps
|
||||
if [ -n "$UV_EXTRA_INDEX_URL" ]; then
|
||||
export UV_INDEX_STRATEGY=unsafe-best-match
|
||||
echo "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" >> $GITHUB_ENV
|
||||
echo "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY" >> $GITHUB_ENV
|
||||
echo "Exported UV environment variables for current and subsequent steps"
|
||||
fi
|
||||
|
||||
echo "Updating project dependencies via uv sync"
|
||||
uv sync --all-groups
|
||||
|
||||
echo "Installing ad-hoc dependencies"
|
||||
uv pip install faiss-cpu
|
||||
|
||||
# Install llama-stack-client-python based on the client-version input
|
||||
if [ "${{ inputs.client-version }}" = "latest" ]; then
|
||||
echo "Installing latest llama-stack-client-python from main branch"
|
||||
uv pip install git+https://github.com/llamastack/llama-stack-client-python.git@main
|
||||
elif [ "${{ inputs.client-version }}" = "published" ]; then
|
||||
echo "Installing published llama-stack-client-python from PyPI"
|
||||
uv pip install llama-stack-client
|
||||
else
|
||||
echo "Invalid client-version: ${{ inputs.client-version }}"
|
||||
exit 1
|
||||
# Install specific client version after sync if needed
|
||||
if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
|
||||
echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
|
||||
uv pip install ${{ steps.client-config.outputs.install-source }}
|
||||
fi
|
||||
|
||||
echo "Installed llama packages"
|
||||
|
|
|
|||
|
|
@ -42,36 +42,7 @@ runs:
|
|||
- name: Build Llama Stack
|
||||
shell: bash
|
||||
run: |
|
||||
# Install llama-stack-client-python based on the client-version input
|
||||
if [ "${{ inputs.client-version }}" = "latest" ]; then
|
||||
# Check if PR is targeting a release branch
|
||||
TARGET_BRANCH="${{ github.base_ref }}"
|
||||
|
||||
if [[ "$TARGET_BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x-maint$ ]]; then
|
||||
echo "PR targets release branch: $TARGET_BRANCH"
|
||||
echo "Checking if matching branch exists in llama-stack-client-python..."
|
||||
|
||||
# Check if the branch exists in the client repo
|
||||
if git ls-remote --exit-code --heads https://github.com/llamastack/llama-stack-client-python.git "$TARGET_BRANCH" > /dev/null 2>&1; then
|
||||
echo "Installing llama-stack-client-python from matching branch: $TARGET_BRANCH"
|
||||
export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@$TARGET_BRANCH
|
||||
else
|
||||
echo "::error::Branch $TARGET_BRANCH not found in llama-stack-client-python repository"
|
||||
echo "::error::Please create the matching release branch in llama-stack-client-python before testing"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "Installing latest llama-stack-client-python from main branch"
|
||||
export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@main
|
||||
fi
|
||||
elif [ "${{ inputs.client-version }}" = "published" ]; then
|
||||
echo "Installing published llama-stack-client-python from PyPI"
|
||||
unset LLAMA_STACK_CLIENT_DIR
|
||||
else
|
||||
echo "Invalid client-version: ${{ inputs.client-version }}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Client is already installed by setup-runner (handles both main and release branches)
|
||||
echo "Building Llama Stack"
|
||||
|
||||
LLAMA_STACK_DIR=. \
|
||||
|
|
|
|||
1
.github/workflows/README.md
vendored
1
.github/workflows/README.md
vendored
|
|
@ -13,7 +13,6 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
|
|||
| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suites from tests/integration in replay mode |
|
||||
| Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
|
||||
| Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
|
||||
| Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR |
|
||||
| Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
|
||||
| Test llama stack list-deps | [providers-list-deps.yml](providers-list-deps.yml) | Test llama stack list-deps |
|
||||
| Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
|
||||
|
|
|
|||
8
.github/workflows/backward-compat.yml
vendored
8
.github/workflows/backward-compat.yml
vendored
|
|
@ -6,7 +6,9 @@ on:
|
|||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- 'release-[0-9]+.[0-9]+.x-maint'
|
||||
- 'release-[0-9]+.[0-9]+.[0-9]+.[0-9]+'
|
||||
- 'release-[0-9]+.[0-9]+.[0-9]+'
|
||||
- 'release-[0-9]+.[0-9]+'
|
||||
paths:
|
||||
- 'src/llama_stack/core/datatypes.py'
|
||||
- 'src/llama_stack/providers/datatypes.py'
|
||||
|
|
@ -35,7 +37,7 @@ jobs:
|
|||
python-version: '3.12'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
|
||||
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
|
||||
with:
|
||||
enable-cache: true
|
||||
|
||||
|
|
@ -413,7 +415,7 @@ jobs:
|
|||
python-version: '3.12'
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
|
||||
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
|
||||
with:
|
||||
enable-cache: true
|
||||
|
||||
|
|
|
|||
1
.github/workflows/conformance.yml
vendored
1
.github/workflows/conformance.yml
vendored
|
|
@ -22,7 +22,6 @@ on:
|
|||
- 'docs/static/stable-llama-stack-spec.yaml' # Stable APIs spec
|
||||
- 'docs/static/experimental-llama-stack-spec.yaml' # Experimental APIs spec
|
||||
- 'docs/static/deprecated-llama-stack-spec.yaml' # Deprecated APIs spec
|
||||
- 'docs/static/llama-stack-spec.html' # Legacy HTML spec
|
||||
- '.github/workflows/conformance.yml' # This workflow itself
|
||||
|
||||
concurrency:
|
||||
|
|
|
|||
10
.github/workflows/install-script-ci.yml
vendored
10
.github/workflows/install-script-ci.yml
vendored
|
|
@ -30,10 +30,16 @@ jobs:
|
|||
|
||||
- name: Build a single provider
|
||||
run: |
|
||||
BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=starter"
|
||||
if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
|
||||
fi
|
||||
if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
|
||||
fi
|
||||
docker build . \
|
||||
-f containers/Containerfile \
|
||||
--build-arg INSTALL_MODE=editable \
|
||||
--build-arg DISTRO_NAME=starter \
|
||||
$BUILD_ARGS \
|
||||
--tag llama-stack:starter-ci
|
||||
|
||||
- name: Run installer end-to-end
|
||||
|
|
|
|||
4
.github/workflows/integration-auth-tests.yml
vendored
4
.github/workflows/integration-auth-tests.yml
vendored
|
|
@ -6,11 +6,11 @@ on:
|
|||
push:
|
||||
branches:
|
||||
- main
|
||||
- 'release-[0-9]+.[0-9]+.x-maint'
|
||||
- 'release-[0-9]+.[0-9]+.x'
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- 'release-[0-9]+.[0-9]+.x-maint'
|
||||
- 'release-[0-9]+.[0-9]+.x'
|
||||
paths:
|
||||
- 'distributions/**'
|
||||
- 'src/llama_stack/**'
|
||||
|
|
|
|||
|
|
@ -6,11 +6,11 @@ on:
|
|||
push:
|
||||
branches:
|
||||
- main
|
||||
- 'release-[0-9]+.[0-9]+.x-maint'
|
||||
- 'release-[0-9]+.[0-9]+.x'
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- 'release-[0-9]+.[0-9]+.x-maint'
|
||||
- 'release-[0-9]+.[0-9]+.x'
|
||||
paths:
|
||||
- 'src/llama_stack/providers/utils/sqlstore/**'
|
||||
- 'tests/integration/sqlstore/**'
|
||||
|
|
|
|||
4
.github/workflows/integration-tests.yml
vendored
4
.github/workflows/integration-tests.yml
vendored
|
|
@ -6,11 +6,11 @@ on:
|
|||
push:
|
||||
branches:
|
||||
- main
|
||||
- 'release-[0-9]+.[0-9]+.x-maint'
|
||||
- 'release-[0-9]+.[0-9]+.x'
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- 'release-[0-9]+.[0-9]+.x-maint'
|
||||
- 'release-[0-9]+.[0-9]+.x'
|
||||
types: [opened, synchronize, reopened]
|
||||
paths:
|
||||
- 'src/llama_stack/**'
|
||||
|
|
|
|||
|
|
@ -6,11 +6,11 @@ on:
|
|||
push:
|
||||
branches:
|
||||
- main
|
||||
- 'release-[0-9]+.[0-9]+.x-maint'
|
||||
- 'release-[0-9]+.[0-9]+.x'
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- 'release-[0-9]+.[0-9]+.x-maint'
|
||||
- 'release-[0-9]+.[0-9]+.x'
|
||||
paths:
|
||||
- 'src/llama_stack/**'
|
||||
- '!src/llama_stack/ui/**'
|
||||
|
|
|
|||
29
.github/workflows/pre-commit.yml
vendored
29
.github/workflows/pre-commit.yml
vendored
|
|
@ -7,7 +7,7 @@ on:
|
|||
push:
|
||||
branches:
|
||||
- main
|
||||
- 'release-[0-9]+.[0-9]+.x-maint'
|
||||
- 'release-[0-9]+.[0-9]+.x'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
|
||||
|
|
@ -46,7 +46,7 @@ jobs:
|
|||
cache-dependency-path: 'src/llama_stack/ui/'
|
||||
|
||||
- name: Set up uv
|
||||
uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
|
||||
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
|
||||
|
||||
- name: Install npm dependencies
|
||||
run: npm ci
|
||||
|
|
@ -130,11 +130,34 @@ jobs:
|
|||
exit 1
|
||||
fi
|
||||
|
||||
- name: Configure client installation
|
||||
id: client-config
|
||||
uses: ./.github/actions/install-llama-stack-client
|
||||
|
||||
- name: Sync dev + type_checking dependencies
|
||||
run: uv sync --group dev --group type_checking
|
||||
env:
|
||||
UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
|
||||
run: |
|
||||
if [ -n "$UV_EXTRA_INDEX_URL" ]; then
|
||||
export UV_INDEX_STRATEGY="unsafe-best-match"
|
||||
fi
|
||||
|
||||
uv sync --group dev --group type_checking
|
||||
|
||||
# Install specific client version after sync if needed
|
||||
if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
|
||||
echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
|
||||
uv pip install ${{ steps.client-config.outputs.install-source }}
|
||||
fi
|
||||
|
||||
- name: Run mypy (full type_checking)
|
||||
env:
|
||||
UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
|
||||
run: |
|
||||
if [ -n "$UV_EXTRA_INDEX_URL" ]; then
|
||||
export UV_INDEX_STRATEGY="unsafe-best-match"
|
||||
fi
|
||||
|
||||
set +e
|
||||
uv run --group dev --group type_checking mypy
|
||||
status=$?
|
||||
|
|
|
|||
227
.github/workflows/precommit-trigger.yml
vendored
227
.github/workflows/precommit-trigger.yml
vendored
|
|
@ -1,227 +0,0 @@
|
|||
name: Pre-commit Bot
|
||||
|
||||
run-name: Pre-commit bot for PR #${{ github.event.issue.number }}
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
pre-commit:
|
||||
# Only run on pull request comments
|
||||
if: github.event.issue.pull_request && contains(github.event.comment.body, '@github-actions run precommit')
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
steps:
|
||||
- name: Check comment author and get PR details
|
||||
id: check_author
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
// Get PR details
|
||||
const pr = await github.rest.pulls.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
pull_number: context.issue.number
|
||||
});
|
||||
|
||||
// Check if commenter has write access or is the PR author
|
||||
const commenter = context.payload.comment.user.login;
|
||||
const prAuthor = pr.data.user.login;
|
||||
|
||||
let hasPermission = false;
|
||||
|
||||
// Check if commenter is PR author
|
||||
if (commenter === prAuthor) {
|
||||
hasPermission = true;
|
||||
console.log(`Comment author ${commenter} is the PR author`);
|
||||
} else {
|
||||
// Check if commenter has write/admin access
|
||||
try {
|
||||
const permission = await github.rest.repos.getCollaboratorPermissionLevel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
username: commenter
|
||||
});
|
||||
|
||||
const level = permission.data.permission;
|
||||
hasPermission = ['write', 'admin', 'maintain'].includes(level);
|
||||
console.log(`Comment author ${commenter} has permission: ${level}`);
|
||||
} catch (error) {
|
||||
console.log(`Could not check permissions for ${commenter}: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasPermission) {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: `❌ @${commenter} You don't have permission to trigger pre-commit. Only PR authors or repository collaborators can run this command.`
|
||||
});
|
||||
core.setFailed(`User ${commenter} does not have permission`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Save PR info for later steps
|
||||
core.setOutput('pr_number', context.issue.number);
|
||||
core.setOutput('pr_head_ref', pr.data.head.ref);
|
||||
core.setOutput('pr_head_sha', pr.data.head.sha);
|
||||
core.setOutput('pr_head_repo', pr.data.head.repo.full_name);
|
||||
core.setOutput('pr_base_ref', pr.data.base.ref);
|
||||
core.setOutput('is_fork', pr.data.head.repo.full_name !== context.payload.repository.full_name);
|
||||
core.setOutput('authorized', 'true');
|
||||
|
||||
- name: React to comment
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
await github.rest.reactions.createForIssueComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
comment_id: context.payload.comment.id,
|
||||
content: 'rocket'
|
||||
});
|
||||
|
||||
- name: Comment starting
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: ${{ steps.check_author.outputs.pr_number }},
|
||||
body: `⏳ Running [pre-commit hooks](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) on PR #${{ steps.check_author.outputs.pr_number }}...`
|
||||
});
|
||||
|
||||
- name: Checkout PR branch (same-repo)
|
||||
if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'false'
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
ref: ${{ steps.check_author.outputs.pr_head_ref }}
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Checkout PR branch (fork)
|
||||
if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'true'
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
repository: ${{ steps.check_author.outputs.pr_head_repo }}
|
||||
ref: ${{ steps.check_author.outputs.pr_head_ref }}
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Verify checkout
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
run: |
|
||||
echo "Current SHA: $(git rev-parse HEAD)"
|
||||
echo "Expected SHA: ${{ steps.check_author.outputs.pr_head_sha }}"
|
||||
if [[ "$(git rev-parse HEAD)" != "${{ steps.check_author.outputs.pr_head_sha }}" ]]; then
|
||||
echo "::error::Checked out SHA does not match expected SHA"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Set up Python
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
||||
with:
|
||||
python-version: '3.12'
|
||||
cache: pip
|
||||
cache-dependency-path: |
|
||||
**/requirements*.txt
|
||||
.pre-commit-config.yaml
|
||||
|
||||
- name: Set up Node.js
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
|
||||
with:
|
||||
node-version: '20'
|
||||
cache: 'npm'
|
||||
cache-dependency-path: 'src/llama_stack/ui/'
|
||||
|
||||
- name: Install npm dependencies
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
run: npm ci
|
||||
working-directory: src/llama_stack/ui
|
||||
|
||||
- name: Run pre-commit
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
id: precommit
|
||||
uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
|
||||
continue-on-error: true
|
||||
env:
|
||||
SKIP: no-commit-to-branch
|
||||
RUFF_OUTPUT_FORMAT: github
|
||||
|
||||
- name: Check for changes
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
id: changes
|
||||
run: |
|
||||
if ! git diff --exit-code || [ -n "$(git ls-files --others --exclude-standard)" ]; then
|
||||
echo "has_changes=true" >> $GITHUB_OUTPUT
|
||||
echo "Changes detected after pre-commit"
|
||||
else
|
||||
echo "has_changes=false" >> $GITHUB_OUTPUT
|
||||
echo "No changes after pre-commit"
|
||||
fi
|
||||
|
||||
- name: Commit and push changes
|
||||
if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
|
||||
run: |
|
||||
git config --local user.email "github-actions[bot]@users.noreply.github.com"
|
||||
git config --local user.name "github-actions[bot]"
|
||||
|
||||
git add -A
|
||||
git commit -m "style: apply pre-commit fixes
|
||||
|
||||
🤖 Applied by @github-actions bot via pre-commit workflow"
|
||||
|
||||
# Push changes
|
||||
git push origin HEAD:${{ steps.check_author.outputs.pr_head_ref }}
|
||||
|
||||
- name: Comment success with changes
|
||||
if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: ${{ steps.check_author.outputs.pr_number }},
|
||||
body: `✅ Pre-commit hooks completed successfully!\n\n🔧 Changes have been committed and pushed to the PR branch.`
|
||||
});
|
||||
|
||||
- name: Comment success without changes
|
||||
if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'false' && steps.precommit.outcome == 'success'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: ${{ steps.check_author.outputs.pr_number }},
|
||||
body: `✅ Pre-commit hooks passed!\n\n✨ No changes needed - your code is already formatted correctly.`
|
||||
});
|
||||
|
||||
- name: Comment failure
|
||||
if: failure()
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: ${{ steps.check_author.outputs.pr_number }},
|
||||
body: `❌ Pre-commit workflow failed!\n\nPlease check the [workflow logs](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) for details.`
|
||||
});
|
||||
38
.github/workflows/providers-build.yml
vendored
38
.github/workflows/providers-build.yml
vendored
|
|
@ -72,10 +72,16 @@ jobs:
|
|||
- name: Build container image
|
||||
if: matrix.image-type == 'container'
|
||||
run: |
|
||||
BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=${{ matrix.distro }}"
|
||||
if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
|
||||
fi
|
||||
if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
|
||||
fi
|
||||
docker build . \
|
||||
-f containers/Containerfile \
|
||||
--build-arg INSTALL_MODE=editable \
|
||||
--build-arg DISTRO_NAME=${{ matrix.distro }} \
|
||||
$BUILD_ARGS \
|
||||
--tag llama-stack:${{ matrix.distro }}-ci
|
||||
|
||||
- name: Print dependencies in the image
|
||||
|
|
@ -108,12 +114,18 @@ jobs:
|
|||
- name: Build container image
|
||||
run: |
|
||||
BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml)
|
||||
BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml"
|
||||
if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
|
||||
fi
|
||||
if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
|
||||
fi
|
||||
docker build . \
|
||||
-f containers/Containerfile \
|
||||
--build-arg INSTALL_MODE=editable \
|
||||
--build-arg DISTRO_NAME=ci-tests \
|
||||
--build-arg BASE_IMAGE="$BASE_IMAGE" \
|
||||
--build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
|
||||
$BUILD_ARGS \
|
||||
-t llama-stack:ci-tests
|
||||
|
||||
- name: Inspect the container image entrypoint
|
||||
|
|
@ -148,12 +160,18 @@ jobs:
|
|||
- name: Build UBI9 container image
|
||||
run: |
|
||||
BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml)
|
||||
BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml"
|
||||
if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
|
||||
fi
|
||||
if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
|
||||
fi
|
||||
docker build . \
|
||||
-f containers/Containerfile \
|
||||
--build-arg INSTALL_MODE=editable \
|
||||
--build-arg DISTRO_NAME=ci-tests \
|
||||
--build-arg BASE_IMAGE="$BASE_IMAGE" \
|
||||
--build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
|
||||
$BUILD_ARGS \
|
||||
-t llama-stack:ci-tests-ubi9
|
||||
|
||||
- name: Inspect UBI9 image
|
||||
|
|
|
|||
2
.github/workflows/python-build-test.yml
vendored
2
.github/workflows/python-build-test.yml
vendored
|
|
@ -24,7 +24,7 @@ jobs:
|
|||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
|
||||
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
activate-environment: true
|
||||
|
|
|
|||
4
.github/workflows/unit-tests.yml
vendored
4
.github/workflows/unit-tests.yml
vendored
|
|
@ -6,11 +6,11 @@ on:
|
|||
push:
|
||||
branches:
|
||||
- main
|
||||
- 'release-[0-9]+.[0-9]+.x-maint'
|
||||
- 'release-[0-9]+.[0-9]+.x'
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- 'release-[0-9]+.[0-9]+.x-maint'
|
||||
- 'release-[0-9]+.[0-9]+.x'
|
||||
paths:
|
||||
- 'src/llama_stack/**'
|
||||
- '!src/llama_stack/ui/**'
|
||||
|
|
|
|||
|
|
@ -52,10 +52,6 @@ repos:
|
|||
additional_dependencies:
|
||||
- black==24.3.0
|
||||
|
||||
- repo: https://github.com/astral-sh/uv-pre-commit
|
||||
rev: 0.7.20
|
||||
hooks:
|
||||
- id: uv-lock
|
||||
|
||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||
rev: v1.18.2
|
||||
|
|
@ -63,22 +59,13 @@ repos:
|
|||
- id: mypy
|
||||
additional_dependencies:
|
||||
- uv==0.6.2
|
||||
- mypy
|
||||
- pytest
|
||||
- rich
|
||||
- types-requests
|
||||
- pydantic
|
||||
- httpx
|
||||
pass_filenames: false
|
||||
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: mypy-full
|
||||
name: mypy (full type_checking)
|
||||
entry: uv run --group dev --group type_checking mypy
|
||||
language: system
|
||||
pass_filenames: false
|
||||
stages: [manual]
|
||||
|
||||
# - repo: https://github.com/tcort/markdown-link-check
|
||||
# rev: v3.11.2
|
||||
# hooks:
|
||||
|
|
@ -87,11 +74,26 @@ repos:
|
|||
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: uv-lock
|
||||
name: uv-lock
|
||||
additional_dependencies:
|
||||
- uv==0.7.20
|
||||
entry: ./scripts/uv-run-with-index.sh lock
|
||||
language: python
|
||||
pass_filenames: false
|
||||
require_serial: true
|
||||
files: ^(pyproject\.toml|uv\.lock)$
|
||||
- id: mypy-full
|
||||
name: mypy (full type_checking)
|
||||
entry: ./scripts/uv-run-with-index.sh run --group dev --group type_checking mypy
|
||||
language: system
|
||||
pass_filenames: false
|
||||
stages: [manual]
|
||||
- id: distro-codegen
|
||||
name: Distribution Template Codegen
|
||||
additional_dependencies:
|
||||
- uv==0.7.8
|
||||
entry: uv run --group codegen ./scripts/distro_codegen.py
|
||||
entry: ./scripts/uv-run-with-index.sh run --group codegen ./scripts/distro_codegen.py
|
||||
language: python
|
||||
pass_filenames: false
|
||||
require_serial: true
|
||||
|
|
@ -100,7 +102,7 @@ repos:
|
|||
name: Provider Codegen
|
||||
additional_dependencies:
|
||||
- uv==0.7.8
|
||||
entry: uv run --group codegen ./scripts/provider_codegen.py
|
||||
entry: ./scripts/uv-run-with-index.sh run --group codegen ./scripts/provider_codegen.py
|
||||
language: python
|
||||
pass_filenames: false
|
||||
require_serial: true
|
||||
|
|
@ -109,7 +111,7 @@ repos:
|
|||
name: API Spec Codegen
|
||||
additional_dependencies:
|
||||
- uv==0.7.8
|
||||
entry: sh -c 'uv run ./docs/openapi_generator/run_openapi_generator.sh > /dev/null'
|
||||
entry: sh -c './scripts/uv-run-with-index.sh run ./docs/openapi_generator/run_openapi_generator.sh > /dev/null'
|
||||
language: python
|
||||
pass_filenames: false
|
||||
require_serial: true
|
||||
|
|
@ -150,7 +152,7 @@ repos:
|
|||
name: Generate CI documentation
|
||||
additional_dependencies:
|
||||
- uv==0.7.8
|
||||
entry: uv run ./scripts/gen-ci-docs.py
|
||||
entry: ./scripts/uv-run-with-index.sh run ./scripts/gen-ci-docs.py
|
||||
language: python
|
||||
pass_filenames: false
|
||||
require_serial: true
|
||||
|
|
@ -162,6 +164,7 @@ repos:
|
|||
files: ^src/llama_stack/ui/.*\.(ts|tsx)$
|
||||
pass_filenames: false
|
||||
require_serial: true
|
||||
|
||||
- id: check-log-usage
|
||||
name: Ensure 'llama_stack.log' usage for logging
|
||||
entry: bash
|
||||
|
|
@ -197,6 +200,7 @@ repos:
|
|||
echo;
|
||||
exit 1;
|
||||
} || true
|
||||
|
||||
ci:
|
||||
autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
|
||||
autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
|
||||
|
|
|
|||
|
|
@ -956,7 +956,22 @@ paths:
|
|||
List routes.
|
||||
|
||||
List all available API routes with their methods and implementing providers.
|
||||
parameters: []
|
||||
parameters:
|
||||
- name: api_filter
|
||||
in: query
|
||||
description: >-
|
||||
Optional filter to control which routes are returned. Can be an API level
|
||||
('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level,
|
||||
or 'deprecated' to show deprecated routes across all levels. If not specified,
|
||||
returns only non-deprecated v1 routes.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
enum:
|
||||
- v1
|
||||
- v1alpha
|
||||
- v1beta
|
||||
- deprecated
|
||||
deprecated: false
|
||||
/v1/models:
|
||||
get:
|
||||
|
|
|
|||
|
|
@ -19,6 +19,8 @@ ARG KEEP_WORKSPACE=""
|
|||
ARG DISTRO_NAME="starter"
|
||||
ARG RUN_CONFIG_PATH=""
|
||||
ARG UV_HTTP_TIMEOUT=500
|
||||
ARG UV_EXTRA_INDEX_URL=""
|
||||
ARG UV_INDEX_STRATEGY=""
|
||||
ENV UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT}
|
||||
ENV PYTHONDONTWRITEBYTECODE=1
|
||||
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
|
|
@ -45,7 +47,7 @@ RUN set -eux; \
|
|||
exit 1; \
|
||||
fi
|
||||
|
||||
RUN pip install --no-cache uv
|
||||
RUN pip install --no-cache-dir uv
|
||||
ENV UV_SYSTEM_PYTHON=1
|
||||
|
||||
ENV INSTALL_MODE=${INSTALL_MODE}
|
||||
|
|
@ -62,47 +64,60 @@ COPY . /workspace
|
|||
|
||||
# Install the client package if it is provided
|
||||
# NOTE: this is installed before llama-stack since llama-stack depends on llama-stack-client-python
|
||||
# Unset UV index env vars to ensure we only use PyPI for the client
|
||||
RUN set -eux; \
|
||||
unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \
|
||||
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
|
||||
if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
|
||||
echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
uv pip install --no-cache -e "$LLAMA_STACK_CLIENT_DIR"; \
|
||||
uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
|
||||
fi;
|
||||
|
||||
# Install llama-stack
|
||||
# Use UV_EXTRA_INDEX_URL inline only for editable install with RC dependencies
|
||||
RUN set -eux; \
|
||||
SAVED_UV_EXTRA_INDEX_URL="${UV_EXTRA_INDEX_URL:-}"; \
|
||||
SAVED_UV_INDEX_STRATEGY="${UV_INDEX_STRATEGY:-}"; \
|
||||
unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \
|
||||
if [ "$INSTALL_MODE" = "editable" ]; then \
|
||||
if [ ! -d "$LLAMA_STACK_DIR" ]; then \
|
||||
echo "INSTALL_MODE=editable requires LLAMA_STACK_DIR to point to a directory inside the build context" >&2; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
uv pip install --no-cache -e "$LLAMA_STACK_DIR"; \
|
||||
elif [ "$INSTALL_MODE" = "test-pypi" ]; then \
|
||||
uv pip install --no-cache fastapi libcst; \
|
||||
if [ -n "$TEST_PYPI_VERSION" ]; then \
|
||||
uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
|
||||
if [ -n "$SAVED_UV_EXTRA_INDEX_URL" ] && [ -n "$SAVED_UV_INDEX_STRATEGY" ]; then \
|
||||
UV_EXTRA_INDEX_URL="$SAVED_UV_EXTRA_INDEX_URL" UV_INDEX_STRATEGY="$SAVED_UV_INDEX_STRATEGY" \
|
||||
uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
|
||||
else \
|
||||
uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
|
||||
uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
|
||||
fi; \
|
||||
elif [ "$INSTALL_MODE" = "test-pypi" ]; then \
|
||||
uv pip install --no-cache-dir fastapi libcst; \
|
||||
if [ -n "$TEST_PYPI_VERSION" ]; then \
|
||||
uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
|
||||
else \
|
||||
uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
|
||||
fi; \
|
||||
else \
|
||||
if [ -n "$PYPI_VERSION" ]; then \
|
||||
uv pip install --no-cache "llama-stack==$PYPI_VERSION"; \
|
||||
uv pip install --no-cache-dir "llama-stack==$PYPI_VERSION"; \
|
||||
else \
|
||||
uv pip install --no-cache llama-stack; \
|
||||
uv pip install --no-cache-dir llama-stack; \
|
||||
fi; \
|
||||
fi;
|
||||
|
||||
# Install the dependencies for the distribution
|
||||
# Explicitly unset UV index env vars to ensure we only use PyPI for distribution deps
|
||||
RUN set -eux; \
|
||||
unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \
|
||||
if [ -z "$DISTRO_NAME" ]; then \
|
||||
echo "DISTRO_NAME must be provided" >&2; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
deps="$(llama stack list-deps "$DISTRO_NAME")"; \
|
||||
if [ -n "$deps" ]; then \
|
||||
printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache; \
|
||||
printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache-dir; \
|
||||
fi
|
||||
|
||||
# Cleanup
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
|
|||
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
|
||||
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
|
||||
| `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
|
||||
| `rerank_model_to_url` | `dict[str, str` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |
|
||||
|
||||
## Sample Configuration
|
||||
|
||||
|
|
|
|||
|
|
@ -84,7 +84,6 @@ def generate_spec(output_dir: Path, stability_filter: str = None, main_spec: boo
|
|||
)
|
||||
|
||||
yaml_filename = f"{filename_prefix}llama-stack-spec.yaml"
|
||||
html_filename = f"{filename_prefix}llama-stack-spec.html"
|
||||
|
||||
with open(output_dir / yaml_filename, "w", encoding="utf-8") as fp:
|
||||
y = yaml.YAML()
|
||||
|
|
@ -102,11 +101,6 @@ def generate_spec(output_dir: Path, stability_filter: str = None, main_spec: boo
|
|||
fp,
|
||||
)
|
||||
|
||||
with open(output_dir / html_filename, "w") as fp:
|
||||
spec.write_html(fp, pretty_print=True)
|
||||
|
||||
print(f"Generated {yaml_filename} and {html_filename}")
|
||||
|
||||
def main(output_dir: str):
|
||||
output_dir = Path(output_dir)
|
||||
if not output_dir.exists():
|
||||
|
|
|
|||
14220
docs/static/deprecated-llama-stack-spec.html
vendored
14220
docs/static/deprecated-llama-stack-spec.html
vendored
File diff suppressed because it is too large
Load diff
5552
docs/static/experimental-llama-stack-spec.html
vendored
5552
docs/static/experimental-llama-stack-spec.html
vendored
File diff suppressed because it is too large
Load diff
18
docs/static/llama-stack-spec.html
vendored
18
docs/static/llama-stack-spec.html
vendored
|
|
@ -1258,7 +1258,23 @@
|
|||
],
|
||||
"summary": "List routes.",
|
||||
"description": "List routes.\nList all available API routes with their methods and implementing providers.",
|
||||
"parameters": [],
|
||||
"parameters": [
|
||||
{
|
||||
"name": "api_filter",
|
||||
"in": "query",
|
||||
"description": "Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns only non-deprecated v1 routes.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"v1",
|
||||
"v1alpha",
|
||||
"v1beta",
|
||||
"deprecated"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
|
|
|
|||
17
docs/static/llama-stack-spec.yaml
vendored
17
docs/static/llama-stack-spec.yaml
vendored
|
|
@ -953,7 +953,22 @@ paths:
|
|||
List routes.
|
||||
|
||||
List all available API routes with their methods and implementing providers.
|
||||
parameters: []
|
||||
parameters:
|
||||
- name: api_filter
|
||||
in: query
|
||||
description: >-
|
||||
Optional filter to control which routes are returned. Can be an API level
|
||||
('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level,
|
||||
or 'deprecated' to show deprecated routes across all levels. If not specified,
|
||||
returns only non-deprecated v1 routes.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
enum:
|
||||
- v1
|
||||
- v1alpha
|
||||
- v1beta
|
||||
- deprecated
|
||||
deprecated: false
|
||||
/v1/models:
|
||||
get:
|
||||
|
|
|
|||
18730
docs/static/stainless-llama-stack-spec.html
vendored
18730
docs/static/stainless-llama-stack-spec.html
vendored
File diff suppressed because it is too large
Load diff
17
docs/static/stainless-llama-stack-spec.yaml
vendored
17
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -956,7 +956,22 @@ paths:
|
|||
List routes.
|
||||
|
||||
List all available API routes with their methods and implementing providers.
|
||||
parameters: []
|
||||
parameters:
|
||||
- name: api_filter
|
||||
in: query
|
||||
description: >-
|
||||
Optional filter to control which routes are returned. Can be an API level
|
||||
('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level,
|
||||
or 'deprecated' to show deprecated routes across all levels. If not specified,
|
||||
returns only non-deprecated v1 routes.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
enum:
|
||||
- v1
|
||||
- v1alpha
|
||||
- v1beta
|
||||
- deprecated
|
||||
deprecated: false
|
||||
/v1/models:
|
||||
get:
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ required-version = ">=0.7.0"
|
|||
|
||||
[project]
|
||||
name = "llama_stack"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0.dev0"
|
||||
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
|
||||
description = "Llama Stack"
|
||||
readme = "README.md"
|
||||
|
|
|
|||
|
|
@ -215,6 +215,16 @@ build_image() {
|
|||
--build-arg "LLAMA_STACK_DIR=/workspace"
|
||||
)
|
||||
|
||||
# Pass UV index configuration for release branches
|
||||
if [[ -n "${UV_EXTRA_INDEX_URL:-}" ]]; then
|
||||
echo "Adding UV_EXTRA_INDEX_URL to docker build: $UV_EXTRA_INDEX_URL"
|
||||
build_cmd+=(--build-arg "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL")
|
||||
fi
|
||||
if [[ -n "${UV_INDEX_STRATEGY:-}" ]]; then
|
||||
echo "Adding UV_INDEX_STRATEGY to docker build: $UV_INDEX_STRATEGY"
|
||||
build_cmd+=(--build-arg "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY")
|
||||
fi
|
||||
|
||||
if ! "${build_cmd[@]}"; then
|
||||
echo "❌ Failed to build Docker image"
|
||||
exit 1
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ COLLECT_ONLY=false
|
|||
|
||||
# Function to display usage
|
||||
usage() {
|
||||
cat << EOF
|
||||
cat <<EOF
|
||||
Usage: $0 [OPTIONS]
|
||||
|
||||
Options:
|
||||
|
|
@ -102,7 +102,6 @@ while [[ $# -gt 0 ]]; do
|
|||
esac
|
||||
done
|
||||
|
||||
|
||||
# Validate required parameters
|
||||
if [[ -z "$STACK_CONFIG" && "$COLLECT_ONLY" == false ]]; then
|
||||
echo "Error: --stack-config is required"
|
||||
|
|
@ -177,12 +176,12 @@ cd $ROOT_DIR
|
|||
# check if "llama" and "pytest" are available. this script does not use `uv run` given
|
||||
# it can be used in a pre-release environment where we have not been able to tell
|
||||
# uv about pre-release dependencies properly (yet).
|
||||
if [[ "$COLLECT_ONLY" == false ]] && ! command -v llama &> /dev/null; then
|
||||
if [[ "$COLLECT_ONLY" == false ]] && ! command -v llama &>/dev/null; then
|
||||
echo "llama could not be found, ensure llama-stack is installed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! command -v pytest &> /dev/null; then
|
||||
if ! command -v pytest &>/dev/null; then
|
||||
echo "pytest could not be found, ensure pytest is installed"
|
||||
exit 1
|
||||
fi
|
||||
|
|
@ -216,10 +215,11 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
|
|||
export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
|
||||
export OTEL_BSP_SCHEDULE_DELAY="200"
|
||||
export OTEL_BSP_EXPORT_TIMEOUT="2000"
|
||||
export OTEL_METRIC_EXPORT_INTERVAL="200"
|
||||
|
||||
# remove "server:" from STACK_CONFIG
|
||||
stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
|
||||
nohup llama stack run $stack_config > server.log 2>&1 &
|
||||
nohup llama stack run $stack_config >server.log 2>&1 &
|
||||
|
||||
echo "Waiting for Llama Stack Server to start..."
|
||||
for i in {1..30}; do
|
||||
|
|
@ -248,7 +248,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
|
|||
container_name="llama-stack-test-$DISTRO"
|
||||
if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then
|
||||
echo "Dumping container logs before stopping..."
|
||||
docker logs "$container_name" > "docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true
|
||||
docker logs "$container_name" >"docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true
|
||||
echo "Stopping and removing container: $container_name"
|
||||
docker stop "$container_name" 2>/dev/null || true
|
||||
docker rm "$container_name" 2>/dev/null || true
|
||||
|
|
@ -280,6 +280,16 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
|
|||
--build-arg "LLAMA_STACK_DIR=/workspace"
|
||||
)
|
||||
|
||||
# Pass UV index configuration for release branches
|
||||
if [[ -n "${UV_EXTRA_INDEX_URL:-}" ]]; then
|
||||
echo "Adding UV_EXTRA_INDEX_URL to docker build: $UV_EXTRA_INDEX_URL"
|
||||
build_cmd+=(--build-arg "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL")
|
||||
fi
|
||||
if [[ -n "${UV_INDEX_STRATEGY:-}" ]]; then
|
||||
echo "Adding UV_INDEX_STRATEGY to docker build: $UV_INDEX_STRATEGY"
|
||||
build_cmd+=(--build-arg "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY")
|
||||
fi
|
||||
|
||||
if ! "${build_cmd[@]}"; then
|
||||
echo "❌ Failed to build Docker image"
|
||||
exit 1
|
||||
|
|
@ -302,6 +312,9 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
|
|||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_METRIC_EXPORT_INTERVAL=200"
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_SCHEDULE_DELAY=200"
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_EXPORT_TIMEOUT=2000"
|
||||
|
||||
# Pass through API keys if they exist
|
||||
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
|
||||
|
|
@ -437,17 +450,13 @@ elif [ $exit_code -eq 5 ]; then
|
|||
else
|
||||
echo "❌ Tests failed"
|
||||
echo ""
|
||||
echo "=== Dumping last 100 lines of logs for debugging ==="
|
||||
|
||||
# Output server or container logs based on stack config
|
||||
if [[ "$STACK_CONFIG" == *"server:"* && -f "server.log" ]]; then
|
||||
echo "--- Last 100 lines of server.log ---"
|
||||
tail -100 server.log
|
||||
echo "--- Server side failures can be located inside server.log (available from artifacts on CI) ---"
|
||||
elif [[ "$STACK_CONFIG" == *"docker:"* ]]; then
|
||||
docker_log_file="docker-${DISTRO}-${INFERENCE_MODE}.log"
|
||||
if [[ -f "$docker_log_file" ]]; then
|
||||
echo "--- Last 100 lines of $docker_log_file ---"
|
||||
tail -100 "$docker_log_file"
|
||||
echo "--- Server side failures can be located inside $docker_log_file (available from artifacts on CI) ---"
|
||||
fi
|
||||
fi
|
||||
|
||||
|
|
|
|||
42
scripts/uv-run-with-index.sh
Executable file
42
scripts/uv-run-with-index.sh
Executable file
|
|
@ -0,0 +1,42 @@
|
|||
#!/bin/bash
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Detect current branch and target branch
|
||||
# In GitHub Actions, use GITHUB_REF/GITHUB_BASE_REF
|
||||
if [[ -n "${GITHUB_REF:-}" ]]; then
|
||||
BRANCH="${GITHUB_REF#refs/heads/}"
|
||||
else
|
||||
BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "")
|
||||
fi
|
||||
|
||||
# For PRs, check the target branch
|
||||
if [[ -n "${GITHUB_BASE_REF:-}" ]]; then
|
||||
TARGET_BRANCH="${GITHUB_BASE_REF}"
|
||||
else
|
||||
TARGET_BRANCH=$(git rev-parse --abbrev-ref HEAD@{upstream} 2>/dev/null | sed 's|origin/||' || echo "")
|
||||
fi
|
||||
|
||||
# Check if on a release branch or targeting one, or LLAMA_STACK_RELEASE_MODE is set
|
||||
IS_RELEASE=false
|
||||
if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
|
||||
IS_RELEASE=true
|
||||
elif [[ "$TARGET_BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
|
||||
IS_RELEASE=true
|
||||
elif [[ "${LLAMA_STACK_RELEASE_MODE:-}" == "true" ]]; then
|
||||
IS_RELEASE=true
|
||||
fi
|
||||
|
||||
# On release branches, use test.pypi as extra index for RC versions
|
||||
if [[ "$IS_RELEASE" == "true" ]]; then
|
||||
export UV_EXTRA_INDEX_URL="https://test.pypi.org/simple/"
|
||||
export UV_INDEX_STRATEGY="unsafe-best-match"
|
||||
fi
|
||||
|
||||
# Run uv with all arguments passed through
|
||||
exec uv "$@"
|
||||
|
|
@ -4,14 +4,21 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Protocol, runtime_checkable
|
||||
from typing import Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.apis.version import (
|
||||
LLAMA_STACK_API_V1,
|
||||
)
|
||||
from llama_stack.providers.datatypes import HealthStatus
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
# Valid values for the route filter parameter.
|
||||
# Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated)
|
||||
# Special filter value: "deprecated" (shows deprecated routes regardless of level)
|
||||
ApiFilter = Literal["v1", "v1alpha", "v1beta", "deprecated"]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RouteInfo(BaseModel):
|
||||
|
|
@ -64,11 +71,12 @@ class Inspect(Protocol):
|
|||
"""
|
||||
|
||||
@webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_routes(self) -> ListRoutesResponse:
|
||||
async def list_routes(self, api_filter: ApiFilter | None = None) -> ListRoutesResponse:
|
||||
"""List routes.
|
||||
|
||||
List all available API routes with their methods and implementing providers.
|
||||
|
||||
:param api_filter: Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns only non-deprecated v1 routes.
|
||||
:returns: Response containing information about all available routes.
|
||||
"""
|
||||
...
|
||||
|
|
|
|||
|
|
@ -8,15 +8,28 @@ import argparse
|
|||
import os
|
||||
import ssl
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import uvicorn
|
||||
import yaml
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
from llama_stack.core.datatypes import StackRunConfig
|
||||
from llama_stack.core.datatypes import Api, Provider, StackRunConfig
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
|
||||
from llama_stack.core.storage.datatypes import (
|
||||
InferenceStoreReference,
|
||||
KVStoreReference,
|
||||
ServerStoresConfig,
|
||||
SqliteKVStoreConfig,
|
||||
SqliteSqlStoreConfig,
|
||||
SqlStoreReference,
|
||||
StorageConfig,
|
||||
)
|
||||
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
from llama_stack.log import LoggingConfig, get_logger
|
||||
|
||||
|
|
@ -68,6 +81,12 @@ class StackRun(Subcommand):
|
|||
action="store_true",
|
||||
help="Start the UI server",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--providers",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Run a stack with only a list of providers. This list is formatted like: api1=provider1,api1=provider2,api2=provider3. Where there can be multiple providers per API.",
|
||||
)
|
||||
|
||||
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
|
||||
import yaml
|
||||
|
|
@ -93,6 +112,49 @@ class StackRun(Subcommand):
|
|||
config_file = resolve_config_or_distro(args.config, Mode.RUN)
|
||||
except ValueError as e:
|
||||
self.parser.error(str(e))
|
||||
elif args.providers:
|
||||
provider_list: dict[str, list[Provider]] = dict()
|
||||
for api_provider in args.providers.split(","):
|
||||
if "=" not in api_provider:
|
||||
cprint(
|
||||
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
api, provider_type = api_provider.split("=")
|
||||
providers_for_api = get_provider_registry().get(Api(api), None)
|
||||
if providers_for_api is None:
|
||||
cprint(
|
||||
f"{api} is not a valid API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
if provider_type in providers_for_api:
|
||||
provider = Provider(
|
||||
provider_type=provider_type,
|
||||
provider_id=provider_type.split("::")[1],
|
||||
)
|
||||
provider_list.setdefault(api, []).append(provider)
|
||||
else:
|
||||
cprint(
|
||||
f"{provider} is not a valid provider for the {api} API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
run_config = self._generate_run_config_from_providers(providers=provider_list)
|
||||
config_dict = run_config.model_dump(mode="json")
|
||||
|
||||
# Write config to disk in providers-run directory
|
||||
distro_dir = DISTRIBS_BASE_DIR / "providers-run"
|
||||
config_file = distro_dir / "run.yaml"
|
||||
|
||||
logger.info(f"Writing generated config to: {config_file}")
|
||||
with open(config_file, "w") as f:
|
||||
yaml.dump(config_dict, f, default_flow_style=False, sort_keys=False)
|
||||
|
||||
else:
|
||||
config_file = None
|
||||
|
||||
|
|
@ -106,7 +168,8 @@ class StackRun(Subcommand):
|
|||
|
||||
try:
|
||||
config = parse_and_maybe_upgrade_config(config_dict)
|
||||
if not os.path.exists(str(config.external_providers_dir)):
|
||||
# Create external_providers_dir if it's specified and doesn't exist
|
||||
if config.external_providers_dir and not os.path.exists(str(config.external_providers_dir)):
|
||||
os.makedirs(str(config.external_providers_dir), exist_ok=True)
|
||||
except AttributeError as e:
|
||||
self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
|
||||
|
|
@ -213,3 +276,44 @@ class StackRun(Subcommand):
|
|||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to start UI development server in {ui_dir}: {e}")
|
||||
|
||||
def _generate_run_config_from_providers(self, providers: dict[str, list[Provider]]):
|
||||
apis = list(providers.keys())
|
||||
distro_dir = DISTRIBS_BASE_DIR / "providers-run"
|
||||
# need somewhere to put the storage.
|
||||
os.makedirs(distro_dir, exist_ok=True)
|
||||
storage = StorageConfig(
|
||||
backends={
|
||||
"kv_default": SqliteKVStoreConfig(
|
||||
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
|
||||
),
|
||||
"sql_default": SqliteSqlStoreConfig(
|
||||
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db",
|
||||
),
|
||||
},
|
||||
stores=ServerStoresConfig(
|
||||
metadata=KVStoreReference(
|
||||
backend="kv_default",
|
||||
namespace="registry",
|
||||
),
|
||||
inference=InferenceStoreReference(
|
||||
backend="sql_default",
|
||||
table_name="inference_store",
|
||||
),
|
||||
conversations=SqlStoreReference(
|
||||
backend="sql_default",
|
||||
table_name="openai_conversations",
|
||||
),
|
||||
prompts=KVStoreReference(
|
||||
backend="kv_default",
|
||||
namespace="prompts",
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
return StackRunConfig(
|
||||
image_name="providers-run",
|
||||
apis=apis,
|
||||
providers=providers,
|
||||
storage=storage,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ from llama_stack.core.distribution import (
|
|||
get_provider_registry,
|
||||
)
|
||||
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
|
||||
from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.core.utils.prompt_for_config import prompt_for_config
|
||||
from llama_stack.log import get_logger
|
||||
|
|
@ -194,19 +193,11 @@ def upgrade_from_routing_table(
|
|||
|
||||
|
||||
def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig:
|
||||
version = config_dict.get("version", None)
|
||||
if version == LLAMA_STACK_RUN_CONFIG_VERSION:
|
||||
processed_config_dict = replace_env_vars(config_dict)
|
||||
return StackRunConfig(**cast_image_name_to_string(processed_config_dict))
|
||||
|
||||
if "routing_table" in config_dict:
|
||||
logger.info("Upgrading config...")
|
||||
config_dict = upgrade_from_routing_table(config_dict)
|
||||
|
||||
config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION
|
||||
|
||||
if not config_dict.get("external_providers_dir", None):
|
||||
config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR
|
||||
|
||||
processed_config_dict = replace_env_vars(config_dict)
|
||||
return StackRunConfig(**cast_image_name_to_string(processed_config_dict))
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ from llama_stack.apis.inspect import (
|
|||
RouteInfo,
|
||||
VersionInfo,
|
||||
)
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.datatypes import StackRunConfig
|
||||
from llama_stack.core.external import load_external_apis
|
||||
from llama_stack.core.server.routes import get_all_api_routes
|
||||
|
|
@ -39,9 +40,21 @@ class DistributionInspectImpl(Inspect):
|
|||
async def initialize(self) -> None:
|
||||
pass
|
||||
|
||||
async def list_routes(self) -> ListRoutesResponse:
|
||||
async def list_routes(self, api_filter: str | None = None) -> ListRoutesResponse:
|
||||
run_config: StackRunConfig = self.config.run_config
|
||||
|
||||
# Helper function to determine if a route should be included based on api_filter
|
||||
def should_include_route(webmethod) -> bool:
|
||||
if api_filter is None:
|
||||
# Default: only non-deprecated v1 APIs
|
||||
return not webmethod.deprecated and webmethod.level == LLAMA_STACK_API_V1
|
||||
elif api_filter == "deprecated":
|
||||
# Special filter: show deprecated routes regardless of their actual level
|
||||
return bool(webmethod.deprecated)
|
||||
else:
|
||||
# Filter by API level (non-deprecated routes only)
|
||||
return not webmethod.deprecated and webmethod.level == api_filter
|
||||
|
||||
ret = []
|
||||
external_apis = load_external_apis(run_config)
|
||||
all_endpoints = get_all_api_routes(external_apis)
|
||||
|
|
@ -55,8 +68,8 @@ class DistributionInspectImpl(Inspect):
|
|||
method=next(iter([m for m in e.methods if m != "HEAD"])),
|
||||
provider_types=[], # These APIs don't have "real" providers - they're internal to the stack
|
||||
)
|
||||
for e, _ in endpoints
|
||||
if e.methods is not None
|
||||
for e, webmethod in endpoints
|
||||
if e.methods is not None and should_include_route(webmethod)
|
||||
]
|
||||
)
|
||||
else:
|
||||
|
|
@ -69,8 +82,8 @@ class DistributionInspectImpl(Inspect):
|
|||
method=next(iter([m for m in e.methods if m != "HEAD"])),
|
||||
provider_types=[p.provider_type for p in providers],
|
||||
)
|
||||
for e, _ in endpoints
|
||||
if e.methods is not None
|
||||
for e, webmethod in endpoints
|
||||
if e.methods is not None and should_include_route(webmethod)
|
||||
]
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -427,6 +427,7 @@ _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
|
|||
"counters": {},
|
||||
"gauges": {},
|
||||
"up_down_counters": {},
|
||||
"histograms": {},
|
||||
}
|
||||
_global_lock = threading.Lock()
|
||||
_TRACER_PROVIDER = None
|
||||
|
|
@ -540,6 +541,16 @@ class Telemetry:
|
|||
)
|
||||
return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name])
|
||||
|
||||
def _get_or_create_histogram(self, name: str, unit: str) -> metrics.Histogram:
|
||||
assert self.meter is not None
|
||||
if name not in _GLOBAL_STORAGE["histograms"]:
|
||||
_GLOBAL_STORAGE["histograms"][name] = self.meter.create_histogram(
|
||||
name=name,
|
||||
unit=unit,
|
||||
description=f"Histogram for {name}",
|
||||
)
|
||||
return cast(metrics.Histogram, _GLOBAL_STORAGE["histograms"][name])
|
||||
|
||||
def _log_metric(self, event: MetricEvent) -> None:
|
||||
# Add metric as an event to the current span
|
||||
try:
|
||||
|
|
@ -571,7 +582,16 @@ class Telemetry:
|
|||
# Log to OpenTelemetry meter if available
|
||||
if self.meter is None:
|
||||
return
|
||||
if isinstance(event.value, int):
|
||||
|
||||
# Use histograms for token-related metrics (per-request measurements)
|
||||
# Use counters for other cumulative metrics
|
||||
token_metrics = {"prompt_tokens", "completion_tokens", "total_tokens"}
|
||||
|
||||
if event.metric in token_metrics:
|
||||
# Token metrics are per-request measurements, use histogram
|
||||
histogram = self._get_or_create_histogram(event.metric, event.unit)
|
||||
histogram.record(event.value, attributes=_clean_attributes(event.attributes))
|
||||
elif isinstance(event.value, int):
|
||||
counter = self._get_or_create_counter(event.metric, event.unit)
|
||||
counter.add(event.value, attributes=_clean_attributes(event.attributes))
|
||||
elif isinstance(event.value, float):
|
||||
|
|
|
|||
|
|
@ -1015,7 +1015,7 @@ async def load_data_from_url(url: str) -> str:
|
|||
if url.startswith("http"):
|
||||
async with httpx.AsyncClient() as client:
|
||||
r = await client.get(url)
|
||||
resp = r.text
|
||||
resp: str = r.text
|
||||
return resp
|
||||
raise ValueError(f"Unexpected URL: {type(url)}")
|
||||
|
||||
|
|
|
|||
|
|
@ -181,3 +181,22 @@ vlm_response = client.chat.completions.create(
|
|||
|
||||
print(f"VLM Response: {vlm_response.choices[0].message.content}")
|
||||
```
|
||||
|
||||
### Rerank Example
|
||||
|
||||
The following example shows how to rerank documents using an NVIDIA NIM.
|
||||
|
||||
```python
|
||||
rerank_response = client.alpha.inference.rerank(
|
||||
model="nvidia/nvidia/llama-3.2-nv-rerankqa-1b-v2",
|
||||
query="query",
|
||||
items=[
|
||||
"item_1",
|
||||
"item_2",
|
||||
"item_3",
|
||||
],
|
||||
)
|
||||
|
||||
for i, result in enumerate(rerank_response):
|
||||
print(f"{i+1}. [Index: {result.index}, " f"Score: {(result.relevance_score):.3f}]")
|
||||
```
|
||||
|
|
@ -28,6 +28,7 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
|
|||
Attributes:
|
||||
url (str): A base url for accessing the NVIDIA NIM, e.g. http://localhost:8000
|
||||
api_key (str): The access key for the hosted NIM endpoints
|
||||
rerank_model_to_url (dict[str, str]): Mapping of rerank model identifiers to their API endpoints
|
||||
|
||||
There are two ways to access NVIDIA NIMs -
|
||||
0. Hosted: Preview APIs hosted at https://integrate.api.nvidia.com
|
||||
|
|
@ -55,6 +56,14 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
|
|||
default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false",
|
||||
description="When set to false, the API version will not be appended to the base_url. By default, it is true.",
|
||||
)
|
||||
rerank_model_to_url: dict[str, str] = Field(
|
||||
default_factory=lambda: {
|
||||
"nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
|
||||
"nvidia/nv-rerankqa-mistral-4b-v3": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking",
|
||||
"nvidia/llama-3.2-nv-rerankqa-1b-v2": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking",
|
||||
},
|
||||
description="Mapping of rerank model identifiers to their API endpoints. ",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(
|
||||
|
|
|
|||
|
|
@ -5,6 +5,19 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from collections.abc import Iterable
|
||||
|
||||
import aiohttp
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
RerankData,
|
||||
RerankResponse,
|
||||
)
|
||||
from llama_stack.apis.inference.inference import (
|
||||
OpenAIChatCompletionContentPartImageParam,
|
||||
OpenAIChatCompletionContentPartTextParam,
|
||||
)
|
||||
from llama_stack.apis.models import Model, ModelType
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
|
|
@ -61,3 +74,101 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
|
|||
:return: The NVIDIA API base URL
|
||||
"""
|
||||
return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
|
||||
|
||||
async def list_provider_model_ids(self) -> Iterable[str]:
|
||||
"""
|
||||
Return both dynamic model IDs and statically configured rerank model IDs.
|
||||
"""
|
||||
dynamic_ids: Iterable[str] = []
|
||||
try:
|
||||
dynamic_ids = await super().list_provider_model_ids()
|
||||
except Exception:
|
||||
# If the dynamic listing fails, proceed with just configured rerank IDs
|
||||
dynamic_ids = []
|
||||
|
||||
configured_rerank_ids = list(self.config.rerank_model_to_url.keys())
|
||||
return list(dict.fromkeys(list(dynamic_ids) + configured_rerank_ids)) # remove duplicates
|
||||
|
||||
def construct_model_from_identifier(self, identifier: str) -> Model:
|
||||
"""
|
||||
Classify rerank models from config; otherwise use the base behavior.
|
||||
"""
|
||||
if identifier in self.config.rerank_model_to_url:
|
||||
return Model(
|
||||
provider_id=self.__provider_id__, # type: ignore[attr-defined]
|
||||
provider_resource_id=identifier,
|
||||
identifier=identifier,
|
||||
model_type=ModelType.rerank,
|
||||
)
|
||||
return super().construct_model_from_identifier(identifier)
|
||||
|
||||
async def rerank(
|
||||
self,
|
||||
model: str,
|
||||
query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
|
||||
items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam],
|
||||
max_num_results: int | None = None,
|
||||
) -> RerankResponse:
|
||||
provider_model_id = await self._get_provider_model_id(model)
|
||||
|
||||
ranking_url = self.get_base_url()
|
||||
|
||||
if _is_nvidia_hosted(self.config) and provider_model_id in self.config.rerank_model_to_url:
|
||||
ranking_url = self.config.rerank_model_to_url[provider_model_id]
|
||||
|
||||
logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}")
|
||||
|
||||
# Convert query to text format
|
||||
if isinstance(query, str):
|
||||
query_text = query
|
||||
elif isinstance(query, OpenAIChatCompletionContentPartTextParam):
|
||||
query_text = query.text
|
||||
else:
|
||||
raise ValueError("Query must be a string or text content part")
|
||||
|
||||
# Convert items to text format
|
||||
passages = []
|
||||
for item in items:
|
||||
if isinstance(item, str):
|
||||
passages.append({"text": item})
|
||||
elif isinstance(item, OpenAIChatCompletionContentPartTextParam):
|
||||
passages.append({"text": item.text})
|
||||
else:
|
||||
raise ValueError("Items must be strings or text content parts")
|
||||
|
||||
payload = {
|
||||
"model": provider_model_id,
|
||||
"query": {"text": query_text},
|
||||
"passages": passages,
|
||||
}
|
||||
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.get_api_key()}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(ranking_url, headers=headers, json=payload) as response:
|
||||
if response.status != 200:
|
||||
response_text = await response.text()
|
||||
raise ConnectionError(
|
||||
f"NVIDIA rerank API request failed with status {response.status}: {response_text}"
|
||||
)
|
||||
|
||||
result = await response.json()
|
||||
rankings = result.get("rankings", [])
|
||||
|
||||
# Convert to RerankData format
|
||||
rerank_data = []
|
||||
for ranking in rankings:
|
||||
rerank_data.append(RerankData(index=ranking["index"], relevance_score=ranking["logit"]))
|
||||
|
||||
# Apply max_num_results limit
|
||||
if max_num_results is not None:
|
||||
rerank_data = rerank_data[:max_num_results]
|
||||
|
||||
return RerankResponse(data=rerank_data)
|
||||
|
||||
except aiohttp.ClientError as e:
|
||||
raise ConnectionError(f"Failed to connect to NVIDIA rerank API at {ranking_url}: {e}") from e
|
||||
|
|
|
|||
|
|
@ -51,7 +51,11 @@ async function proxyRequest(request: NextRequest, method: string) {
|
|||
);
|
||||
|
||||
// Create response with same status and headers
|
||||
const proxyResponse = new NextResponse(responseText, {
|
||||
// Handle 204 No Content responses specially
|
||||
const proxyResponse =
|
||||
response.status === 204
|
||||
? new NextResponse(null, { status: 204 })
|
||||
: new NextResponse(responseText, {
|
||||
status: response.status,
|
||||
statusText: response.statusText,
|
||||
});
|
||||
|
|
|
|||
5
src/llama_stack/ui/app/prompts/page.tsx
Normal file
5
src/llama_stack/ui/app/prompts/page.tsx
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
import { PromptManagement } from "@/components/prompts";
|
||||
|
||||
export default function PromptsPage() {
|
||||
return <PromptManagement />;
|
||||
}
|
||||
|
|
@ -8,6 +8,7 @@ import {
|
|||
MessageCircle,
|
||||
Settings2,
|
||||
Compass,
|
||||
FileText,
|
||||
} from "lucide-react";
|
||||
import Link from "next/link";
|
||||
import { usePathname } from "next/navigation";
|
||||
|
|
@ -50,6 +51,11 @@ const manageItems = [
|
|||
url: "/logs/vector-stores",
|
||||
icon: Database,
|
||||
},
|
||||
{
|
||||
title: "Prompts",
|
||||
url: "/prompts",
|
||||
icon: FileText,
|
||||
},
|
||||
{
|
||||
title: "Documentation",
|
||||
url: "https://llama-stack.readthedocs.io/en/latest/references/api_reference/index.html",
|
||||
|
|
|
|||
4
src/llama_stack/ui/components/prompts/index.ts
Normal file
4
src/llama_stack/ui/components/prompts/index.ts
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
export { PromptManagement } from "./prompt-management";
|
||||
export { PromptList } from "./prompt-list";
|
||||
export { PromptEditor } from "./prompt-editor";
|
||||
export * from "./types";
|
||||
309
src/llama_stack/ui/components/prompts/prompt-editor.test.tsx
Normal file
309
src/llama_stack/ui/components/prompts/prompt-editor.test.tsx
Normal file
|
|
@ -0,0 +1,309 @@
|
|||
import React from "react";
|
||||
import { render, screen, fireEvent } from "@testing-library/react";
|
||||
import "@testing-library/jest-dom";
|
||||
import { PromptEditor } from "./prompt-editor";
|
||||
import type { Prompt, PromptFormData } from "./types";
|
||||
|
||||
describe("PromptEditor", () => {
|
||||
const mockOnSave = jest.fn();
|
||||
const mockOnCancel = jest.fn();
|
||||
const mockOnDelete = jest.fn();
|
||||
|
||||
const defaultProps = {
|
||||
onSave: mockOnSave,
|
||||
onCancel: mockOnCancel,
|
||||
onDelete: mockOnDelete,
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("Create Mode", () => {
|
||||
test("renders create form correctly", () => {
|
||||
render(<PromptEditor {...defaultProps} />);
|
||||
|
||||
expect(screen.getByLabelText("Prompt Content *")).toBeInTheDocument();
|
||||
expect(screen.getByText("Variables")).toBeInTheDocument();
|
||||
expect(screen.getByText("Preview")).toBeInTheDocument();
|
||||
expect(screen.getByText("Create Prompt")).toBeInTheDocument();
|
||||
expect(screen.getByText("Cancel")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("shows preview placeholder when no content", () => {
|
||||
render(<PromptEditor {...defaultProps} />);
|
||||
|
||||
expect(
|
||||
screen.getByText("Enter content to preview the compiled prompt")
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("submits form with correct data", () => {
|
||||
render(<PromptEditor {...defaultProps} />);
|
||||
|
||||
const promptInput = screen.getByLabelText("Prompt Content *");
|
||||
fireEvent.change(promptInput, {
|
||||
target: { value: "Hello {{name}}, welcome!" },
|
||||
});
|
||||
|
||||
fireEvent.click(screen.getByText("Create Prompt"));
|
||||
|
||||
expect(mockOnSave).toHaveBeenCalledWith({
|
||||
prompt: "Hello {{name}}, welcome!",
|
||||
variables: [],
|
||||
});
|
||||
});
|
||||
|
||||
test("prevents submission with empty prompt", () => {
|
||||
render(<PromptEditor {...defaultProps} />);
|
||||
|
||||
fireEvent.click(screen.getByText("Create Prompt"));
|
||||
|
||||
expect(mockOnSave).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Edit Mode", () => {
|
||||
const mockPrompt: Prompt = {
|
||||
prompt_id: "prompt_123",
|
||||
prompt: "Hello {{name}}, how is {{weather}}?",
|
||||
version: 1,
|
||||
variables: ["name", "weather"],
|
||||
is_default: true,
|
||||
};
|
||||
|
||||
test("renders edit form with existing data", () => {
|
||||
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
|
||||
|
||||
expect(
|
||||
screen.getByDisplayValue("Hello {{name}}, how is {{weather}}?")
|
||||
).toBeInTheDocument();
|
||||
expect(screen.getAllByText("name")).toHaveLength(2); // One in variables, one in preview
|
||||
expect(screen.getAllByText("weather")).toHaveLength(2); // One in variables, one in preview
|
||||
expect(screen.getByText("Update Prompt")).toBeInTheDocument();
|
||||
expect(screen.getByText("Delete Prompt")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("submits updated data correctly", () => {
|
||||
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
|
||||
|
||||
const promptInput = screen.getByLabelText("Prompt Content *");
|
||||
fireEvent.change(promptInput, {
|
||||
target: { value: "Updated: Hello {{name}}!" },
|
||||
});
|
||||
|
||||
fireEvent.click(screen.getByText("Update Prompt"));
|
||||
|
||||
expect(mockOnSave).toHaveBeenCalledWith({
|
||||
prompt: "Updated: Hello {{name}}!",
|
||||
variables: ["name", "weather"],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("Variables Management", () => {
|
||||
test("adds new variable", () => {
|
||||
render(<PromptEditor {...defaultProps} />);
|
||||
|
||||
const variableInput = screen.getByPlaceholderText(
|
||||
"Add variable name (e.g. user_name, topic)"
|
||||
);
|
||||
fireEvent.change(variableInput, { target: { value: "testVar" } });
|
||||
fireEvent.click(screen.getByText("Add"));
|
||||
|
||||
expect(screen.getByText("testVar")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("prevents adding duplicate variables", () => {
|
||||
render(<PromptEditor {...defaultProps} />);
|
||||
|
||||
const variableInput = screen.getByPlaceholderText(
|
||||
"Add variable name (e.g. user_name, topic)"
|
||||
);
|
||||
|
||||
// Add first variable
|
||||
fireEvent.change(variableInput, { target: { value: "test" } });
|
||||
fireEvent.click(screen.getByText("Add"));
|
||||
|
||||
// Try to add same variable again
|
||||
fireEvent.change(variableInput, { target: { value: "test" } });
|
||||
|
||||
// Button should be disabled
|
||||
expect(screen.getByText("Add")).toBeDisabled();
|
||||
});
|
||||
|
||||
test("removes variable", () => {
|
||||
const mockPrompt: Prompt = {
|
||||
prompt_id: "prompt_123",
|
||||
prompt: "Hello {{name}}",
|
||||
version: 1,
|
||||
variables: ["name", "location"],
|
||||
is_default: true,
|
||||
};
|
||||
|
||||
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
|
||||
|
||||
// Check that both variables are present initially
|
||||
expect(screen.getAllByText("name").length).toBeGreaterThan(0);
|
||||
expect(screen.getAllByText("location").length).toBeGreaterThan(0);
|
||||
|
||||
// Remove the location variable by clicking the X button with the specific title
|
||||
const removeLocationButton = screen.getByTitle(
|
||||
"Remove location variable"
|
||||
);
|
||||
fireEvent.click(removeLocationButton);
|
||||
|
||||
// Name should still be there, location should be gone from the variables section
|
||||
expect(screen.getAllByText("name").length).toBeGreaterThan(0);
|
||||
expect(
|
||||
screen.queryByTitle("Remove location variable")
|
||||
).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("adds variable on Enter key", () => {
|
||||
render(<PromptEditor {...defaultProps} />);
|
||||
|
||||
const variableInput = screen.getByPlaceholderText(
|
||||
"Add variable name (e.g. user_name, topic)"
|
||||
);
|
||||
fireEvent.change(variableInput, { target: { value: "enterVar" } });
|
||||
|
||||
// Simulate Enter key press
|
||||
fireEvent.keyPress(variableInput, {
|
||||
key: "Enter",
|
||||
code: "Enter",
|
||||
charCode: 13,
|
||||
preventDefault: jest.fn(),
|
||||
});
|
||||
|
||||
// Check if the variable was added by looking for the badge
|
||||
expect(screen.getAllByText("enterVar").length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Preview Functionality", () => {
|
||||
test("shows live preview with variables", () => {
|
||||
render(<PromptEditor {...defaultProps} />);
|
||||
|
||||
// Add prompt content
|
||||
const promptInput = screen.getByLabelText("Prompt Content *");
|
||||
fireEvent.change(promptInput, {
|
||||
target: { value: "Hello {{name}}, welcome to {{place}}!" },
|
||||
});
|
||||
|
||||
// Add variables
|
||||
const variableInput = screen.getByPlaceholderText(
|
||||
"Add variable name (e.g. user_name, topic)"
|
||||
);
|
||||
fireEvent.change(variableInput, { target: { value: "name" } });
|
||||
fireEvent.click(screen.getByText("Add"));
|
||||
|
||||
fireEvent.change(variableInput, { target: { value: "place" } });
|
||||
fireEvent.click(screen.getByText("Add"));
|
||||
|
||||
// Check that preview area shows the content
|
||||
expect(screen.getByText("Compiled Prompt")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("shows variable value inputs in preview", () => {
|
||||
const mockPrompt: Prompt = {
|
||||
prompt_id: "prompt_123",
|
||||
prompt: "Hello {{name}}",
|
||||
version: 1,
|
||||
variables: ["name"],
|
||||
is_default: true,
|
||||
};
|
||||
|
||||
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
|
||||
|
||||
expect(screen.getByText("Variable Values")).toBeInTheDocument();
|
||||
expect(
|
||||
screen.getByPlaceholderText("Enter value for name")
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("shows color legend for variable states", () => {
|
||||
render(<PromptEditor {...defaultProps} />);
|
||||
|
||||
// Add content to show preview
|
||||
const promptInput = screen.getByLabelText("Prompt Content *");
|
||||
fireEvent.change(promptInput, {
|
||||
target: { value: "Hello {{name}}" },
|
||||
});
|
||||
|
||||
expect(screen.getByText("Used")).toBeInTheDocument();
|
||||
expect(screen.getByText("Unused")).toBeInTheDocument();
|
||||
expect(screen.getByText("Undefined")).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Error Handling", () => {
|
||||
test("displays error message", () => {
|
||||
const errorMessage = "Prompt contains undeclared variables";
|
||||
render(<PromptEditor {...defaultProps} error={errorMessage} />);
|
||||
|
||||
expect(screen.getByText(errorMessage)).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Delete Functionality", () => {
|
||||
const mockPrompt: Prompt = {
|
||||
prompt_id: "prompt_123",
|
||||
prompt: "Hello {{name}}",
|
||||
version: 1,
|
||||
variables: ["name"],
|
||||
is_default: true,
|
||||
};
|
||||
|
||||
test("shows delete button in edit mode", () => {
|
||||
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
|
||||
|
||||
expect(screen.getByText("Delete Prompt")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("hides delete button in create mode", () => {
|
||||
render(<PromptEditor {...defaultProps} />);
|
||||
|
||||
expect(screen.queryByText("Delete Prompt")).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("calls onDelete with confirmation", () => {
|
||||
const originalConfirm = window.confirm;
|
||||
window.confirm = jest.fn(() => true);
|
||||
|
||||
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
|
||||
|
||||
fireEvent.click(screen.getByText("Delete Prompt"));
|
||||
|
||||
expect(window.confirm).toHaveBeenCalledWith(
|
||||
"Are you sure you want to delete this prompt? This action cannot be undone."
|
||||
);
|
||||
expect(mockOnDelete).toHaveBeenCalledWith("prompt_123");
|
||||
|
||||
window.confirm = originalConfirm;
|
||||
});
|
||||
|
||||
test("does not delete when confirmation is cancelled", () => {
|
||||
const originalConfirm = window.confirm;
|
||||
window.confirm = jest.fn(() => false);
|
||||
|
||||
render(<PromptEditor {...defaultProps} prompt={mockPrompt} />);
|
||||
|
||||
fireEvent.click(screen.getByText("Delete Prompt"));
|
||||
|
||||
expect(mockOnDelete).not.toHaveBeenCalled();
|
||||
|
||||
window.confirm = originalConfirm;
|
||||
});
|
||||
});
|
||||
|
||||
describe("Cancel Functionality", () => {
|
||||
test("calls onCancel when cancel button is clicked", () => {
|
||||
render(<PromptEditor {...defaultProps} />);
|
||||
|
||||
fireEvent.click(screen.getByText("Cancel"));
|
||||
|
||||
expect(mockOnCancel).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
346
src/llama_stack/ui/components/prompts/prompt-editor.tsx
Normal file
346
src/llama_stack/ui/components/prompts/prompt-editor.tsx
Normal file
|
|
@ -0,0 +1,346 @@
|
|||
"use client";
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Textarea } from "@/components/ui/textarea";
|
||||
import { Badge } from "@/components/ui/badge";
|
||||
import {
|
||||
Card,
|
||||
CardContent,
|
||||
CardDescription,
|
||||
CardHeader,
|
||||
CardTitle,
|
||||
} from "@/components/ui/card";
|
||||
import { Separator } from "@/components/ui/separator";
|
||||
import { X, Plus, Save, Trash2 } from "lucide-react";
|
||||
import { Prompt, PromptFormData } from "./types";
|
||||
|
||||
interface PromptEditorProps {
|
||||
prompt?: Prompt;
|
||||
onSave: (prompt: PromptFormData) => void;
|
||||
onCancel: () => void;
|
||||
onDelete?: (promptId: string) => void;
|
||||
error?: string | null;
|
||||
}
|
||||
|
||||
export function PromptEditor({
|
||||
prompt,
|
||||
onSave,
|
||||
onCancel,
|
||||
onDelete,
|
||||
error,
|
||||
}: PromptEditorProps) {
|
||||
const [formData, setFormData] = useState<PromptFormData>({
|
||||
prompt: "",
|
||||
variables: [],
|
||||
});
|
||||
|
||||
const [newVariable, setNewVariable] = useState("");
|
||||
const [variableValues, setVariableValues] = useState<Record<string, string>>(
|
||||
{}
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
if (prompt) {
|
||||
setFormData({
|
||||
prompt: prompt.prompt || "",
|
||||
variables: prompt.variables || [],
|
||||
});
|
||||
}
|
||||
}, [prompt]);
|
||||
|
||||
const handleSubmit = (e: React.FormEvent) => {
|
||||
e.preventDefault();
|
||||
if (!formData.prompt.trim()) {
|
||||
return;
|
||||
}
|
||||
onSave(formData);
|
||||
};
|
||||
|
||||
const addVariable = () => {
|
||||
if (
|
||||
newVariable.trim() &&
|
||||
!formData.variables.includes(newVariable.trim())
|
||||
) {
|
||||
setFormData(prev => ({
|
||||
...prev,
|
||||
variables: [...prev.variables, newVariable.trim()],
|
||||
}));
|
||||
setNewVariable("");
|
||||
}
|
||||
};
|
||||
|
||||
const removeVariable = (variableToRemove: string) => {
|
||||
setFormData(prev => ({
|
||||
...prev,
|
||||
variables: prev.variables.filter(
|
||||
variable => variable !== variableToRemove
|
||||
),
|
||||
}));
|
||||
};
|
||||
|
||||
const renderPreview = () => {
|
||||
const text = formData.prompt;
|
||||
if (!text) return text;
|
||||
|
||||
// Split text by variable patterns and process each part
|
||||
const parts = text.split(/(\{\{\s*\w+\s*\}\})/g);
|
||||
|
||||
return parts.map((part, index) => {
|
||||
const variableMatch = part.match(/\{\{\s*(\w+)\s*\}\}/);
|
||||
if (variableMatch) {
|
||||
const variableName = variableMatch[1];
|
||||
const isDefined = formData.variables.includes(variableName);
|
||||
const value = variableValues[variableName];
|
||||
|
||||
if (!isDefined) {
|
||||
// Variable not in variables list - likely a typo/bug (RED)
|
||||
return (
|
||||
<span
|
||||
key={index}
|
||||
className="bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-200 px-1 rounded font-medium"
|
||||
>
|
||||
{part}
|
||||
</span>
|
||||
);
|
||||
} else if (value && value.trim()) {
|
||||
// Variable defined and has value - show the value (GREEN)
|
||||
return (
|
||||
<span
|
||||
key={index}
|
||||
className="bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200 px-1 rounded font-medium"
|
||||
>
|
||||
{value}
|
||||
</span>
|
||||
);
|
||||
} else {
|
||||
// Variable defined but empty (YELLOW)
|
||||
return (
|
||||
<span
|
||||
key={index}
|
||||
className="bg-yellow-100 text-yellow-800 dark:bg-yellow-900 dark:text-yellow-200 px-1 rounded font-medium"
|
||||
>
|
||||
{part}
|
||||
</span>
|
||||
);
|
||||
}
|
||||
}
|
||||
return part;
|
||||
});
|
||||
};
|
||||
|
||||
const updateVariableValue = (variable: string, value: string) => {
|
||||
setVariableValues(prev => ({
|
||||
...prev,
|
||||
[variable]: value,
|
||||
}));
|
||||
};
|
||||
|
||||
return (
|
||||
<form onSubmit={handleSubmit} className="space-y-6">
|
||||
{error && (
|
||||
<div className="p-4 bg-destructive/10 border border-destructive/20 rounded-md">
|
||||
<p className="text-destructive text-sm">{error}</p>
|
||||
</div>
|
||||
)}
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
||||
{/* Form Section */}
|
||||
<div className="space-y-4">
|
||||
<div>
|
||||
<Label htmlFor="prompt">Prompt Content *</Label>
|
||||
<Textarea
|
||||
id="prompt"
|
||||
value={formData.prompt}
|
||||
onChange={e =>
|
||||
setFormData(prev => ({ ...prev, prompt: e.target.value }))
|
||||
}
|
||||
placeholder="Enter your prompt content here. Use {{variable_name}} for dynamic variables."
|
||||
className="min-h-32 font-mono mt-2"
|
||||
required
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground mt-2">
|
||||
Use double curly braces around variable names, e.g.,{" "}
|
||||
{`{{user_name}}`} or {`{{topic}}`}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="space-y-3">
|
||||
<Label className="text-sm font-medium">Variables</Label>
|
||||
|
||||
<div className="flex gap-2 mt-2">
|
||||
<Input
|
||||
value={newVariable}
|
||||
onChange={e => setNewVariable(e.target.value)}
|
||||
placeholder="Add variable name (e.g. user_name, topic)"
|
||||
onKeyPress={e =>
|
||||
e.key === "Enter" && (e.preventDefault(), addVariable())
|
||||
}
|
||||
className="flex-1"
|
||||
/>
|
||||
<Button
|
||||
type="button"
|
||||
onClick={addVariable}
|
||||
size="sm"
|
||||
disabled={
|
||||
!newVariable.trim() ||
|
||||
formData.variables.includes(newVariable.trim())
|
||||
}
|
||||
>
|
||||
<Plus className="h-4 w-4" />
|
||||
Add
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
{formData.variables.length > 0 && (
|
||||
<div className="border rounded-lg p-3 bg-muted/20">
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{formData.variables.map(variable => (
|
||||
<Badge
|
||||
key={variable}
|
||||
variant="secondary"
|
||||
className="text-sm px-2 py-1"
|
||||
>
|
||||
{variable}
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => removeVariable(variable)}
|
||||
className="ml-2 hover:text-destructive transition-colors"
|
||||
title={`Remove ${variable} variable`}
|
||||
>
|
||||
<X className="h-3 w-3" />
|
||||
</button>
|
||||
</Badge>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Variables that can be used in the prompt template. Each variable
|
||||
should match a {`{{variable}}`} placeholder in the content above.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Preview Section */}
|
||||
<div className="space-y-4">
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="text-lg">Preview</CardTitle>
|
||||
<CardDescription>
|
||||
Live preview of compiled prompt and variable substitution.
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
{formData.prompt ? (
|
||||
<>
|
||||
{/* Variable Values */}
|
||||
{formData.variables.length > 0 && (
|
||||
<div className="space-y-3">
|
||||
<Label className="text-sm font-medium">
|
||||
Variable Values
|
||||
</Label>
|
||||
<div className="space-y-2">
|
||||
{formData.variables.map(variable => (
|
||||
<div
|
||||
key={variable}
|
||||
className="grid grid-cols-2 gap-3 items-center"
|
||||
>
|
||||
<div className="text-sm font-mono text-muted-foreground">
|
||||
{variable}
|
||||
</div>
|
||||
<Input
|
||||
id={`var-${variable}`}
|
||||
value={variableValues[variable] || ""}
|
||||
onChange={e =>
|
||||
updateVariableValue(variable, e.target.value)
|
||||
}
|
||||
placeholder={`Enter value for ${variable}`}
|
||||
className="text-sm"
|
||||
/>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
<Separator />
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Live Preview */}
|
||||
<div>
|
||||
<Label className="text-sm font-medium mb-2 block">
|
||||
Compiled Prompt
|
||||
</Label>
|
||||
<div className="bg-muted/50 p-4 rounded-lg border">
|
||||
<div className="text-sm leading-relaxed whitespace-pre-wrap">
|
||||
{renderPreview()}
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex flex-wrap gap-4 mt-2 text-xs">
|
||||
<div className="flex items-center gap-1">
|
||||
<div className="w-3 h-3 bg-green-500 dark:bg-green-400 border rounded"></div>
|
||||
<span className="text-muted-foreground">Used</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-1">
|
||||
<div className="w-3 h-3 bg-yellow-500 dark:bg-yellow-400 border rounded"></div>
|
||||
<span className="text-muted-foreground">Unused</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-1">
|
||||
<div className="w-3 h-3 bg-red-500 dark:bg-red-400 border rounded"></div>
|
||||
<span className="text-muted-foreground">Undefined</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</>
|
||||
) : (
|
||||
<div className="text-center py-8">
|
||||
<div className="text-muted-foreground text-sm">
|
||||
Enter content to preview the compiled prompt
|
||||
</div>
|
||||
<div className="text-xs text-muted-foreground mt-2">
|
||||
Use {`{{variable_name}}`} to add dynamic variables
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
|
||||
<div className="flex justify-between">
|
||||
<div>
|
||||
{prompt && onDelete && (
|
||||
<Button
|
||||
type="button"
|
||||
variant="destructive"
|
||||
onClick={() => {
|
||||
if (
|
||||
confirm(
|
||||
`Are you sure you want to delete this prompt? This action cannot be undone.`
|
||||
)
|
||||
) {
|
||||
onDelete(prompt.prompt_id);
|
||||
}
|
||||
}}
|
||||
>
|
||||
<Trash2 className="h-4 w-4 mr-2" />
|
||||
Delete Prompt
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<Button type="button" variant="outline" onClick={onCancel}>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button type="submit">
|
||||
<Save className="h-4 w-4 mr-2" />
|
||||
{prompt ? "Update" : "Create"} Prompt
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
);
|
||||
}
|
||||
259
src/llama_stack/ui/components/prompts/prompt-list.test.tsx
Normal file
259
src/llama_stack/ui/components/prompts/prompt-list.test.tsx
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
import React from "react";
|
||||
import { render, screen, fireEvent } from "@testing-library/react";
|
||||
import "@testing-library/jest-dom";
|
||||
import { PromptList } from "./prompt-list";
|
||||
import type { Prompt } from "./types";
|
||||
|
||||
describe("PromptList", () => {
|
||||
const mockOnEdit = jest.fn();
|
||||
const mockOnDelete = jest.fn();
|
||||
|
||||
const defaultProps = {
|
||||
prompts: [],
|
||||
onEdit: mockOnEdit,
|
||||
onDelete: mockOnDelete,
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("Empty State", () => {
|
||||
test("renders empty message when no prompts", () => {
|
||||
render(<PromptList {...defaultProps} />);
|
||||
|
||||
expect(screen.getByText("No prompts yet")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("shows filtered empty message when search has no results", () => {
|
||||
const prompts: Prompt[] = [
|
||||
{
|
||||
prompt_id: "prompt_123",
|
||||
prompt: "Hello world",
|
||||
version: 1,
|
||||
variables: [],
|
||||
is_default: false,
|
||||
},
|
||||
];
|
||||
|
||||
render(<PromptList {...defaultProps} prompts={prompts} />);
|
||||
|
||||
// Search for something that doesn't exist
|
||||
const searchInput = screen.getByPlaceholderText("Search prompts...");
|
||||
fireEvent.change(searchInput, { target: { value: "nonexistent" } });
|
||||
|
||||
expect(
|
||||
screen.getByText("No prompts match your filters")
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Prompts Display", () => {
|
||||
const mockPrompts: Prompt[] = [
|
||||
{
|
||||
prompt_id: "prompt_123",
|
||||
prompt: "Hello {{name}}, how are you?",
|
||||
version: 1,
|
||||
variables: ["name"],
|
||||
is_default: true,
|
||||
},
|
||||
{
|
||||
prompt_id: "prompt_456",
|
||||
prompt: "Summarize this {{text}} in {{length}} words",
|
||||
version: 2,
|
||||
variables: ["text", "length"],
|
||||
is_default: false,
|
||||
},
|
||||
{
|
||||
prompt_id: "prompt_789",
|
||||
prompt: "Simple prompt with no variables",
|
||||
version: 1,
|
||||
variables: [],
|
||||
is_default: false,
|
||||
},
|
||||
];
|
||||
|
||||
test("renders prompts table with correct headers", () => {
|
||||
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
|
||||
|
||||
expect(screen.getByText("ID")).toBeInTheDocument();
|
||||
expect(screen.getByText("Content")).toBeInTheDocument();
|
||||
expect(screen.getByText("Variables")).toBeInTheDocument();
|
||||
expect(screen.getByText("Version")).toBeInTheDocument();
|
||||
expect(screen.getByText("Actions")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("renders prompt data correctly", () => {
|
||||
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
|
||||
|
||||
// Check prompt IDs
|
||||
expect(screen.getByText("prompt_123")).toBeInTheDocument();
|
||||
expect(screen.getByText("prompt_456")).toBeInTheDocument();
|
||||
expect(screen.getByText("prompt_789")).toBeInTheDocument();
|
||||
|
||||
// Check content
|
||||
expect(
|
||||
screen.getByText("Hello {{name}}, how are you?")
|
||||
).toBeInTheDocument();
|
||||
expect(
|
||||
screen.getByText("Summarize this {{text}} in {{length}} words")
|
||||
).toBeInTheDocument();
|
||||
expect(
|
||||
screen.getByText("Simple prompt with no variables")
|
||||
).toBeInTheDocument();
|
||||
|
||||
// Check versions
|
||||
expect(screen.getAllByText("1")).toHaveLength(2); // Two prompts with version 1
|
||||
expect(screen.getByText("2")).toBeInTheDocument();
|
||||
|
||||
// Check default badge
|
||||
expect(screen.getByText("Default")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("renders variables correctly", () => {
|
||||
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
|
||||
|
||||
// Check variables display
|
||||
expect(screen.getByText("name")).toBeInTheDocument();
|
||||
expect(screen.getByText("text")).toBeInTheDocument();
|
||||
expect(screen.getByText("length")).toBeInTheDocument();
|
||||
expect(screen.getByText("None")).toBeInTheDocument(); // For prompt with no variables
|
||||
});
|
||||
|
||||
test("prompt ID links are clickable and call onEdit", () => {
|
||||
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
|
||||
|
||||
// Click on the first prompt ID link
|
||||
const promptLink = screen.getByRole("button", { name: "prompt_123" });
|
||||
fireEvent.click(promptLink);
|
||||
|
||||
expect(mockOnEdit).toHaveBeenCalledWith(mockPrompts[0]);
|
||||
});
|
||||
|
||||
test("edit buttons call onEdit", () => {
|
||||
const { container } = render(
|
||||
<PromptList {...defaultProps} prompts={mockPrompts} />
|
||||
);
|
||||
|
||||
// Find the action buttons in the table - they should be in the last column
|
||||
const actionCells = container.querySelectorAll("td:last-child");
|
||||
const firstActionCell = actionCells[0];
|
||||
const editButton = firstActionCell?.querySelector("button");
|
||||
|
||||
expect(editButton).toBeInTheDocument();
|
||||
fireEvent.click(editButton!);
|
||||
|
||||
expect(mockOnEdit).toHaveBeenCalledWith(mockPrompts[0]);
|
||||
});
|
||||
|
||||
test("delete buttons call onDelete with confirmation", () => {
|
||||
const originalConfirm = window.confirm;
|
||||
window.confirm = jest.fn(() => true);
|
||||
|
||||
const { container } = render(
|
||||
<PromptList {...defaultProps} prompts={mockPrompts} />
|
||||
);
|
||||
|
||||
// Find the delete button (second button in the first action cell)
|
||||
const actionCells = container.querySelectorAll("td:last-child");
|
||||
const firstActionCell = actionCells[0];
|
||||
const buttons = firstActionCell?.querySelectorAll("button");
|
||||
const deleteButton = buttons?.[1]; // Second button should be delete
|
||||
|
||||
expect(deleteButton).toBeInTheDocument();
|
||||
fireEvent.click(deleteButton!);
|
||||
|
||||
expect(window.confirm).toHaveBeenCalledWith(
|
||||
"Are you sure you want to delete this prompt? This action cannot be undone."
|
||||
);
|
||||
expect(mockOnDelete).toHaveBeenCalledWith("prompt_123");
|
||||
|
||||
window.confirm = originalConfirm;
|
||||
});
|
||||
|
||||
test("delete does not execute when confirmation is cancelled", () => {
|
||||
const originalConfirm = window.confirm;
|
||||
window.confirm = jest.fn(() => false);
|
||||
|
||||
const { container } = render(
|
||||
<PromptList {...defaultProps} prompts={mockPrompts} />
|
||||
);
|
||||
|
||||
const actionCells = container.querySelectorAll("td:last-child");
|
||||
const firstActionCell = actionCells[0];
|
||||
const buttons = firstActionCell?.querySelectorAll("button");
|
||||
const deleteButton = buttons?.[1]; // Second button should be delete
|
||||
|
||||
expect(deleteButton).toBeInTheDocument();
|
||||
fireEvent.click(deleteButton!);
|
||||
|
||||
expect(mockOnDelete).not.toHaveBeenCalled();
|
||||
|
||||
window.confirm = originalConfirm;
|
||||
});
|
||||
});
|
||||
|
||||
describe("Search Functionality", () => {
|
||||
const mockPrompts: Prompt[] = [
|
||||
{
|
||||
prompt_id: "user_greeting",
|
||||
prompt: "Hello {{name}}, welcome!",
|
||||
version: 1,
|
||||
variables: ["name"],
|
||||
is_default: true,
|
||||
},
|
||||
{
|
||||
prompt_id: "system_summary",
|
||||
prompt: "Summarize the following text",
|
||||
version: 1,
|
||||
variables: [],
|
||||
is_default: false,
|
||||
},
|
||||
];
|
||||
|
||||
test("filters prompts by prompt ID", () => {
|
||||
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
|
||||
|
||||
const searchInput = screen.getByPlaceholderText("Search prompts...");
|
||||
fireEvent.change(searchInput, { target: { value: "user" } });
|
||||
|
||||
expect(screen.getByText("user_greeting")).toBeInTheDocument();
|
||||
expect(screen.queryByText("system_summary")).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("filters prompts by content", () => {
|
||||
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
|
||||
|
||||
const searchInput = screen.getByPlaceholderText("Search prompts...");
|
||||
fireEvent.change(searchInput, { target: { value: "welcome" } });
|
||||
|
||||
expect(screen.getByText("user_greeting")).toBeInTheDocument();
|
||||
expect(screen.queryByText("system_summary")).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("search is case insensitive", () => {
|
||||
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
|
||||
|
||||
const searchInput = screen.getByPlaceholderText("Search prompts...");
|
||||
fireEvent.change(searchInput, { target: { value: "HELLO" } });
|
||||
|
||||
expect(screen.getByText("user_greeting")).toBeInTheDocument();
|
||||
expect(screen.queryByText("system_summary")).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("clearing search shows all prompts", () => {
|
||||
render(<PromptList {...defaultProps} prompts={mockPrompts} />);
|
||||
|
||||
const searchInput = screen.getByPlaceholderText("Search prompts...");
|
||||
|
||||
// Filter first
|
||||
fireEvent.change(searchInput, { target: { value: "user" } });
|
||||
expect(screen.queryByText("system_summary")).not.toBeInTheDocument();
|
||||
|
||||
// Clear search
|
||||
fireEvent.change(searchInput, { target: { value: "" } });
|
||||
expect(screen.getByText("user_greeting")).toBeInTheDocument();
|
||||
expect(screen.getByText("system_summary")).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
});
|
||||
164
src/llama_stack/ui/components/prompts/prompt-list.tsx
Normal file
164
src/llama_stack/ui/components/prompts/prompt-list.tsx
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import { Badge } from "@/components/ui/badge";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
Table,
|
||||
TableBody,
|
||||
TableCell,
|
||||
TableHead,
|
||||
TableHeader,
|
||||
TableRow,
|
||||
} from "@/components/ui/table";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Edit, Search, Trash2 } from "lucide-react";
|
||||
import { Prompt, PromptFilters } from "./types";
|
||||
|
||||
interface PromptListProps {
|
||||
prompts: Prompt[];
|
||||
onEdit: (prompt: Prompt) => void;
|
||||
onDelete: (promptId: string) => void;
|
||||
}
|
||||
|
||||
export function PromptList({ prompts, onEdit, onDelete }: PromptListProps) {
|
||||
const [filters, setFilters] = useState<PromptFilters>({});
|
||||
|
||||
const filteredPrompts = prompts.filter(prompt => {
|
||||
if (
|
||||
filters.searchTerm &&
|
||||
!(
|
||||
prompt.prompt
|
||||
?.toLowerCase()
|
||||
.includes(filters.searchTerm.toLowerCase()) ||
|
||||
prompt.prompt_id
|
||||
.toLowerCase()
|
||||
.includes(filters.searchTerm.toLowerCase())
|
||||
)
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
{/* Filters */}
|
||||
<div className="flex flex-col sm:flex-row gap-4">
|
||||
<div className="relative flex-1">
|
||||
<Search className="absolute left-3 top-1/2 transform -translate-y-1/2 text-muted-foreground h-4 w-4" />
|
||||
<Input
|
||||
placeholder="Search prompts..."
|
||||
value={filters.searchTerm || ""}
|
||||
onChange={e =>
|
||||
setFilters(prev => ({ ...prev, searchTerm: e.target.value }))
|
||||
}
|
||||
className="pl-10"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Prompts Table */}
|
||||
<div className="overflow-auto">
|
||||
<Table>
|
||||
<TableHeader>
|
||||
<TableRow>
|
||||
<TableHead>ID</TableHead>
|
||||
<TableHead>Content</TableHead>
|
||||
<TableHead>Variables</TableHead>
|
||||
<TableHead>Version</TableHead>
|
||||
<TableHead>Actions</TableHead>
|
||||
</TableRow>
|
||||
</TableHeader>
|
||||
<TableBody>
|
||||
{filteredPrompts.map(prompt => (
|
||||
<TableRow key={prompt.prompt_id}>
|
||||
<TableCell className="max-w-48">
|
||||
<Button
|
||||
variant="link"
|
||||
className="p-0 h-auto font-mono text-blue-600 hover:text-blue-800 dark:text-blue-400 dark:hover:text-blue-300 max-w-full justify-start"
|
||||
onClick={() => onEdit(prompt)}
|
||||
title={prompt.prompt_id}
|
||||
>
|
||||
<div className="truncate">{prompt.prompt_id}</div>
|
||||
</Button>
|
||||
</TableCell>
|
||||
<TableCell className="max-w-64">
|
||||
<div
|
||||
className="font-mono text-xs text-muted-foreground truncate"
|
||||
title={prompt.prompt || "No content"}
|
||||
>
|
||||
{prompt.prompt || "No content"}
|
||||
</div>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
{prompt.variables.length > 0 ? (
|
||||
<div className="flex flex-wrap gap-1">
|
||||
{prompt.variables.map(variable => (
|
||||
<Badge
|
||||
key={variable}
|
||||
variant="outline"
|
||||
className="text-xs"
|
||||
>
|
||||
{variable}
|
||||
</Badge>
|
||||
))}
|
||||
</div>
|
||||
) : (
|
||||
<span className="text-muted-foreground text-sm">None</span>
|
||||
)}
|
||||
</TableCell>
|
||||
<TableCell className="text-sm">
|
||||
{prompt.version}
|
||||
{prompt.is_default && (
|
||||
<Badge variant="secondary" className="text-xs ml-2">
|
||||
Default
|
||||
</Badge>
|
||||
)}
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<div className="flex gap-1">
|
||||
<Button
|
||||
size="sm"
|
||||
variant="outline"
|
||||
onClick={() => onEdit(prompt)}
|
||||
className="h-8 w-8 p-0"
|
||||
>
|
||||
<Edit className="h-3 w-3" />
|
||||
</Button>
|
||||
<Button
|
||||
size="sm"
|
||||
variant="outline"
|
||||
onClick={() => {
|
||||
if (
|
||||
confirm(
|
||||
`Are you sure you want to delete this prompt? This action cannot be undone.`
|
||||
)
|
||||
) {
|
||||
onDelete(prompt.prompt_id);
|
||||
}
|
||||
}}
|
||||
className="h-8 w-8 p-0 text-destructive hover:text-destructive"
|
||||
>
|
||||
<Trash2 className="h-3 w-3" />
|
||||
</Button>
|
||||
</div>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
))}
|
||||
</TableBody>
|
||||
</Table>
|
||||
</div>
|
||||
|
||||
{filteredPrompts.length === 0 && (
|
||||
<div className="text-center py-12">
|
||||
<div className="text-muted-foreground">
|
||||
{prompts.length === 0
|
||||
? "No prompts yet"
|
||||
: "No prompts match your filters"}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
304
src/llama_stack/ui/components/prompts/prompt-management.test.tsx
Normal file
304
src/llama_stack/ui/components/prompts/prompt-management.test.tsx
Normal file
|
|
@ -0,0 +1,304 @@
|
|||
import React from "react";
|
||||
import { render, screen, fireEvent, waitFor } from "@testing-library/react";
|
||||
import "@testing-library/jest-dom";
|
||||
import { PromptManagement } from "./prompt-management";
|
||||
import type { Prompt } from "./types";
|
||||
|
||||
// Mock the auth client
|
||||
const mockPromptsClient = {
|
||||
list: jest.fn(),
|
||||
create: jest.fn(),
|
||||
update: jest.fn(),
|
||||
delete: jest.fn(),
|
||||
};
|
||||
|
||||
jest.mock("@/hooks/use-auth-client", () => ({
|
||||
useAuthClient: () => ({
|
||||
prompts: mockPromptsClient,
|
||||
}),
|
||||
}));
|
||||
|
||||
describe("PromptManagement", () => {
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("Loading State", () => {
|
||||
test("renders loading state initially", () => {
|
||||
mockPromptsClient.list.mockReturnValue(new Promise(() => {})); // Never resolves
|
||||
render(<PromptManagement />);
|
||||
|
||||
expect(screen.getByText("Loading prompts...")).toBeInTheDocument();
|
||||
expect(screen.getByText("Prompts")).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Empty State", () => {
|
||||
test("renders empty state when no prompts", async () => {
|
||||
mockPromptsClient.list.mockResolvedValue([]);
|
||||
render(<PromptManagement />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText("No prompts found.")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
expect(screen.getByText("Create Your First Prompt")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("opens modal when clicking 'Create Your First Prompt'", async () => {
|
||||
mockPromptsClient.list.mockResolvedValue([]);
|
||||
render(<PromptManagement />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByText("Create Your First Prompt")
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
|
||||
fireEvent.click(screen.getByText("Create Your First Prompt"));
|
||||
|
||||
expect(screen.getByText("Create New Prompt")).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Error State", () => {
|
||||
test("renders error state when API fails", async () => {
|
||||
const error = new Error("API not found");
|
||||
mockPromptsClient.list.mockRejectedValue(error);
|
||||
render(<PromptManagement />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText(/Error:/)).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
test("renders specific error for 404", async () => {
|
||||
const error = new Error("404 Not found");
|
||||
mockPromptsClient.list.mockRejectedValue(error);
|
||||
render(<PromptManagement />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByText(/Prompts API endpoint not found/)
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("Prompts List", () => {
|
||||
const mockPrompts: Prompt[] = [
|
||||
{
|
||||
prompt_id: "prompt_123",
|
||||
prompt: "Hello {{name}}, how are you?",
|
||||
version: 1,
|
||||
variables: ["name"],
|
||||
is_default: true,
|
||||
},
|
||||
{
|
||||
prompt_id: "prompt_456",
|
||||
prompt: "Summarize this {{text}}",
|
||||
version: 2,
|
||||
variables: ["text"],
|
||||
is_default: false,
|
||||
},
|
||||
];
|
||||
|
||||
test("renders prompts list correctly", async () => {
|
||||
mockPromptsClient.list.mockResolvedValue(mockPrompts);
|
||||
render(<PromptManagement />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText("prompt_123")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
expect(screen.getByText("prompt_456")).toBeInTheDocument();
|
||||
expect(
|
||||
screen.getByText("Hello {{name}}, how are you?")
|
||||
).toBeInTheDocument();
|
||||
expect(screen.getByText("Summarize this {{text}}")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("opens modal when clicking 'New Prompt' button", async () => {
|
||||
mockPromptsClient.list.mockResolvedValue(mockPrompts);
|
||||
render(<PromptManagement />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText("prompt_123")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
fireEvent.click(screen.getByText("New Prompt"));
|
||||
|
||||
expect(screen.getByText("Create New Prompt")).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Modal Operations", () => {
|
||||
const mockPrompts: Prompt[] = [
|
||||
{
|
||||
prompt_id: "prompt_123",
|
||||
prompt: "Hello {{name}}",
|
||||
version: 1,
|
||||
variables: ["name"],
|
||||
is_default: true,
|
||||
},
|
||||
];
|
||||
|
||||
test("closes modal when clicking cancel", async () => {
|
||||
mockPromptsClient.list.mockResolvedValue(mockPrompts);
|
||||
render(<PromptManagement />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText("prompt_123")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Open modal
|
||||
fireEvent.click(screen.getByText("New Prompt"));
|
||||
expect(screen.getByText("Create New Prompt")).toBeInTheDocument();
|
||||
|
||||
// Close modal
|
||||
fireEvent.click(screen.getByText("Cancel"));
|
||||
expect(screen.queryByText("Create New Prompt")).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
test("creates new prompt successfully", async () => {
|
||||
const newPrompt: Prompt = {
|
||||
prompt_id: "prompt_new",
|
||||
prompt: "New prompt content",
|
||||
version: 1,
|
||||
variables: [],
|
||||
is_default: false,
|
||||
};
|
||||
|
||||
mockPromptsClient.list.mockResolvedValue(mockPrompts);
|
||||
mockPromptsClient.create.mockResolvedValue(newPrompt);
|
||||
render(<PromptManagement />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText("prompt_123")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Open modal
|
||||
fireEvent.click(screen.getByText("New Prompt"));
|
||||
|
||||
// Fill form
|
||||
const promptInput = screen.getByLabelText("Prompt Content *");
|
||||
fireEvent.change(promptInput, {
|
||||
target: { value: "New prompt content" },
|
||||
});
|
||||
|
||||
// Submit form
|
||||
fireEvent.click(screen.getByText("Create Prompt"));
|
||||
|
||||
await waitFor(() => {
|
||||
expect(mockPromptsClient.create).toHaveBeenCalledWith({
|
||||
prompt: "New prompt content",
|
||||
variables: [],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
test("handles create error gracefully", async () => {
|
||||
const error = {
|
||||
detail: {
|
||||
errors: [{ msg: "Prompt contains undeclared variables: ['test']" }],
|
||||
},
|
||||
};
|
||||
|
||||
mockPromptsClient.list.mockResolvedValue(mockPrompts);
|
||||
mockPromptsClient.create.mockRejectedValue(error);
|
||||
render(<PromptManagement />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText("prompt_123")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Open modal
|
||||
fireEvent.click(screen.getByText("New Prompt"));
|
||||
|
||||
// Fill form
|
||||
const promptInput = screen.getByLabelText("Prompt Content *");
|
||||
fireEvent.change(promptInput, { target: { value: "Hello {{test}}" } });
|
||||
|
||||
// Submit form
|
||||
fireEvent.click(screen.getByText("Create Prompt"));
|
||||
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
screen.getByText("Prompt contains undeclared variables: ['test']")
|
||||
).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
test("updates existing prompt successfully", async () => {
|
||||
const updatedPrompt: Prompt = {
|
||||
...mockPrompts[0],
|
||||
prompt: "Updated content",
|
||||
};
|
||||
|
||||
mockPromptsClient.list.mockResolvedValue(mockPrompts);
|
||||
mockPromptsClient.update.mockResolvedValue(updatedPrompt);
|
||||
const { container } = render(<PromptManagement />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText("prompt_123")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Click edit button (first button in the action cell of the first row)
|
||||
const actionCells = container.querySelectorAll("td:last-child");
|
||||
const firstActionCell = actionCells[0];
|
||||
const editButton = firstActionCell?.querySelector("button");
|
||||
|
||||
expect(editButton).toBeInTheDocument();
|
||||
fireEvent.click(editButton!);
|
||||
|
||||
expect(screen.getByText("Edit Prompt")).toBeInTheDocument();
|
||||
|
||||
// Update content
|
||||
const promptInput = screen.getByLabelText("Prompt Content *");
|
||||
fireEvent.change(promptInput, { target: { value: "Updated content" } });
|
||||
|
||||
// Submit form
|
||||
fireEvent.click(screen.getByText("Update Prompt"));
|
||||
|
||||
await waitFor(() => {
|
||||
expect(mockPromptsClient.update).toHaveBeenCalledWith("prompt_123", {
|
||||
prompt: "Updated content",
|
||||
variables: ["name"],
|
||||
version: 1,
|
||||
set_as_default: true,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
test("deletes prompt successfully", async () => {
|
||||
mockPromptsClient.list.mockResolvedValue(mockPrompts);
|
||||
mockPromptsClient.delete.mockResolvedValue(undefined);
|
||||
|
||||
// Mock window.confirm
|
||||
const originalConfirm = window.confirm;
|
||||
window.confirm = jest.fn(() => true);
|
||||
|
||||
const { container } = render(<PromptManagement />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText("prompt_123")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Click delete button (second button in the action cell of the first row)
|
||||
const actionCells = container.querySelectorAll("td:last-child");
|
||||
const firstActionCell = actionCells[0];
|
||||
const buttons = firstActionCell?.querySelectorAll("button");
|
||||
const deleteButton = buttons?.[1]; // Second button should be delete
|
||||
|
||||
expect(deleteButton).toBeInTheDocument();
|
||||
fireEvent.click(deleteButton!);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(mockPromptsClient.delete).toHaveBeenCalledWith("prompt_123");
|
||||
});
|
||||
|
||||
// Restore window.confirm
|
||||
window.confirm = originalConfirm;
|
||||
});
|
||||
});
|
||||
});
|
||||
233
src/llama_stack/ui/components/prompts/prompt-management.tsx
Normal file
233
src/llama_stack/ui/components/prompts/prompt-management.tsx
Normal file
|
|
@ -0,0 +1,233 @@
|
|||
"use client";
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Plus } from "lucide-react";
|
||||
import { PromptList } from "./prompt-list";
|
||||
import { PromptEditor } from "./prompt-editor";
|
||||
import { Prompt, PromptFormData } from "./types";
|
||||
import { useAuthClient } from "@/hooks/use-auth-client";
|
||||
|
||||
export function PromptManagement() {
|
||||
const [prompts, setPrompts] = useState<Prompt[]>([]);
|
||||
const [showPromptModal, setShowPromptModal] = useState(false);
|
||||
const [editingPrompt, setEditingPrompt] = useState<Prompt | undefined>();
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null); // For main page errors (loading, etc.)
|
||||
const [modalError, setModalError] = useState<string | null>(null); // For form submission errors
|
||||
const client = useAuthClient();
|
||||
|
||||
// Load prompts from API on component mount
|
||||
useEffect(() => {
|
||||
const fetchPrompts = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
setError(null);
|
||||
|
||||
const response = await client.prompts.list();
|
||||
setPrompts(response || []);
|
||||
} catch (err: unknown) {
|
||||
console.error("Failed to load prompts:", err);
|
||||
|
||||
// Handle different types of errors
|
||||
const error = err as Error & { status?: number };
|
||||
if (error?.message?.includes("404") || error?.status === 404) {
|
||||
setError(
|
||||
"Prompts API endpoint not found. Please ensure your Llama Stack server supports the prompts API."
|
||||
);
|
||||
} else if (
|
||||
error?.message?.includes("not implemented") ||
|
||||
error?.message?.includes("not supported")
|
||||
) {
|
||||
setError(
|
||||
"Prompts API is not yet implemented on this Llama Stack server."
|
||||
);
|
||||
} else {
|
||||
setError(
|
||||
`Failed to load prompts: ${error?.message || "Unknown error"}`
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
fetchPrompts();
|
||||
}, [client]);
|
||||
|
||||
const handleSavePrompt = async (formData: PromptFormData) => {
|
||||
try {
|
||||
setModalError(null);
|
||||
|
||||
if (editingPrompt) {
|
||||
// Update existing prompt
|
||||
const response = await client.prompts.update(editingPrompt.prompt_id, {
|
||||
prompt: formData.prompt,
|
||||
variables: formData.variables,
|
||||
version: editingPrompt.version,
|
||||
set_as_default: true,
|
||||
});
|
||||
|
||||
// Update local state
|
||||
setPrompts(prev =>
|
||||
prev.map(p =>
|
||||
p.prompt_id === editingPrompt.prompt_id ? response : p
|
||||
)
|
||||
);
|
||||
} else {
|
||||
// Create new prompt
|
||||
const response = await client.prompts.create({
|
||||
prompt: formData.prompt,
|
||||
variables: formData.variables,
|
||||
});
|
||||
|
||||
// Add to local state
|
||||
setPrompts(prev => [response, ...prev]);
|
||||
}
|
||||
|
||||
setShowPromptModal(false);
|
||||
setEditingPrompt(undefined);
|
||||
} catch (err) {
|
||||
console.error("Failed to save prompt:", err);
|
||||
|
||||
// Extract specific error message from API response
|
||||
const error = err as Error & {
|
||||
message?: string;
|
||||
detail?: { errors?: Array<{ msg?: string }> };
|
||||
};
|
||||
|
||||
// Try to parse JSON from error message if it's a string
|
||||
let parsedError = error;
|
||||
if (typeof error?.message === "string" && error.message.includes("{")) {
|
||||
try {
|
||||
const jsonMatch = error.message.match(/\d+\s+(.+)/);
|
||||
if (jsonMatch) {
|
||||
parsedError = JSON.parse(jsonMatch[1]);
|
||||
}
|
||||
} catch {
|
||||
// If parsing fails, use original error
|
||||
}
|
||||
}
|
||||
|
||||
// Try to get the specific validation error message
|
||||
const validationError = parsedError?.detail?.errors?.[0]?.msg;
|
||||
if (validationError) {
|
||||
// Clean up validation error messages (remove "Value error, " prefix if present)
|
||||
const cleanMessage = validationError.replace(/^Value error,\s*/i, "");
|
||||
setModalError(cleanMessage);
|
||||
} else {
|
||||
// For other errors, format them nicely with line breaks
|
||||
const statusMatch = error?.message?.match(/(\d+)\s+(.+)/);
|
||||
if (statusMatch) {
|
||||
const statusCode = statusMatch[1];
|
||||
const response = statusMatch[2];
|
||||
setModalError(
|
||||
`Failed to save prompt: Status Code ${statusCode}\n\nResponse: ${response}`
|
||||
);
|
||||
} else {
|
||||
const message = error?.message || error?.detail || "Unknown error";
|
||||
setModalError(`Failed to save prompt: ${message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const handleEditPrompt = (prompt: Prompt) => {
|
||||
setEditingPrompt(prompt);
|
||||
setShowPromptModal(true);
|
||||
setModalError(null); // Clear any previous modal errors
|
||||
};
|
||||
|
||||
const handleDeletePrompt = async (promptId: string) => {
|
||||
try {
|
||||
setError(null);
|
||||
await client.prompts.delete(promptId);
|
||||
setPrompts(prev => prev.filter(p => p.prompt_id !== promptId));
|
||||
|
||||
// If we're deleting the currently editing prompt, close the modal
|
||||
if (editingPrompt && editingPrompt.prompt_id === promptId) {
|
||||
setShowPromptModal(false);
|
||||
setEditingPrompt(undefined);
|
||||
}
|
||||
} catch (err) {
|
||||
console.error("Failed to delete prompt:", err);
|
||||
setError("Failed to delete prompt");
|
||||
}
|
||||
};
|
||||
|
||||
const handleCreateNew = () => {
|
||||
setEditingPrompt(undefined);
|
||||
setShowPromptModal(true);
|
||||
setModalError(null); // Clear any previous modal errors
|
||||
};
|
||||
|
||||
const handleCancel = () => {
|
||||
setShowPromptModal(false);
|
||||
setEditingPrompt(undefined);
|
||||
};
|
||||
|
||||
const renderContent = () => {
|
||||
if (loading) {
|
||||
return <div className="text-muted-foreground">Loading prompts...</div>;
|
||||
}
|
||||
|
||||
if (error) {
|
||||
return <div className="text-destructive">Error: {error}</div>;
|
||||
}
|
||||
|
||||
if (!prompts || prompts.length === 0) {
|
||||
return (
|
||||
<div className="text-center py-12">
|
||||
<p className="text-muted-foreground mb-4">No prompts found.</p>
|
||||
<Button onClick={handleCreateNew}>
|
||||
<Plus className="h-4 w-4 mr-2" />
|
||||
Create Your First Prompt
|
||||
</Button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<PromptList
|
||||
prompts={prompts}
|
||||
onEdit={handleEditPrompt}
|
||||
onDelete={handleDeletePrompt}
|
||||
/>
|
||||
);
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<h1 className="text-2xl font-semibold">Prompts</h1>
|
||||
<Button onClick={handleCreateNew} disabled={loading}>
|
||||
<Plus className="h-4 w-4 mr-2" />
|
||||
New Prompt
|
||||
</Button>
|
||||
</div>
|
||||
{renderContent()}
|
||||
|
||||
{/* Create/Edit Prompt Modal */}
|
||||
{showPromptModal && (
|
||||
<div className="fixed inset-0 bg-black/50 flex items-center justify-center z-50">
|
||||
<div className="bg-background border rounded-lg shadow-lg max-w-4xl w-full mx-4 max-h-[90vh] overflow-hidden">
|
||||
<div className="p-6 border-b">
|
||||
<h2 className="text-2xl font-bold">
|
||||
{editingPrompt ? "Edit Prompt" : "Create New Prompt"}
|
||||
</h2>
|
||||
</div>
|
||||
<div className="p-6 overflow-y-auto max-h-[calc(90vh-120px)]">
|
||||
<PromptEditor
|
||||
prompt={editingPrompt}
|
||||
onSave={handleSavePrompt}
|
||||
onCancel={handleCancel}
|
||||
onDelete={handleDeletePrompt}
|
||||
error={modalError}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
16
src/llama_stack/ui/components/prompts/types.ts
Normal file
16
src/llama_stack/ui/components/prompts/types.ts
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
export interface Prompt {
|
||||
prompt_id: string;
|
||||
prompt: string | null;
|
||||
version: number;
|
||||
variables: string[];
|
||||
is_default: boolean;
|
||||
}
|
||||
|
||||
export interface PromptFormData {
|
||||
prompt: string;
|
||||
variables: string[];
|
||||
}
|
||||
|
||||
export interface PromptFilters {
|
||||
searchTerm?: string;
|
||||
}
|
||||
36
src/llama_stack/ui/components/ui/badge.tsx
Normal file
36
src/llama_stack/ui/components/ui/badge.tsx
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
import * as React from "react";
|
||||
import { cva, type VariantProps } from "class-variance-authority";
|
||||
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
const badgeVariants = cva(
|
||||
"inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2",
|
||||
{
|
||||
variants: {
|
||||
variant: {
|
||||
default:
|
||||
"border-transparent bg-primary text-primary-foreground hover:bg-primary/80",
|
||||
secondary:
|
||||
"border-transparent bg-secondary text-secondary-foreground hover:bg-secondary/80",
|
||||
destructive:
|
||||
"border-transparent bg-destructive text-destructive-foreground hover:bg-destructive/80",
|
||||
outline: "text-foreground",
|
||||
},
|
||||
},
|
||||
defaultVariants: {
|
||||
variant: "default",
|
||||
},
|
||||
}
|
||||
);
|
||||
|
||||
export interface BadgeProps
|
||||
extends React.HTMLAttributes<HTMLDivElement>,
|
||||
VariantProps<typeof badgeVariants> {}
|
||||
|
||||
function Badge({ className, variant, ...props }: BadgeProps) {
|
||||
return (
|
||||
<div className={cn(badgeVariants({ variant }), className)} {...props} />
|
||||
);
|
||||
}
|
||||
|
||||
export { Badge, badgeVariants };
|
||||
24
src/llama_stack/ui/components/ui/label.tsx
Normal file
24
src/llama_stack/ui/components/ui/label.tsx
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
import * as React from "react";
|
||||
import * as LabelPrimitive from "@radix-ui/react-label";
|
||||
import { cva, type VariantProps } from "class-variance-authority";
|
||||
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
const labelVariants = cva(
|
||||
"text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70"
|
||||
);
|
||||
|
||||
const Label = React.forwardRef<
|
||||
React.ElementRef<typeof LabelPrimitive.Root>,
|
||||
React.ComponentPropsWithoutRef<typeof LabelPrimitive.Root> &
|
||||
VariantProps<typeof labelVariants>
|
||||
>(({ className, ...props }, ref) => (
|
||||
<LabelPrimitive.Root
|
||||
ref={ref}
|
||||
className={cn(labelVariants(), className)}
|
||||
{...props}
|
||||
/>
|
||||
));
|
||||
Label.displayName = LabelPrimitive.Root.displayName;
|
||||
|
||||
export { Label };
|
||||
53
src/llama_stack/ui/components/ui/tabs.tsx
Normal file
53
src/llama_stack/ui/components/ui/tabs.tsx
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
import * as React from "react";
|
||||
import * as TabsPrimitive from "@radix-ui/react-tabs";
|
||||
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
const Tabs = TabsPrimitive.Root;
|
||||
|
||||
const TabsList = React.forwardRef<
|
||||
React.ElementRef<typeof TabsPrimitive.List>,
|
||||
React.ComponentPropsWithoutRef<typeof TabsPrimitive.List>
|
||||
>(({ className, ...props }, ref) => (
|
||||
<TabsPrimitive.List
|
||||
ref={ref}
|
||||
className={cn(
|
||||
"inline-flex h-10 items-center justify-center rounded-md bg-muted p-1 text-muted-foreground",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
/>
|
||||
));
|
||||
TabsList.displayName = TabsPrimitive.List.displayName;
|
||||
|
||||
const TabsTrigger = React.forwardRef<
|
||||
React.ElementRef<typeof TabsPrimitive.Trigger>,
|
||||
React.ComponentPropsWithoutRef<typeof TabsPrimitive.Trigger>
|
||||
>(({ className, ...props }, ref) => (
|
||||
<TabsPrimitive.Trigger
|
||||
ref={ref}
|
||||
className={cn(
|
||||
"inline-flex items-center justify-center whitespace-nowrap rounded-sm px-3 py-1.5 text-sm font-medium ring-offset-background transition-all focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 data-[state=active]:bg-background data-[state=active]:text-foreground data-[state=active]:shadow-sm",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
/>
|
||||
));
|
||||
TabsTrigger.displayName = TabsPrimitive.Trigger.displayName;
|
||||
|
||||
const TabsContent = React.forwardRef<
|
||||
React.ElementRef<typeof TabsPrimitive.Content>,
|
||||
React.ComponentPropsWithoutRef<typeof TabsPrimitive.Content>
|
||||
>(({ className, ...props }, ref) => (
|
||||
<TabsPrimitive.Content
|
||||
ref={ref}
|
||||
className={cn(
|
||||
"mt-2 ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
/>
|
||||
));
|
||||
TabsContent.displayName = TabsPrimitive.Content.displayName;
|
||||
|
||||
export { Tabs, TabsList, TabsTrigger, TabsContent };
|
||||
23
src/llama_stack/ui/components/ui/textarea.tsx
Normal file
23
src/llama_stack/ui/components/ui/textarea.tsx
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import * as React from "react";
|
||||
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
export type TextareaProps = React.TextareaHTMLAttributes<HTMLTextAreaElement>;
|
||||
|
||||
const Textarea = React.forwardRef<HTMLTextAreaElement, TextareaProps>(
|
||||
({ className, ...props }, ref) => {
|
||||
return (
|
||||
<textarea
|
||||
className={cn(
|
||||
"flex min-h-[80px] w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",
|
||||
className
|
||||
)}
|
||||
ref={ref}
|
||||
{...props}
|
||||
/>
|
||||
);
|
||||
}
|
||||
);
|
||||
Textarea.displayName = "Textarea";
|
||||
|
||||
export { Textarea };
|
||||
62
src/llama_stack/ui/package-lock.json
generated
62
src/llama_stack/ui/package-lock.json
generated
|
|
@ -11,14 +11,16 @@
|
|||
"@radix-ui/react-collapsible": "^1.1.12",
|
||||
"@radix-ui/react-dialog": "^1.1.15",
|
||||
"@radix-ui/react-dropdown-menu": "^2.1.16",
|
||||
"@radix-ui/react-label": "^2.1.7",
|
||||
"@radix-ui/react-select": "^2.2.6",
|
||||
"@radix-ui/react-separator": "^1.1.7",
|
||||
"@radix-ui/react-slot": "^1.2.3",
|
||||
"@radix-ui/react-tabs": "^1.1.13",
|
||||
"@radix-ui/react-tooltip": "^1.2.8",
|
||||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"framer-motion": "^12.23.24",
|
||||
"llama-stack-client": "^0.3.0",
|
||||
"llama-stack-client": "github:llamastack/llama-stack-client-typescript",
|
||||
"lucide-react": "^0.545.0",
|
||||
"next": "15.5.4",
|
||||
"next-auth": "^4.24.11",
|
||||
|
|
@ -2597,6 +2599,29 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-label": {
|
||||
"version": "2.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-label/-/react-label-2.1.7.tgz",
|
||||
"integrity": "sha512-YT1GqPSL8kJn20djelMX7/cTRp/Y9w5IZHvfxQTVHrOqa2yMl7i/UfMqKRU5V7mEyKTrUVgJXhNQPVCG8PBLoQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@radix-ui/react-primitive": "2.1.3"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"@types/react-dom": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
},
|
||||
"@types/react-dom": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-menu": {
|
||||
"version": "2.1.16",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.16.tgz",
|
||||
|
|
@ -2855,6 +2880,36 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-tabs": {
|
||||
"version": "1.1.13",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-tabs/-/react-tabs-1.1.13.tgz",
|
||||
"integrity": "sha512-7xdcatg7/U+7+Udyoj2zodtI9H/IIopqo+YOIcZOq1nJwXWBZ9p8xiu5llXlekDbZkca79a/fozEYQXIA4sW6A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@radix-ui/primitive": "1.1.3",
|
||||
"@radix-ui/react-context": "1.1.2",
|
||||
"@radix-ui/react-direction": "1.1.1",
|
||||
"@radix-ui/react-id": "1.1.1",
|
||||
"@radix-ui/react-presence": "1.1.5",
|
||||
"@radix-ui/react-primitive": "2.1.3",
|
||||
"@radix-ui/react-roving-focus": "1.1.11",
|
||||
"@radix-ui/react-use-controllable-state": "1.2.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"@types/react-dom": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
},
|
||||
"@types/react-dom": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-tooltip": {
|
||||
"version": "1.2.8",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz",
|
||||
|
|
@ -9629,9 +9684,8 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/llama-stack-client": {
|
||||
"version": "0.3.0",
|
||||
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.3.0.tgz",
|
||||
"integrity": "sha512-76K/t1doaGmlBbDxCADaral9Vccvys9P8pqAMIhwBhMAqWudCEORrMMhUSg+pjhamWmEKj3wa++d4zeOGbfN/w==",
|
||||
"version": "0.4.0-alpha.1",
|
||||
"resolved": "git+ssh://git@github.com/llamastack/llama-stack-client-typescript.git#78de4862c4b7d77939ac210fa9f9bde77a2c5c5f",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/node": "^18.11.18",
|
||||
|
|
|
|||
|
|
@ -16,14 +16,16 @@
|
|||
"@radix-ui/react-collapsible": "^1.1.12",
|
||||
"@radix-ui/react-dialog": "^1.1.15",
|
||||
"@radix-ui/react-dropdown-menu": "^2.1.16",
|
||||
"@radix-ui/react-label": "^2.1.7",
|
||||
"@radix-ui/react-select": "^2.2.6",
|
||||
"@radix-ui/react-separator": "^1.1.7",
|
||||
"@radix-ui/react-slot": "^1.2.3",
|
||||
"@radix-ui/react-tabs": "^1.1.13",
|
||||
"@radix-ui/react-tooltip": "^1.2.8",
|
||||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"framer-motion": "^12.23.24",
|
||||
"llama-stack-client": "^0.3.0",
|
||||
"llama-stack-client": "github:llamastack/llama-stack-client-typescript",
|
||||
"lucide-react": "^0.545.0",
|
||||
"next": "15.5.4",
|
||||
"next-auth": "^4.24.11",
|
||||
|
|
|
|||
|
|
@ -84,5 +84,6 @@
|
|||
}
|
||||
],
|
||||
"is_streaming": false
|
||||
}
|
||||
},
|
||||
"id_normalization_mapping": {}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -171,6 +171,10 @@ def pytest_addoption(parser):
|
|||
"--embedding-model",
|
||||
help="comma-separated list of embedding models. Fixture name: embedding_model_id",
|
||||
)
|
||||
parser.addoption(
|
||||
"--rerank-model",
|
||||
help="comma-separated list of rerank models. Fixture name: rerank_model_id",
|
||||
)
|
||||
parser.addoption(
|
||||
"--safety-shield",
|
||||
help="comma-separated list of safety shields. Fixture name: shield_id",
|
||||
|
|
@ -249,6 +253,7 @@ def pytest_generate_tests(metafunc):
|
|||
"shield_id": ("--safety-shield", "shield"),
|
||||
"judge_model_id": ("--judge-model", "judge"),
|
||||
"embedding_dimension": ("--embedding-dimension", "dim"),
|
||||
"rerank_model_id": ("--rerank-model", "rerank"),
|
||||
}
|
||||
|
||||
# Collect all parameters and their values
|
||||
|
|
|
|||
|
|
@ -153,6 +153,7 @@ def client_with_models(
|
|||
vision_model_id,
|
||||
embedding_model_id,
|
||||
judge_model_id,
|
||||
rerank_model_id,
|
||||
):
|
||||
client = llama_stack_client
|
||||
|
||||
|
|
@ -170,6 +171,9 @@ def client_with_models(
|
|||
|
||||
if embedding_model_id and embedding_model_id not in model_ids:
|
||||
raise ValueError(f"embedding_model_id {embedding_model_id} not found")
|
||||
|
||||
if rerank_model_id and rerank_model_id not in model_ids:
|
||||
raise ValueError(f"rerank_model_id {rerank_model_id} not found")
|
||||
return client
|
||||
|
||||
|
||||
|
|
@ -185,7 +189,14 @@ def model_providers(llama_stack_client):
|
|||
|
||||
@pytest.fixture(autouse=True)
|
||||
def skip_if_no_model(request):
|
||||
model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id", "shield_id"]
|
||||
model_fixtures = [
|
||||
"text_model_id",
|
||||
"vision_model_id",
|
||||
"embedding_model_id",
|
||||
"judge_model_id",
|
||||
"shield_id",
|
||||
"rerank_model_id",
|
||||
]
|
||||
test_func = request.node.function
|
||||
|
||||
actual_params = inspect.signature(test_func).parameters.keys()
|
||||
|
|
|
|||
|
|
@ -721,6 +721,6 @@ def test_openai_chat_completion_structured_output(openai_client, text_model_id,
|
|||
print(response.choices[0].message.content)
|
||||
answer = AnswerFormat.model_validate_json(response.choices[0].message.content)
|
||||
expected = tc["expected"]
|
||||
assert answer.first_name == expected["first_name"]
|
||||
assert answer.last_name == expected["last_name"]
|
||||
assert expected["first_name"].lower() in answer.first_name.lower()
|
||||
assert expected["last_name"].lower() in answer.last_name.lower()
|
||||
assert answer.year_of_birth == expected["year_of_birth"]
|
||||
|
|
|
|||
214
tests/integration/inference/test_rerank.py
Normal file
214
tests/integration/inference/test_rerank.py
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import pytest
|
||||
from llama_stack_client import BadRequestError as LlamaStackBadRequestError
|
||||
from llama_stack_client.types.alpha import InferenceRerankResponse
|
||||
from llama_stack_client.types.shared.interleaved_content import (
|
||||
ImageContentItem,
|
||||
ImageContentItemImage,
|
||||
ImageContentItemImageURL,
|
||||
TextContentItem,
|
||||
)
|
||||
|
||||
from llama_stack.core.library_client import LlamaStackAsLibraryClient
|
||||
|
||||
# Test data
|
||||
DUMMY_STRING = "string_1"
|
||||
DUMMY_STRING2 = "string_2"
|
||||
DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text")
|
||||
DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text")
|
||||
DUMMY_IMAGE_URL = ImageContentItem(
|
||||
image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
|
||||
)
|
||||
DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
|
||||
|
||||
PROVIDERS_SUPPORTING_MEDIA = {} # Providers that support media input for rerank models
|
||||
|
||||
|
||||
def skip_if_provider_doesnt_support_rerank(inference_provider_type):
|
||||
supported_providers = {"remote::nvidia"}
|
||||
if inference_provider_type not in supported_providers:
|
||||
pytest.skip(f"{inference_provider_type} doesn't support rerank models")
|
||||
|
||||
|
||||
def _validate_rerank_response(response: InferenceRerankResponse, items: list) -> None:
|
||||
"""
|
||||
Validate that a rerank response has the correct structure and ordering.
|
||||
|
||||
Args:
|
||||
response: The InferenceRerankResponse to validate
|
||||
items: The original items list that was ranked
|
||||
|
||||
Raises:
|
||||
AssertionError: If any validation fails
|
||||
"""
|
||||
seen = set()
|
||||
last_score = float("inf")
|
||||
for d in response:
|
||||
assert 0 <= d.index < len(items), f"Index {d.index} out of bounds for {len(items)} items"
|
||||
assert d.index not in seen, f"Duplicate index {d.index} found"
|
||||
seen.add(d.index)
|
||||
assert isinstance(d.relevance_score, float), f"Score must be float, got {type(d.relevance_score)}"
|
||||
assert d.relevance_score <= last_score, f"Scores not in descending order: {d.relevance_score} > {last_score}"
|
||||
last_score = d.relevance_score
|
||||
|
||||
|
||||
def _validate_semantic_ranking(response: InferenceRerankResponse, items: list, expected_first_item: str) -> None:
|
||||
"""
|
||||
Validate that the expected most relevant item ranks first.
|
||||
|
||||
Args:
|
||||
response: The InferenceRerankResponse to validate
|
||||
items: The original items list that was ranked
|
||||
expected_first_item: The expected first item in the ranking
|
||||
|
||||
Raises:
|
||||
AssertionError: If any validation fails
|
||||
"""
|
||||
if not response:
|
||||
raise AssertionError("No ranking data returned in response")
|
||||
|
||||
actual_first_index = response[0].index
|
||||
actual_first_item = items[actual_first_index]
|
||||
assert actual_first_item == expected_first_item, (
|
||||
f"Expected '{expected_first_item}' to rank first, but '{actual_first_item}' ranked first instead."
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"query,items",
|
||||
[
|
||||
(DUMMY_STRING, [DUMMY_STRING, DUMMY_STRING2]),
|
||||
(DUMMY_TEXT, [DUMMY_TEXT, DUMMY_TEXT2]),
|
||||
(DUMMY_STRING, [DUMMY_STRING2, DUMMY_TEXT]),
|
||||
(DUMMY_TEXT, [DUMMY_STRING, DUMMY_TEXT2]),
|
||||
],
|
||||
ids=[
|
||||
"string-query-string-items",
|
||||
"text-query-text-items",
|
||||
"mixed-content-1",
|
||||
"mixed-content-2",
|
||||
],
|
||||
)
|
||||
def test_rerank_text(client_with_models, rerank_model_id, query, items, inference_provider_type):
|
||||
skip_if_provider_doesnt_support_rerank(inference_provider_type)
|
||||
|
||||
response = client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
|
||||
assert isinstance(response, list)
|
||||
# TODO: Add type validation for response items once InferenceRerankResponseItem is exported from llama stack client.
|
||||
assert len(response) <= len(items)
|
||||
_validate_rerank_response(response, items)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"query,items",
|
||||
[
|
||||
(DUMMY_IMAGE_URL, [DUMMY_STRING]),
|
||||
(DUMMY_IMAGE_BASE64, [DUMMY_TEXT]),
|
||||
(DUMMY_TEXT, [DUMMY_IMAGE_URL]),
|
||||
(DUMMY_IMAGE_BASE64, [DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT]),
|
||||
(DUMMY_TEXT, [DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT]),
|
||||
],
|
||||
ids=[
|
||||
"image-query-url",
|
||||
"image-query-base64",
|
||||
"text-query-image-item",
|
||||
"mixed-content-1",
|
||||
"mixed-content-2",
|
||||
],
|
||||
)
|
||||
def test_rerank_image(client_with_models, rerank_model_id, query, items, inference_provider_type):
|
||||
skip_if_provider_doesnt_support_rerank(inference_provider_type)
|
||||
|
||||
if rerank_model_id not in PROVIDERS_SUPPORTING_MEDIA:
|
||||
error_type = (
|
||||
ValueError if isinstance(client_with_models, LlamaStackAsLibraryClient) else LlamaStackBadRequestError
|
||||
)
|
||||
with pytest.raises(error_type):
|
||||
client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
|
||||
else:
|
||||
response = client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
|
||||
|
||||
assert isinstance(response, list)
|
||||
assert len(response) <= len(items)
|
||||
_validate_rerank_response(response, items)
|
||||
|
||||
|
||||
def test_rerank_max_results(client_with_models, rerank_model_id, inference_provider_type):
|
||||
skip_if_provider_doesnt_support_rerank(inference_provider_type)
|
||||
|
||||
items = [DUMMY_STRING, DUMMY_STRING2, DUMMY_TEXT, DUMMY_TEXT2]
|
||||
max_num_results = 2
|
||||
|
||||
response = client_with_models.alpha.inference.rerank(
|
||||
model=rerank_model_id,
|
||||
query=DUMMY_STRING,
|
||||
items=items,
|
||||
max_num_results=max_num_results,
|
||||
)
|
||||
|
||||
assert isinstance(response, list)
|
||||
assert len(response) == max_num_results
|
||||
_validate_rerank_response(response, items)
|
||||
|
||||
|
||||
def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_id, inference_provider_type):
|
||||
skip_if_provider_doesnt_support_rerank(inference_provider_type)
|
||||
|
||||
items = [DUMMY_STRING, DUMMY_STRING2]
|
||||
response = client_with_models.alpha.inference.rerank(
|
||||
model=rerank_model_id,
|
||||
query=DUMMY_STRING,
|
||||
items=items,
|
||||
max_num_results=10, # Larger than items length
|
||||
)
|
||||
|
||||
assert isinstance(response, list)
|
||||
assert len(response) <= len(items) # Should return at most len(items)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"query,items,expected_first_item",
|
||||
[
|
||||
(
|
||||
"What is a reranking model? ",
|
||||
[
|
||||
"A reranking model reranks a list of items based on the query. ",
|
||||
"Machine learning algorithms learn patterns from data. ",
|
||||
"Python is a programming language. ",
|
||||
],
|
||||
"A reranking model reranks a list of items based on the query. ",
|
||||
),
|
||||
(
|
||||
"What is C++?",
|
||||
[
|
||||
"Learning new things is interesting. ",
|
||||
"C++ is a programming language. ",
|
||||
"Books provide knowledge and entertainment. ",
|
||||
],
|
||||
"C++ is a programming language. ",
|
||||
),
|
||||
(
|
||||
"What are good learning habits? ",
|
||||
[
|
||||
"Cooking pasta is a fun activity. ",
|
||||
"Plants need water and sunlight. ",
|
||||
"Good learning habits include reading daily and taking notes. ",
|
||||
],
|
||||
"Good learning habits include reading daily and taking notes. ",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_rerank_semantic_correctness(
|
||||
client_with_models, rerank_model_id, query, items, expected_first_item, inference_provider_type
|
||||
):
|
||||
skip_if_provider_doesnt_support_rerank(inference_provider_type)
|
||||
|
||||
response = client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
|
||||
|
||||
_validate_rerank_response(response, items)
|
||||
_validate_semantic_ranking(response, items, expected_first_item)
|
||||
|
|
@ -4,18 +4,75 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import pytest
|
||||
from llama_stack_client import LlamaStackClient
|
||||
|
||||
from llama_stack import LlamaStackAsLibraryClient
|
||||
|
||||
|
||||
class TestInspect:
|
||||
@pytest.mark.skip(reason="inspect tests disabled")
|
||||
def test_health(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
|
||||
health = llama_stack_client.inspect.health()
|
||||
assert health is not None
|
||||
assert health.status == "OK"
|
||||
|
||||
@pytest.mark.skip(reason="inspect tests disabled")
|
||||
def test_version(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
|
||||
version = llama_stack_client.inspect.version()
|
||||
assert version is not None
|
||||
assert version.version is not None
|
||||
|
||||
@pytest.mark.skip(reason="inspect tests disabled")
|
||||
def test_list_routes_default(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
|
||||
"""Test list_routes with default filter (non-deprecated v1 routes)."""
|
||||
response = llama_stack_client.routes.list()
|
||||
assert response is not None
|
||||
assert hasattr(response, "data")
|
||||
routes = response.data
|
||||
assert len(routes) > 0
|
||||
|
||||
# All routes should be non-deprecated
|
||||
# Check that we don't see any /openai/ routes (which are deprecated)
|
||||
openai_routes = [r for r in routes if "/openai/" in r.route]
|
||||
assert len(openai_routes) == 0, "Default filter should not include deprecated /openai/ routes"
|
||||
|
||||
# Should see standard v1 routes like /inspect/routes, /health, /version
|
||||
paths = [r.route for r in routes]
|
||||
assert "/inspect/routes" in paths or "/v1/inspect/routes" in paths
|
||||
assert "/health" in paths or "/v1/health" in paths
|
||||
|
||||
@pytest.mark.skip(reason="inspect tests disabled")
|
||||
def test_list_routes_filter_by_deprecated(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
|
||||
"""Test list_routes with deprecated filter."""
|
||||
response = llama_stack_client.routes.list(api_filter="deprecated")
|
||||
assert response is not None
|
||||
assert hasattr(response, "data")
|
||||
routes = response.data
|
||||
|
||||
# When filtering for deprecated, we should get deprecated routes
|
||||
# At minimum, we should see some /openai/ routes which are deprecated
|
||||
if len(routes) > 0:
|
||||
# If there are any deprecated routes, they should include openai routes
|
||||
openai_routes = [r for r in routes if "/openai/" in r.route]
|
||||
assert len(openai_routes) > 0, "Deprecated filter should include /openai/ routes"
|
||||
|
||||
@pytest.mark.skip(reason="inspect tests disabled")
|
||||
def test_list_routes_filter_by_v1(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
|
||||
"""Test list_routes with v1 filter."""
|
||||
response = llama_stack_client.routes.list(api_filter="v1")
|
||||
assert response is not None
|
||||
assert hasattr(response, "data")
|
||||
routes = response.data
|
||||
assert len(routes) > 0
|
||||
|
||||
# Should not include deprecated routes
|
||||
openai_routes = [r for r in routes if "/openai/" in r.route]
|
||||
assert len(openai_routes) == 0
|
||||
|
||||
# Should include v1 routes
|
||||
paths = [r.route for r in routes]
|
||||
assert any(
|
||||
"/v1/" in p or p.startswith("/inspect/") or p.startswith("/health") or p.startswith("/version")
|
||||
for p in paths
|
||||
)
|
||||
|
|
|
|||
|
|
@ -6,20 +6,88 @@
|
|||
|
||||
"""Shared helpers for telemetry test collectors."""
|
||||
|
||||
import time
|
||||
from collections.abc import Iterable
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
|
||||
class SpanStub:
|
||||
class MetricStub:
|
||||
"""Unified metric interface for both in-memory and OTLP collectors."""
|
||||
|
||||
name: str
|
||||
attributes: dict[str, Any]
|
||||
value: Any
|
||||
attributes: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class SpanStub:
|
||||
"""Unified span interface for both in-memory and OTLP collectors."""
|
||||
|
||||
name: str
|
||||
attributes: dict[str, Any] | None = None
|
||||
resource_attributes: dict[str, Any] | None = None
|
||||
events: list[dict[str, Any]] | None = None
|
||||
trace_id: str | None = None
|
||||
span_id: str | None = None
|
||||
|
||||
@property
|
||||
def context(self):
|
||||
"""Provide context-like interface for trace_id compatibility."""
|
||||
if self.trace_id is None:
|
||||
return None
|
||||
return type("Context", (), {"trace_id": int(self.trace_id, 16)})()
|
||||
|
||||
def get_trace_id(self) -> str | None:
|
||||
"""Get trace ID in hex format.
|
||||
|
||||
Tries context.trace_id first, then falls back to direct trace_id.
|
||||
"""
|
||||
context = getattr(self, "context", None)
|
||||
if context and getattr(context, "trace_id", None) is not None:
|
||||
return f"{context.trace_id:032x}"
|
||||
return getattr(self, "trace_id", None)
|
||||
|
||||
def has_message(self, text: str) -> bool:
|
||||
"""Check if span contains a specific message in its args."""
|
||||
if self.attributes is None:
|
||||
return False
|
||||
args = self.attributes.get("__args__")
|
||||
if not args or not isinstance(args, str):
|
||||
return False
|
||||
return text in args
|
||||
|
||||
def is_root_span(self) -> bool:
|
||||
"""Check if this is a root span."""
|
||||
if self.attributes is None:
|
||||
return False
|
||||
return self.attributes.get("__root__") is True
|
||||
|
||||
def is_autotraced(self) -> bool:
|
||||
"""Check if this span was automatically traced."""
|
||||
if self.attributes is None:
|
||||
return False
|
||||
return self.attributes.get("__autotraced__") is True
|
||||
|
||||
def get_span_type(self) -> str | None:
|
||||
"""Get the span type (async, sync, async_generator)."""
|
||||
if self.attributes is None:
|
||||
return None
|
||||
return self.attributes.get("__type__")
|
||||
|
||||
def get_class_method(self) -> tuple[str | None, str | None]:
|
||||
"""Get the class and method names for autotraced spans."""
|
||||
if self.attributes is None:
|
||||
return None, None
|
||||
return (self.attributes.get("__class__"), self.attributes.get("__method__"))
|
||||
|
||||
def get_location(self) -> str | None:
|
||||
"""Get the location (library_client, server) for root spans."""
|
||||
if self.attributes is None:
|
||||
return None
|
||||
return self.attributes.get("__location__")
|
||||
|
||||
|
||||
def _value_to_python(value: Any) -> Any:
|
||||
kind = value.WhichOneof("value")
|
||||
|
|
@ -56,14 +124,18 @@ def events_to_list(events: Iterable[Any]) -> list[dict[str, Any]]:
|
|||
|
||||
|
||||
class BaseTelemetryCollector:
|
||||
"""Base class for telemetry collectors that ensures consistent return types.
|
||||
|
||||
All collectors must return SpanStub objects to ensure test compatibility
|
||||
across both library-client and server modes.
|
||||
"""
|
||||
|
||||
def get_spans(
|
||||
self,
|
||||
expected_count: int | None = None,
|
||||
timeout: float = 5.0,
|
||||
poll_interval: float = 0.05,
|
||||
) -> tuple[Any, ...]:
|
||||
import time
|
||||
|
||||
) -> tuple[SpanStub, ...]:
|
||||
deadline = time.time() + timeout
|
||||
min_count = expected_count if expected_count is not None else 1
|
||||
last_len: int | None = None
|
||||
|
|
@ -91,16 +163,206 @@ class BaseTelemetryCollector:
|
|||
last_len = len(spans)
|
||||
time.sleep(poll_interval)
|
||||
|
||||
def get_metrics(self) -> Any | None:
|
||||
return self._snapshot_metrics()
|
||||
def get_metrics(
|
||||
self,
|
||||
expected_count: int | None = None,
|
||||
timeout: float = 5.0,
|
||||
poll_interval: float = 0.05,
|
||||
expect_model_id: str | None = None,
|
||||
) -> dict[str, MetricStub]:
|
||||
"""Get metrics with polling until metrics are available or timeout is reached."""
|
||||
|
||||
# metrics need to be collected since get requests delete stored metrics
|
||||
deadline = time.time() + timeout
|
||||
min_count = expected_count if expected_count is not None else 1
|
||||
accumulated_metrics = {}
|
||||
count_metrics_with_model_id = 0
|
||||
|
||||
while time.time() < deadline:
|
||||
current_metrics = self._snapshot_metrics()
|
||||
if current_metrics:
|
||||
for metric in current_metrics:
|
||||
metric_name = metric.name
|
||||
if metric_name not in accumulated_metrics:
|
||||
accumulated_metrics[metric_name] = metric
|
||||
if (
|
||||
expect_model_id
|
||||
and metric.attributes
|
||||
and metric.attributes.get("model_id") == expect_model_id
|
||||
):
|
||||
count_metrics_with_model_id += 1
|
||||
else:
|
||||
accumulated_metrics[metric_name] = metric
|
||||
|
||||
# Check if we have enough metrics
|
||||
if len(accumulated_metrics) >= min_count:
|
||||
if not expect_model_id:
|
||||
return accumulated_metrics
|
||||
if count_metrics_with_model_id >= min_count:
|
||||
return accumulated_metrics
|
||||
|
||||
time.sleep(poll_interval)
|
||||
|
||||
return accumulated_metrics
|
||||
|
||||
@staticmethod
|
||||
def _convert_attributes_to_dict(attrs: Any) -> dict[str, Any]:
|
||||
"""Convert various attribute types to a consistent dictionary format.
|
||||
|
||||
Handles mappingproxy, dict, and other attribute types.
|
||||
"""
|
||||
if attrs is None:
|
||||
return {}
|
||||
|
||||
try:
|
||||
return dict(attrs.items()) # type: ignore[attr-defined]
|
||||
except AttributeError:
|
||||
try:
|
||||
return dict(attrs)
|
||||
except TypeError:
|
||||
return dict(attrs) if attrs else {}
|
||||
|
||||
@staticmethod
|
||||
def _extract_trace_span_ids(span: Any) -> tuple[str | None, str | None]:
|
||||
"""Extract trace_id and span_id from OpenTelemetry span object.
|
||||
|
||||
Handles both context-based and direct attribute access.
|
||||
"""
|
||||
trace_id = None
|
||||
span_id = None
|
||||
|
||||
context = getattr(span, "context", None)
|
||||
if context:
|
||||
trace_id = f"{context.trace_id:032x}"
|
||||
span_id = f"{context.span_id:016x}"
|
||||
else:
|
||||
trace_id = getattr(span, "trace_id", None)
|
||||
span_id = getattr(span, "span_id", None)
|
||||
|
||||
return trace_id, span_id
|
||||
|
||||
@staticmethod
|
||||
def _create_span_stub_from_opentelemetry(span: Any) -> SpanStub:
|
||||
"""Create SpanStub from OpenTelemetry span object.
|
||||
|
||||
This helper reduces code duplication between collectors.
|
||||
"""
|
||||
trace_id, span_id = BaseTelemetryCollector._extract_trace_span_ids(span)
|
||||
attributes = BaseTelemetryCollector._convert_attributes_to_dict(span.attributes) or {}
|
||||
|
||||
return SpanStub(
|
||||
name=span.name,
|
||||
attributes=attributes,
|
||||
trace_id=trace_id,
|
||||
span_id=span_id,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _create_span_stub_from_protobuf(span: Any, resource_attrs: dict[str, Any] | None = None) -> SpanStub:
|
||||
"""Create SpanStub from protobuf span object.
|
||||
|
||||
This helper handles the different structure of protobuf spans.
|
||||
"""
|
||||
attributes = attributes_to_dict(span.attributes) or {}
|
||||
events = events_to_list(span.events) if span.events else None
|
||||
trace_id = span.trace_id.hex() if span.trace_id else None
|
||||
span_id = span.span_id.hex() if span.span_id else None
|
||||
|
||||
return SpanStub(
|
||||
name=span.name,
|
||||
attributes=attributes,
|
||||
resource_attributes=resource_attrs,
|
||||
events=events,
|
||||
trace_id=trace_id,
|
||||
span_id=span_id,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _extract_metric_from_opentelemetry(metric: Any) -> MetricStub | None:
|
||||
"""Extract MetricStub from OpenTelemetry metric object.
|
||||
|
||||
This helper reduces code duplication between collectors.
|
||||
"""
|
||||
if not (hasattr(metric, "name") and hasattr(metric, "data") and hasattr(metric.data, "data_points")):
|
||||
return None
|
||||
|
||||
if not (metric.data.data_points and len(metric.data.data_points) > 0):
|
||||
return None
|
||||
|
||||
# Get the value from the first data point
|
||||
data_point = metric.data.data_points[0]
|
||||
|
||||
# Handle different metric types
|
||||
if hasattr(data_point, "value"):
|
||||
# Counter or Gauge
|
||||
value = data_point.value
|
||||
elif hasattr(data_point, "sum"):
|
||||
# Histogram - use the sum of all recorded values
|
||||
value = data_point.sum
|
||||
else:
|
||||
return None
|
||||
|
||||
# Extract attributes if available
|
||||
attributes = {}
|
||||
if hasattr(data_point, "attributes"):
|
||||
attrs = data_point.attributes
|
||||
if attrs is not None and hasattr(attrs, "items"):
|
||||
attributes = dict(attrs.items())
|
||||
elif attrs is not None and not isinstance(attrs, dict):
|
||||
attributes = dict(attrs)
|
||||
|
||||
return MetricStub(
|
||||
name=metric.name,
|
||||
value=value,
|
||||
attributes=attributes or {},
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _create_metric_stub_from_protobuf(metric: Any) -> MetricStub | None:
|
||||
"""Create MetricStub from protobuf metric object.
|
||||
|
||||
Protobuf metrics have a different structure than OpenTelemetry metrics.
|
||||
They can have sum, gauge, or histogram data.
|
||||
"""
|
||||
if not hasattr(metric, "name"):
|
||||
return None
|
||||
|
||||
# Try to extract value from different metric types
|
||||
for metric_type in ["sum", "gauge", "histogram"]:
|
||||
if hasattr(metric, metric_type):
|
||||
metric_data = getattr(metric, metric_type)
|
||||
if metric_data and hasattr(metric_data, "data_points"):
|
||||
data_points = metric_data.data_points
|
||||
if data_points and len(data_points) > 0:
|
||||
data_point = data_points[0]
|
||||
|
||||
# Extract attributes first (needed for all metric types)
|
||||
attributes = (
|
||||
attributes_to_dict(data_point.attributes) if hasattr(data_point, "attributes") else {}
|
||||
)
|
||||
|
||||
# Extract value based on metric type
|
||||
if metric_type == "sum":
|
||||
value = data_point.as_int
|
||||
elif metric_type == "gauge":
|
||||
value = data_point.as_double
|
||||
else: # histogram
|
||||
value = data_point.sum
|
||||
|
||||
return MetricStub(
|
||||
name=metric.name,
|
||||
value=value,
|
||||
attributes=attributes,
|
||||
)
|
||||
return None
|
||||
|
||||
def clear(self) -> None:
|
||||
self._clear_impl()
|
||||
|
||||
def _snapshot_spans(self) -> tuple[Any, ...]: # pragma: no cover - interface hook
|
||||
def _snapshot_spans(self) -> tuple[SpanStub, ...]: # pragma: no cover - interface hook
|
||||
raise NotImplementedError
|
||||
|
||||
def _snapshot_metrics(self) -> Any | None: # pragma: no cover - interface hook
|
||||
def _snapshot_metrics(self) -> tuple[MetricStub, ...] | None: # pragma: no cover - interface hook
|
||||
raise NotImplementedError
|
||||
|
||||
def _clear_impl(self) -> None: # pragma: no cover - interface hook
|
||||
|
|
|
|||
|
|
@ -6,8 +6,6 @@
|
|||
|
||||
"""In-memory telemetry collector for library-client tests."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
import opentelemetry.metrics as otel_metrics
|
||||
import opentelemetry.trace as otel_trace
|
||||
from opentelemetry import metrics, trace
|
||||
|
|
@ -19,47 +17,42 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanE
|
|||
|
||||
import llama_stack.core.telemetry.telemetry as telemetry_module
|
||||
|
||||
from .base import BaseTelemetryCollector, SpanStub
|
||||
from .base import BaseTelemetryCollector, MetricStub, SpanStub
|
||||
|
||||
|
||||
class InMemoryTelemetryCollector(BaseTelemetryCollector):
|
||||
"""In-memory telemetry collector for library-client tests.
|
||||
|
||||
Converts OpenTelemetry span objects to SpanStub objects to ensure
|
||||
consistent interface with OTLP collector used in server mode.
|
||||
"""
|
||||
|
||||
def __init__(self, span_exporter: InMemorySpanExporter, metric_reader: InMemoryMetricReader) -> None:
|
||||
self._span_exporter = span_exporter
|
||||
self._metric_reader = metric_reader
|
||||
|
||||
def _snapshot_spans(self) -> tuple[Any, ...]:
|
||||
def _snapshot_spans(self) -> tuple[SpanStub, ...]:
|
||||
spans = []
|
||||
for span in self._span_exporter.get_finished_spans():
|
||||
trace_id = None
|
||||
span_id = None
|
||||
context = getattr(span, "context", None)
|
||||
if context:
|
||||
trace_id = f"{context.trace_id:032x}"
|
||||
span_id = f"{context.span_id:016x}"
|
||||
else:
|
||||
trace_id = getattr(span, "trace_id", None)
|
||||
span_id = getattr(span, "span_id", None)
|
||||
|
||||
stub = SpanStub(
|
||||
span.name,
|
||||
span.attributes,
|
||||
getattr(span, "resource", None),
|
||||
getattr(span, "events", None),
|
||||
trace_id,
|
||||
span_id,
|
||||
)
|
||||
spans.append(stub)
|
||||
|
||||
spans.append(self._create_span_stub_from_opentelemetry(span))
|
||||
return tuple(spans)
|
||||
|
||||
def _snapshot_metrics(self) -> Any | None:
|
||||
def _snapshot_metrics(self) -> tuple[MetricStub, ...] | None:
|
||||
data = self._metric_reader.get_metrics_data()
|
||||
if data and data.resource_metrics:
|
||||
resource_metric = data.resource_metrics[0]
|
||||
if resource_metric.scope_metrics:
|
||||
return resource_metric.scope_metrics[0].metrics
|
||||
if not data or not data.resource_metrics:
|
||||
return None
|
||||
|
||||
metric_stubs = []
|
||||
for resource_metric in data.resource_metrics:
|
||||
if resource_metric.scope_metrics:
|
||||
for scope_metric in resource_metric.scope_metrics:
|
||||
for metric in scope_metric.metrics:
|
||||
metric_stub = self._extract_metric_from_opentelemetry(metric)
|
||||
if metric_stub:
|
||||
metric_stubs.append(metric_stub)
|
||||
|
||||
return tuple(metric_stubs) if metric_stubs else None
|
||||
|
||||
def _clear_impl(self) -> None:
|
||||
self._span_exporter.clear()
|
||||
self._metric_reader.get_metrics_data()
|
||||
|
|
|
|||
|
|
@ -9,20 +9,20 @@
|
|||
import gzip
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
from socketserver import ThreadingMixIn
|
||||
from typing import Any
|
||||
|
||||
from opentelemetry.proto.collector.metrics.v1.metrics_service_pb2 import ExportMetricsServiceRequest
|
||||
from opentelemetry.proto.collector.trace.v1.trace_service_pb2 import ExportTraceServiceRequest
|
||||
|
||||
from .base import BaseTelemetryCollector, SpanStub, attributes_to_dict, events_to_list
|
||||
from .base import BaseTelemetryCollector, MetricStub, SpanStub, attributes_to_dict
|
||||
|
||||
|
||||
class OtlpHttpTestCollector(BaseTelemetryCollector):
|
||||
def __init__(self) -> None:
|
||||
self._spans: list[SpanStub] = []
|
||||
self._metrics: list[Any] = []
|
||||
self._metrics: list[MetricStub] = []
|
||||
self._lock = threading.Lock()
|
||||
|
||||
class _ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
|
||||
|
|
@ -47,11 +47,7 @@ class OtlpHttpTestCollector(BaseTelemetryCollector):
|
|||
|
||||
for scope_spans in resource_spans.scope_spans:
|
||||
for span in scope_spans.spans:
|
||||
attributes = attributes_to_dict(span.attributes)
|
||||
events = events_to_list(span.events) if span.events else None
|
||||
trace_id = span.trace_id.hex() if span.trace_id else None
|
||||
span_id = span.span_id.hex() if span.span_id else None
|
||||
new_spans.append(SpanStub(span.name, attributes, resource_attrs or None, events, trace_id, span_id))
|
||||
new_spans.append(self._create_span_stub_from_protobuf(span, resource_attrs or None))
|
||||
|
||||
if not new_spans:
|
||||
return
|
||||
|
|
@ -60,10 +56,13 @@ class OtlpHttpTestCollector(BaseTelemetryCollector):
|
|||
self._spans.extend(new_spans)
|
||||
|
||||
def _handle_metrics(self, request: ExportMetricsServiceRequest) -> None:
|
||||
new_metrics: list[Any] = []
|
||||
new_metrics: list[MetricStub] = []
|
||||
for resource_metrics in request.resource_metrics:
|
||||
for scope_metrics in resource_metrics.scope_metrics:
|
||||
new_metrics.extend(scope_metrics.metrics)
|
||||
for metric in scope_metrics.metrics:
|
||||
metric_stub = self._create_metric_stub_from_protobuf(metric)
|
||||
if metric_stub:
|
||||
new_metrics.append(metric_stub)
|
||||
|
||||
if not new_metrics:
|
||||
return
|
||||
|
|
@ -75,11 +74,40 @@ class OtlpHttpTestCollector(BaseTelemetryCollector):
|
|||
with self._lock:
|
||||
return tuple(self._spans)
|
||||
|
||||
def _snapshot_metrics(self) -> Any | None:
|
||||
def _snapshot_metrics(self) -> tuple[MetricStub, ...] | None:
|
||||
with self._lock:
|
||||
return list(self._metrics) if self._metrics else None
|
||||
return tuple(self._metrics) if self._metrics else None
|
||||
|
||||
def _clear_impl(self) -> None:
|
||||
"""Clear telemetry over a period of time to prevent race conditions between tests."""
|
||||
with self._lock:
|
||||
self._spans.clear()
|
||||
self._metrics.clear()
|
||||
|
||||
# Prevent race conditions where telemetry arrives after clear() but before
|
||||
# the test starts, causing contamination between tests
|
||||
deadline = time.time() + 2.0 # Maximum wait time
|
||||
last_span_count = 0
|
||||
last_metric_count = 0
|
||||
stable_iterations = 0
|
||||
|
||||
while time.time() < deadline:
|
||||
with self._lock:
|
||||
current_span_count = len(self._spans)
|
||||
current_metric_count = len(self._metrics)
|
||||
|
||||
if current_span_count == last_span_count and current_metric_count == last_metric_count:
|
||||
stable_iterations += 1
|
||||
if stable_iterations >= 4: # 4 * 50ms = 200ms of stability
|
||||
break
|
||||
else:
|
||||
stable_iterations = 0
|
||||
last_span_count = current_span_count
|
||||
last_metric_count = current_metric_count
|
||||
|
||||
time.sleep(0.05)
|
||||
|
||||
# Final clear to remove any telemetry that arrived during stabilization
|
||||
with self._lock:
|
||||
self._spans.clear()
|
||||
self._metrics.clear()
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@
|
|||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "import torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Load the pre-trained model and tokenizer\nmodel_name = \"CompVis/transformers-base-uncased\"\nmodel = AutoModelForCausalLM.from_pretrained(model_name)\ntokenizer = AutoTokenizer.from_pretrained(model_name)\n\n# Set the temperature to 0.7\ntemperature = 0.7\n\n# Define a function to generate text\ndef generate_text(prompt, max_length=100):\n input",
|
||||
"content": "To test the trace function from OpenAI's API with a temperature of 0.7, you can use the following Python code:\n\n```python\nimport json\n\n# Import the required libraries\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Set the API endpoint and model name\nmodel_name = \"dalle-mini\"\n\n# Initialize the model and tokenizer\nmodel = AutoModelForCausalLM.from_pretrained(model_name)\ntokenizer = AutoTokenizer.from_pretrained(model_name)\n\n",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
|
|
@ -55,5 +55,6 @@
|
|||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
},
|
||||
"id_normalization_mapping": {}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,48 +4,17 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""Telemetry tests verifying @trace_protocol decorator format across stack modes."""
|
||||
"""Telemetry tests verifying @trace_protocol decorator format across stack modes.
|
||||
|
||||
Note: The mock_otlp_collector fixture automatically clears telemetry data
|
||||
before and after each test, ensuring test isolation.
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
|
||||
def _span_attributes(span):
|
||||
attrs = getattr(span, "attributes", None)
|
||||
if attrs is None:
|
||||
return {}
|
||||
# ReadableSpan.attributes acts like a mapping
|
||||
try:
|
||||
return dict(attrs.items()) # type: ignore[attr-defined]
|
||||
except AttributeError:
|
||||
try:
|
||||
return dict(attrs)
|
||||
except TypeError:
|
||||
return attrs
|
||||
|
||||
|
||||
def _span_attr(span, key):
|
||||
attrs = _span_attributes(span)
|
||||
return attrs.get(key)
|
||||
|
||||
|
||||
def _span_trace_id(span):
|
||||
context = getattr(span, "context", None)
|
||||
if context and getattr(context, "trace_id", None) is not None:
|
||||
return f"{context.trace_id:032x}"
|
||||
return getattr(span, "trace_id", None)
|
||||
|
||||
|
||||
def _span_has_message(span, text: str) -> bool:
|
||||
args = _span_attr(span, "__args__")
|
||||
if not args or not isinstance(args, str):
|
||||
return False
|
||||
return text in args
|
||||
|
||||
|
||||
def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id):
|
||||
"""Verify streaming adds chunk_count and __type__=async_generator."""
|
||||
mock_otlp_collector.clear()
|
||||
|
||||
stream = llama_stack_client.chat.completions.create(
|
||||
model=text_model_id,
|
||||
messages=[{"role": "user", "content": "Test trace openai 1"}],
|
||||
|
|
@ -62,16 +31,16 @@ def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_mod
|
|||
(
|
||||
span
|
||||
for span in reversed(spans)
|
||||
if _span_attr(span, "__type__") == "async_generator"
|
||||
and _span_attr(span, "chunk_count")
|
||||
and _span_has_message(span, "Test trace openai 1")
|
||||
if span.get_span_type() == "async_generator"
|
||||
and span.attributes.get("chunk_count")
|
||||
and span.has_message("Test trace openai 1")
|
||||
),
|
||||
None,
|
||||
)
|
||||
|
||||
assert async_generator_span is not None
|
||||
|
||||
raw_chunk_count = _span_attr(async_generator_span, "chunk_count")
|
||||
raw_chunk_count = async_generator_span.attributes.get("chunk_count")
|
||||
assert raw_chunk_count is not None
|
||||
chunk_count = int(raw_chunk_count)
|
||||
|
||||
|
|
@ -80,7 +49,6 @@ def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_mod
|
|||
|
||||
def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client, text_model_id):
|
||||
"""Comprehensive validation of telemetry data format including spans and metrics."""
|
||||
mock_otlp_collector.clear()
|
||||
|
||||
response = llama_stack_client.chat.completions.create(
|
||||
model=text_model_id,
|
||||
|
|
@ -101,37 +69,36 @@ def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client,
|
|||
# Verify spans
|
||||
spans = mock_otlp_collector.get_spans(expected_count=7)
|
||||
target_span = next(
|
||||
(span for span in reversed(spans) if _span_has_message(span, "Test trace openai with temperature 0.7")),
|
||||
(span for span in reversed(spans) if span.has_message("Test trace openai with temperature 0.7")),
|
||||
None,
|
||||
)
|
||||
assert target_span is not None
|
||||
|
||||
trace_id = _span_trace_id(target_span)
|
||||
trace_id = target_span.get_trace_id()
|
||||
assert trace_id is not None
|
||||
|
||||
spans = [span for span in spans if _span_trace_id(span) == trace_id]
|
||||
spans = [span for span in spans if _span_attr(span, "__root__") or _span_attr(span, "__autotraced__")]
|
||||
spans = [span for span in spans if span.get_trace_id() == trace_id]
|
||||
spans = [span for span in spans if span.is_root_span() or span.is_autotraced()]
|
||||
assert len(spans) >= 4
|
||||
|
||||
# Collect all model_ids found in spans
|
||||
logged_model_ids = []
|
||||
|
||||
for span in spans:
|
||||
attrs = _span_attributes(span)
|
||||
attrs = span.attributes
|
||||
assert attrs is not None
|
||||
|
||||
# Root span is created manually by tracing middleware, not by @trace_protocol decorator
|
||||
is_root_span = attrs.get("__root__") is True
|
||||
|
||||
if is_root_span:
|
||||
assert attrs.get("__location__") in ["library_client", "server"]
|
||||
if span.is_root_span():
|
||||
assert span.get_location() in ["library_client", "server"]
|
||||
continue
|
||||
|
||||
assert attrs.get("__autotraced__")
|
||||
assert attrs.get("__class__") and attrs.get("__method__")
|
||||
assert attrs.get("__type__") in ["async", "sync", "async_generator"]
|
||||
assert span.is_autotraced()
|
||||
class_name, method_name = span.get_class_method()
|
||||
assert class_name and method_name
|
||||
assert span.get_span_type() in ["async", "sync", "async_generator"]
|
||||
|
||||
args_field = attrs.get("__args__")
|
||||
args_field = span.attributes.get("__args__")
|
||||
if args_field:
|
||||
args = json.loads(args_field)
|
||||
if "model_id" in args:
|
||||
|
|
@ -140,21 +107,40 @@ def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client,
|
|||
# At least one span should capture the fully qualified model ID
|
||||
assert text_model_id in logged_model_ids, f"Expected to find {text_model_id} in spans, but got {logged_model_ids}"
|
||||
|
||||
# TODO: re-enable this once metrics get fixed
|
||||
"""
|
||||
# Verify token usage metrics in response
|
||||
metrics = mock_otlp_collector.get_metrics()
|
||||
# Verify token usage metrics in response using polling
|
||||
expected_metrics = ["completion_tokens", "total_tokens", "prompt_tokens"]
|
||||
metrics = mock_otlp_collector.get_metrics(expected_count=len(expected_metrics), expect_model_id=text_model_id)
|
||||
assert len(metrics) > 0, "No metrics found within timeout"
|
||||
|
||||
assert metrics
|
||||
for metric in metrics:
|
||||
assert metric.name in ["completion_tokens", "total_tokens", "prompt_tokens"]
|
||||
assert metric.unit == "tokens"
|
||||
assert metric.data.data_points and len(metric.data.data_points) == 1
|
||||
match metric.name:
|
||||
case "completion_tokens":
|
||||
assert metric.data.data_points[0].value == usage["completion_tokens"]
|
||||
case "total_tokens":
|
||||
assert metric.data.data_points[0].value == usage["total_tokens"]
|
||||
case "prompt_tokens":
|
||||
assert metric.data.data_points[0].value == usage["prompt_tokens"
|
||||
"""
|
||||
# Filter metrics to only those from the specific model used in the request
|
||||
# This prevents issues when multiple metrics with the same name exist from different models
|
||||
# (e.g., when safety models like llama-guard are also called)
|
||||
inference_model_metrics = {}
|
||||
all_model_ids = set()
|
||||
|
||||
for name, metric in metrics.items():
|
||||
if name in expected_metrics:
|
||||
model_id = metric.attributes.get("model_id")
|
||||
all_model_ids.add(model_id)
|
||||
# Only include metrics from the specific model used in the test request
|
||||
if model_id == text_model_id:
|
||||
inference_model_metrics[name] = metric
|
||||
|
||||
# Verify expected metrics are present for our specific model
|
||||
for metric_name in expected_metrics:
|
||||
assert metric_name in inference_model_metrics, (
|
||||
f"Expected metric {metric_name} for model {text_model_id} not found. "
|
||||
f"Available models: {sorted(all_model_ids)}, "
|
||||
f"Available metrics for {text_model_id}: {list(inference_model_metrics.keys())}"
|
||||
)
|
||||
|
||||
# Verify metric values match usage data
|
||||
assert inference_model_metrics["completion_tokens"].value == usage["completion_tokens"], (
|
||||
f"Expected {usage['completion_tokens']} for completion_tokens, but got {inference_model_metrics['completion_tokens'].value}"
|
||||
)
|
||||
assert inference_model_metrics["total_tokens"].value == usage["total_tokens"], (
|
||||
f"Expected {usage['total_tokens']} for total_tokens, but got {inference_model_metrics['total_tokens'].value}"
|
||||
)
|
||||
assert inference_model_metrics["prompt_tokens"].value == usage["prompt_tokens"], (
|
||||
f"Expected {usage['prompt_tokens']} for prompt_tokens, but got {inference_model_metrics['prompt_tokens'].value}"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -206,3 +206,65 @@ def test_parse_and_maybe_upgrade_config_invalid(invalid_config):
|
|||
def test_parse_and_maybe_upgrade_config_image_name_int(config_with_image_name_int):
|
||||
result = parse_and_maybe_upgrade_config(config_with_image_name_int)
|
||||
assert isinstance(result.image_name, str)
|
||||
|
||||
|
||||
def test_parse_and_maybe_upgrade_config_sets_external_providers_dir(up_to_date_config):
|
||||
"""Test that external_providers_dir is None when not specified (deprecated field)."""
|
||||
# Ensure the config doesn't have external_providers_dir set
|
||||
assert "external_providers_dir" not in up_to_date_config
|
||||
|
||||
result = parse_and_maybe_upgrade_config(up_to_date_config)
|
||||
|
||||
# Verify external_providers_dir is None (not set to default)
|
||||
# This aligns with the deprecation of external_providers_dir
|
||||
assert result.external_providers_dir is None
|
||||
|
||||
|
||||
def test_parse_and_maybe_upgrade_config_preserves_custom_external_providers_dir(up_to_date_config):
|
||||
"""Test that custom external_providers_dir values are preserved."""
|
||||
custom_dir = "/custom/providers/dir"
|
||||
up_to_date_config["external_providers_dir"] = custom_dir
|
||||
|
||||
result = parse_and_maybe_upgrade_config(up_to_date_config)
|
||||
|
||||
# Verify the custom value was preserved
|
||||
assert str(result.external_providers_dir) == custom_dir
|
||||
|
||||
|
||||
def test_generate_run_config_from_providers():
|
||||
"""Test that _generate_run_config_from_providers creates a valid config"""
|
||||
import argparse
|
||||
|
||||
from llama_stack.cli.stack.run import StackRun
|
||||
from llama_stack.core.datatypes import Provider
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
subparsers = parser.add_subparsers()
|
||||
stack_run = StackRun(subparsers)
|
||||
|
||||
providers = {
|
||||
"inference": [
|
||||
Provider(
|
||||
provider_type="inline::meta-reference",
|
||||
provider_id="meta-reference",
|
||||
)
|
||||
]
|
||||
}
|
||||
|
||||
config = stack_run._generate_run_config_from_providers(providers=providers)
|
||||
config_dict = config.model_dump(mode="json")
|
||||
|
||||
# Verify basic structure
|
||||
assert config_dict["image_name"] == "providers-run"
|
||||
assert "inference" in config_dict["apis"]
|
||||
assert "inference" in config_dict["providers"]
|
||||
|
||||
# Verify storage has all required stores including prompts
|
||||
assert "storage" in config_dict
|
||||
stores = config_dict["storage"]["stores"]
|
||||
assert "prompts" in stores
|
||||
assert stores["prompts"]["namespace"] == "prompts"
|
||||
|
||||
# Verify config can be parsed back
|
||||
parsed = parse_and_maybe_upgrade_config(config_dict)
|
||||
assert parsed.image_name == "providers-run"
|
||||
|
|
|
|||
251
tests/unit/providers/nvidia/test_rerank_inference.py
Normal file
251
tests/unit/providers/nvidia/test_rerank_inference.py
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import aiohttp
|
||||
import pytest
|
||||
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig
|
||||
from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
|
||||
class MockResponse:
|
||||
def __init__(self, status=200, json_data=None, text_data="OK"):
|
||||
self.status = status
|
||||
self._json_data = json_data or {"rankings": []}
|
||||
self._text_data = text_data
|
||||
|
||||
async def json(self):
|
||||
return self._json_data
|
||||
|
||||
async def text(self):
|
||||
return self._text_data
|
||||
|
||||
|
||||
class MockSession:
|
||||
def __init__(self, response):
|
||||
self.response = response
|
||||
self.post_calls = []
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||||
return False
|
||||
|
||||
def post(self, url, **kwargs):
|
||||
self.post_calls.append((url, kwargs))
|
||||
|
||||
class PostContext:
|
||||
def __init__(self, response):
|
||||
self.response = response
|
||||
|
||||
async def __aenter__(self):
|
||||
return self.response
|
||||
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||||
return False
|
||||
|
||||
return PostContext(self.response)
|
||||
|
||||
|
||||
def create_adapter(config=None, rerank_endpoints=None):
|
||||
if config is None:
|
||||
config = NVIDIAConfig(api_key="test-key")
|
||||
|
||||
adapter = NVIDIAInferenceAdapter(config=config)
|
||||
|
||||
class MockModel:
|
||||
provider_resource_id = "test-model"
|
||||
metadata = {}
|
||||
|
||||
adapter.model_store = AsyncMock()
|
||||
adapter.model_store.get_model = AsyncMock(return_value=MockModel())
|
||||
|
||||
if rerank_endpoints is not None:
|
||||
adapter.config.rerank_model_to_url = rerank_endpoints
|
||||
|
||||
return adapter
|
||||
|
||||
|
||||
async def test_rerank_basic_functionality():
|
||||
adapter = create_adapter()
|
||||
mock_response = MockResponse(json_data={"rankings": [{"index": 0, "logit": 0.5}]})
|
||||
mock_session = MockSession(mock_response)
|
||||
|
||||
with patch("aiohttp.ClientSession", return_value=mock_session):
|
||||
result = await adapter.rerank(model="test-model", query="test query", items=["item1", "item2"])
|
||||
|
||||
assert len(result.data) == 1
|
||||
assert result.data[0].index == 0
|
||||
assert result.data[0].relevance_score == 0.5
|
||||
|
||||
url, kwargs = mock_session.post_calls[0]
|
||||
payload = kwargs["json"]
|
||||
assert payload["model"] == "test-model"
|
||||
assert payload["query"] == {"text": "test query"}
|
||||
assert payload["passages"] == [{"text": "item1"}, {"text": "item2"}]
|
||||
|
||||
|
||||
async def test_missing_rankings_key():
|
||||
adapter = create_adapter()
|
||||
mock_session = MockSession(MockResponse(json_data={}))
|
||||
|
||||
with patch("aiohttp.ClientSession", return_value=mock_session):
|
||||
result = await adapter.rerank(model="test-model", query="q", items=["a"])
|
||||
|
||||
assert len(result.data) == 0
|
||||
|
||||
|
||||
async def test_hosted_with_endpoint():
|
||||
adapter = create_adapter(
|
||||
config=NVIDIAConfig(api_key="key"), rerank_endpoints={"test-model": "https://model.endpoint/rerank"}
|
||||
)
|
||||
mock_session = MockSession(MockResponse())
|
||||
|
||||
with patch("aiohttp.ClientSession", return_value=mock_session):
|
||||
await adapter.rerank(model="test-model", query="q", items=["a"])
|
||||
|
||||
url, _ = mock_session.post_calls[0]
|
||||
assert url == "https://model.endpoint/rerank"
|
||||
|
||||
|
||||
async def test_hosted_without_endpoint():
|
||||
adapter = create_adapter(
|
||||
config=NVIDIAConfig(api_key="key"), # This creates hosted config (integrate.api.nvidia.com).
|
||||
rerank_endpoints={}, # No endpoint mapping for test-model
|
||||
)
|
||||
mock_session = MockSession(MockResponse())
|
||||
|
||||
with patch("aiohttp.ClientSession", return_value=mock_session):
|
||||
await adapter.rerank(model="test-model", query="q", items=["a"])
|
||||
|
||||
url, _ = mock_session.post_calls[0]
|
||||
assert "https://integrate.api.nvidia.com" in url
|
||||
|
||||
|
||||
async def test_hosted_model_not_in_endpoint_mapping():
|
||||
adapter = create_adapter(
|
||||
config=NVIDIAConfig(api_key="key"), rerank_endpoints={"other-model": "https://other.endpoint/rerank"}
|
||||
)
|
||||
mock_session = MockSession(MockResponse())
|
||||
|
||||
with patch("aiohttp.ClientSession", return_value=mock_session):
|
||||
await adapter.rerank(model="test-model", query="q", items=["a"])
|
||||
|
||||
url, _ = mock_session.post_calls[0]
|
||||
assert "https://integrate.api.nvidia.com" in url
|
||||
assert url != "https://other.endpoint/rerank"
|
||||
|
||||
|
||||
async def test_self_hosted_ignores_endpoint():
|
||||
adapter = create_adapter(
|
||||
config=NVIDIAConfig(url="http://localhost:8000", api_key=None),
|
||||
rerank_endpoints={"test-model": "https://model.endpoint/rerank"}, # This should be ignored for self-hosted.
|
||||
)
|
||||
mock_session = MockSession(MockResponse())
|
||||
|
||||
with patch("aiohttp.ClientSession", return_value=mock_session):
|
||||
await adapter.rerank(model="test-model", query="q", items=["a"])
|
||||
|
||||
url, _ = mock_session.post_calls[0]
|
||||
assert "http://localhost:8000" in url
|
||||
assert "model.endpoint/rerank" not in url
|
||||
|
||||
|
||||
async def test_max_num_results():
|
||||
adapter = create_adapter()
|
||||
rankings = [{"index": 0, "logit": 0.8}, {"index": 1, "logit": 0.6}]
|
||||
mock_session = MockSession(MockResponse(json_data={"rankings": rankings}))
|
||||
|
||||
with patch("aiohttp.ClientSession", return_value=mock_session):
|
||||
result = await adapter.rerank(model="test-model", query="q", items=["a", "b"], max_num_results=1)
|
||||
|
||||
assert len(result.data) == 1
|
||||
assert result.data[0].index == 0
|
||||
assert result.data[0].relevance_score == 0.8
|
||||
|
||||
|
||||
async def test_http_error():
|
||||
adapter = create_adapter()
|
||||
mock_session = MockSession(MockResponse(status=500, text_data="Server Error"))
|
||||
|
||||
with patch("aiohttp.ClientSession", return_value=mock_session):
|
||||
with pytest.raises(ConnectionError, match="status 500.*Server Error"):
|
||||
await adapter.rerank(model="test-model", query="q", items=["a"])
|
||||
|
||||
|
||||
async def test_client_error():
|
||||
adapter = create_adapter()
|
||||
mock_session = AsyncMock()
|
||||
mock_session.__aenter__.side_effect = aiohttp.ClientError("Network error")
|
||||
|
||||
with patch("aiohttp.ClientSession", return_value=mock_session):
|
||||
with pytest.raises(ConnectionError, match="Failed to connect.*Network error"):
|
||||
await adapter.rerank(model="test-model", query="q", items=["a"])
|
||||
|
||||
|
||||
async def test_list_models_includes_configured_rerank_models():
|
||||
"""Test that list_models adds rerank models to the dynamic model list."""
|
||||
adapter = create_adapter()
|
||||
adapter.__provider_id__ = "nvidia"
|
||||
adapter.__provider_spec__ = MagicMock()
|
||||
|
||||
dynamic_ids = ["llm-1", "embedding-1"]
|
||||
with patch.object(OpenAIMixin, "list_provider_model_ids", new=AsyncMock(return_value=dynamic_ids)):
|
||||
result = await adapter.list_models()
|
||||
|
||||
assert result is not None
|
||||
|
||||
# Check that the rerank models are added
|
||||
model_ids = [m.identifier for m in result]
|
||||
assert "nv-rerank-qa-mistral-4b:1" in model_ids
|
||||
assert "nvidia/nv-rerankqa-mistral-4b-v3" in model_ids
|
||||
assert "nvidia/llama-3.2-nv-rerankqa-1b-v2" in model_ids
|
||||
|
||||
rerank_models = [m for m in result if m.model_type == ModelType.rerank]
|
||||
|
||||
assert len(rerank_models) == 3
|
||||
|
||||
for m in rerank_models:
|
||||
assert m.provider_id == "nvidia"
|
||||
assert m.model_type == ModelType.rerank
|
||||
assert m.metadata == {}
|
||||
assert m.identifier in adapter._model_cache
|
||||
|
||||
|
||||
async def test_list_provider_model_ids_has_no_duplicates():
|
||||
adapter = create_adapter()
|
||||
|
||||
dynamic_ids = [
|
||||
"llm-1",
|
||||
"nvidia/nv-rerankqa-mistral-4b-v3", # overlaps configured rerank ids
|
||||
"embedding-1",
|
||||
"llm-1",
|
||||
]
|
||||
|
||||
with patch.object(OpenAIMixin, "list_provider_model_ids", new=AsyncMock(return_value=dynamic_ids)):
|
||||
ids = list(await adapter.list_provider_model_ids())
|
||||
|
||||
assert len(ids) == len(set(ids))
|
||||
assert ids.count("nvidia/nv-rerankqa-mistral-4b-v3") == 1
|
||||
assert "nv-rerank-qa-mistral-4b:1" in ids
|
||||
assert "nvidia/llama-3.2-nv-rerankqa-1b-v2" in ids
|
||||
|
||||
|
||||
async def test_list_provider_model_ids_uses_configured_on_dynamic_failure():
|
||||
adapter = create_adapter()
|
||||
|
||||
# Simulate dynamic listing failure
|
||||
with patch.object(OpenAIMixin, "list_provider_model_ids", new=AsyncMock(side_effect=Exception)):
|
||||
ids = list(await adapter.list_provider_model_ids())
|
||||
|
||||
# Should still return configured rerank ids
|
||||
configured_ids = list(adapter.config.rerank_model_to_url.keys())
|
||||
assert set(ids) == set(configured_ids)
|
||||
10
uv.lock
generated
10
uv.lock
generated
|
|
@ -1,5 +1,5 @@
|
|||
version = 1
|
||||
revision = 2
|
||||
revision = 3
|
||||
requires-python = ">=3.12"
|
||||
resolution-markers = [
|
||||
"(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
|
|
@ -1933,7 +1933,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "llama-stack"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0.dev0"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "aiohttp" },
|
||||
|
|
@ -3530,8 +3530,10 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/2d/75/364847b879eb630b3ac8293798e380e441a957c53657995053c5ec39a316/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ab8905b5dcb05bf3fb22e0cf90e10f469563486ffb6a96569e51f897c750a76a", size = 4411159, upload-time = "2025-10-10T11:12:00.49Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/a0/567f7ea38b6e1c62aafd58375665a547c00c608a471620c0edc364733e13/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:bf940cd7e7fec19181fdbc29d76911741153d51cab52e5c21165f3262125685e", size = 4468234, upload-time = "2025-10-10T11:12:04.892Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/30/da/4e42788fb811bbbfd7b7f045570c062f49e350e1d1f3df056c3fb5763353/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa0f693d3c68ae925966f0b14b8edda71696608039f4ed61b1fe9ffa468d16db", size = 4166236, upload-time = "2025-10-10T11:12:11.674Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3c/94/c1777c355bc560992af848d98216148be5f1be001af06e06fc49cbded578/psycopg2_binary-2.9.11-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a1cf393f1cdaf6a9b57c0a719a1068ba1069f022a59b8b1fe44b006745b59757", size = 3983083, upload-time = "2025-10-30T02:55:15.73Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bd/42/c9a21edf0e3daa7825ed04a4a8588686c6c14904344344a039556d78aa58/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ef7a6beb4beaa62f88592ccc65df20328029d721db309cb3250b0aae0fa146c3", size = 3652281, upload-time = "2025-10-10T11:12:17.713Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/12/22/dedfbcfa97917982301496b6b5e5e6c5531d1f35dd2b488b08d1ebc52482/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:31b32c457a6025e74d233957cc9736742ac5a6cb196c6b68499f6bb51390bd6a", size = 3298010, upload-time = "2025-10-10T11:12:22.671Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/66/ea/d3390e6696276078bd01b2ece417deac954dfdd552d2edc3d03204416c0c/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:edcb3aeb11cb4bf13a2af3c53a15b3d612edeb6409047ea0b5d6a21a9d744b34", size = 3044641, upload-time = "2025-10-30T02:55:19.929Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/12/9a/0402ded6cbd321da0c0ba7d34dc12b29b14f5764c2fc10750daa38e825fc/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:62b6d93d7c0b61a1dd6197d208ab613eb7dcfdcca0a49c42ceb082257991de9d", size = 3347940, upload-time = "2025-10-10T11:12:26.529Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b1/d2/99b55e85832ccde77b211738ff3925a5d73ad183c0b37bcbbe5a8ff04978/psycopg2_binary-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:b33fabeb1fde21180479b2d4667e994de7bbf0eec22832ba5d9b5e4cf65b6c6d", size = 2714147, upload-time = "2025-10-10T11:12:29.535Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/a8/a2709681b3ac11b0b1786def10006b8995125ba268c9a54bea6f5ae8bd3e/psycopg2_binary-2.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b8fb3db325435d34235b044b199e56cdf9ff41223a4b9752e8576465170bb38c", size = 3756572, upload-time = "2025-10-10T11:12:32.873Z" },
|
||||
|
|
@ -3539,8 +3541,10 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/11/32/b2ffe8f3853c181e88f0a157c5fb4e383102238d73c52ac6d93a5c8bffe6/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c55b385daa2f92cb64b12ec4536c66954ac53654c7f15a203578da4e78105c0", size = 4411242, upload-time = "2025-10-10T11:12:42.388Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/04/6ca7477e6160ae258dc96f67c371157776564679aefd247b66f4661501a2/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c0377174bf1dd416993d16edc15357f6eb17ac998244cca19bc67cdc0e2e5766", size = 4468258, upload-time = "2025-10-10T11:12:48.654Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3c/7e/6a1a38f86412df101435809f225d57c1a021307dd0689f7a5e7fe83588b1/psycopg2_binary-2.9.11-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5c6ff3335ce08c75afaed19e08699e8aacf95d4a260b495a4a8545244fe2ceb3", size = 4166295, upload-time = "2025-10-10T11:12:52.525Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/7d/c07374c501b45f3579a9eb761cbf2604ddef3d96ad48679112c2c5aa9c25/psycopg2_binary-2.9.11-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:84011ba3109e06ac412f95399b704d3d6950e386b7994475b231cf61eec2fc1f", size = 3983133, upload-time = "2025-10-30T02:55:24.329Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/82/56/993b7104cb8345ad7d4516538ccf8f0d0ac640b1ebd8c754a7b024e76878/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba34475ceb08cccbdd98f6b46916917ae6eeb92b5ae111df10b544c3a4621dc4", size = 3652383, upload-time = "2025-10-10T11:12:56.387Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2d/ac/eaeb6029362fd8d454a27374d84c6866c82c33bfc24587b4face5a8e43ef/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b31e90fdd0f968c2de3b26ab014314fe814225b6c324f770952f7d38abf17e3c", size = 3298168, upload-time = "2025-10-10T11:13:00.403Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2b/39/50c3facc66bded9ada5cbc0de867499a703dc6bca6be03070b4e3b65da6c/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:d526864e0f67f74937a8fce859bd56c979f5e2ec57ca7c627f5f1071ef7fee60", size = 3044712, upload-time = "2025-10-30T02:55:27.975Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9c/8e/b7de019a1f562f72ada81081a12823d3c1590bedc48d7d2559410a2763fe/psycopg2_binary-2.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04195548662fa544626c8ea0f06561eb6203f1984ba5b4562764fbeb4c3d14b1", size = 3347549, upload-time = "2025-10-10T11:13:03.971Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/80/2d/1bb683f64737bbb1f86c82b7359db1eb2be4e2c0c13b947f80efefa7d3e5/psycopg2_binary-2.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:efff12b432179443f54e230fdf60de1f6cc726b6c832db8701227d089310e8aa", size = 2714215, upload-time = "2025-10-10T11:13:07.14Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/64/12/93ef0098590cf51d9732b4f139533732565704f45bdc1ffa741b7c95fb54/psycopg2_binary-2.9.11-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:92e3b669236327083a2e33ccfa0d320dd01b9803b3e14dd986a4fc54aa00f4e1", size = 3756567, upload-time = "2025-10-10T11:13:11.885Z" },
|
||||
|
|
@ -3548,8 +3552,10 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/13/1e/98874ce72fd29cbde93209977b196a2edae03f8490d1bd8158e7f1daf3a0/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b52a3f9bb540a3e4ec0f6ba6d31339727b2950c9772850d6545b7eae0b9d7c5", size = 4411646, upload-time = "2025-10-10T11:13:24.432Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5a/bd/a335ce6645334fb8d758cc358810defca14a1d19ffbc8a10bd38a2328565/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:db4fd476874ccfdbb630a54426964959e58da4c61c9feba73e6094d51303d7d8", size = 4468701, upload-time = "2025-10-10T11:13:29.266Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/44/d6/c8b4f53f34e295e45709b7568bf9b9407a612ea30387d35eb9fa84f269b4/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:47f212c1d3be608a12937cc131bd85502954398aaa1320cb4c14421a0ffccf4c", size = 4166293, upload-time = "2025-10-10T11:13:33.336Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4b/e0/f8cc36eadd1b716ab36bb290618a3292e009867e5c97ce4aba908cb99644/psycopg2_binary-2.9.11-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e35b7abae2b0adab776add56111df1735ccc71406e56203515e228a8dc07089f", size = 3983184, upload-time = "2025-10-30T02:55:32.483Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/53/3e/2a8fe18a4e61cfb3417da67b6318e12691772c0696d79434184a511906dc/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fcf21be3ce5f5659daefd2b3b3b6e4727b028221ddc94e6c1523425579664747", size = 3652650, upload-time = "2025-10-10T11:13:38.181Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/76/36/03801461b31b29fe58d228c24388f999fe814dfc302856e0d17f97d7c54d/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:9bd81e64e8de111237737b29d68039b9c813bdf520156af36d26819c9a979e5f", size = 3298663, upload-time = "2025-10-10T11:13:44.878Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/97/77/21b0ea2e1a73aa5fa9222b2a6b8ba325c43c3a8d54272839c991f2345656/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:32770a4d666fbdafab017086655bcddab791d7cb260a16679cc5a7338b64343b", size = 3044737, upload-time = "2025-10-30T02:55:35.69Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/67/69/f36abe5f118c1dca6d3726ceae164b9356985805480731ac6712a63f24f0/psycopg2_binary-2.9.11-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3cb3a676873d7506825221045bd70e0427c905b9c8ee8d6acd70cfcbd6e576d", size = 3347643, upload-time = "2025-10-10T11:13:53.499Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e1/36/9c0c326fe3a4227953dfb29f5d0c8ae3b8eb8c1cd2967aa569f50cb3c61f/psycopg2_binary-2.9.11-cp314-cp314-win_amd64.whl", hash = "sha256:4012c9c954dfaccd28f94e84ab9f94e12df76b4afb22331b1f0d3154893a6316", size = 2803913, upload-time = "2025-10-10T11:13:57.058Z" },
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue