Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-12 20:12:33 +00:00)

Commit e77b7a127c: Merge
Signed-off-by: Bill Murdock <bmurdock@redhat.com>
854 changed files with 165238 additions and 99099 deletions
.github/CODEOWNERS (2 changed lines)

@@ -2,4 +2,4 @@
 # These owners will be the default owners for everything in
 # the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1
+* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
.github/TRIAGERS.md (1 changed line)

@@ -1,2 +1 @@
 # This file documents Triage members in the Llama Stack community
-@franciscojavierarceo
.github/workflows/README.md (1 changed line)

@@ -12,6 +12,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
 | Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suites from tests/integration in replay mode |
 | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
+| Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
 | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
 | Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
.github/workflows/integration-tests.yml (25 changed lines)

@@ -42,18 +42,27 @@ jobs:

   run-replay-mode-tests:
     runs-on: ubuntu-latest
-    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.setup, matrix.python-version, matrix.client-version, matrix.suite) }}
+    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}

     strategy:
       fail-fast: false
       matrix:
         client-type: [library, server]
-        # Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
-        setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-        suite: [base, vision]
+        # Define (setup, suite) pairs - they are always matched and cannot be independent
+        # Weekly schedule (Sun 1 AM): vllm+base
+        # Input test-setup=ollama-vision: ollama-vision+vision
+        # Default (including test-setup=ollama): both ollama+base and ollama-vision+vision
+        config: >-
+          ${{
+            github.event.schedule == '1 0 * * 0'
+            && fromJSON('[{"setup": "vllm", "suite": "base"}]')
+            || github.event.inputs.test-setup == 'ollama-vision'
+            && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
+            || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
+          }}

     steps:
       - name: Checkout repository

@@ -64,14 +73,14 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
           client-version: ${{ matrix.client-version }}
-          setup: ${{ matrix.setup }}
+          setup: ${{ matrix.config.setup }}
-          suite: ${{ matrix.suite }}
+          suite: ${{ matrix.config.suite }}
           inference-mode: 'replay'

       - name: Run tests
         uses: ./.github/actions/run-and-record-tests
         with:
           stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
-          setup: ${{ matrix.setup }}
+          setup: ${{ matrix.config.setup }}
           inference-mode: 'replay'
-          suite: ${{ matrix.suite }}
+          suite: ${{ matrix.config.suite }}
.github/workflows/precommit-trigger.yml (new file, 227 lines):

name: Pre-commit Bot

run-name: Pre-commit bot for PR #${{ github.event.issue.number }}

on:
  issue_comment:
    types: [created]

jobs:
  pre-commit:
    # Only run on pull request comments
    if: github.event.issue.pull_request && contains(github.event.comment.body, '@github-actions run precommit')
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write

    steps:
      - name: Check comment author and get PR details
        id: check_author
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            // Get PR details
            const pr = await github.rest.pulls.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: context.issue.number
            });

            // Check if commenter has write access or is the PR author
            const commenter = context.payload.comment.user.login;
            const prAuthor = pr.data.user.login;

            let hasPermission = false;

            // Check if commenter is PR author
            if (commenter === prAuthor) {
              hasPermission = true;
              console.log(`Comment author ${commenter} is the PR author`);
            } else {
              // Check if commenter has write/admin access
              try {
                const permission = await github.rest.repos.getCollaboratorPermissionLevel({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  username: commenter
                });

                const level = permission.data.permission;
                hasPermission = ['write', 'admin', 'maintain'].includes(level);
                console.log(`Comment author ${commenter} has permission: ${level}`);
              } catch (error) {
                console.log(`Could not check permissions for ${commenter}: ${error.message}`);
              }
            }

            if (!hasPermission) {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: `❌ @${commenter} You don't have permission to trigger pre-commit. Only PR authors or repository collaborators can run this command.`
              });
              core.setFailed(`User ${commenter} does not have permission`);
              return;
            }

            // Save PR info for later steps
            core.setOutput('pr_number', context.issue.number);
            core.setOutput('pr_head_ref', pr.data.head.ref);
            core.setOutput('pr_head_sha', pr.data.head.sha);
            core.setOutput('pr_head_repo', pr.data.head.repo.full_name);
            core.setOutput('pr_base_ref', pr.data.base.ref);
            core.setOutput('is_fork', pr.data.head.repo.full_name !== context.payload.repository.full_name);
            core.setOutput('authorized', 'true');

      - name: React to comment
        if: steps.check_author.outputs.authorized == 'true'
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.reactions.createForIssueComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id: context.payload.comment.id,
              content: 'rocket'
            });

      - name: Comment starting
        if: steps.check_author.outputs.authorized == 'true'
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: ${{ steps.check_author.outputs.pr_number }},
              body: `⏳ Running pre-commit hooks on PR #${{ steps.check_author.outputs.pr_number }}...`
            });

      - name: Checkout PR branch (same-repo)
        if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'false'
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          ref: ${{ steps.check_author.outputs.pr_head_ref }}
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Checkout PR branch (fork)
        if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'true'
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          repository: ${{ steps.check_author.outputs.pr_head_repo }}
          ref: ${{ steps.check_author.outputs.pr_head_ref }}
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Verify checkout
        if: steps.check_author.outputs.authorized == 'true'
        run: |
          echo "Current SHA: $(git rev-parse HEAD)"
          echo "Expected SHA: ${{ steps.check_author.outputs.pr_head_sha }}"
          if [[ "$(git rev-parse HEAD)" != "${{ steps.check_author.outputs.pr_head_sha }}" ]]; then
            echo "::error::Checked out SHA does not match expected SHA"
            exit 1
          fi

      - name: Set up Python
        if: steps.check_author.outputs.authorized == 'true'
        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
        with:
          python-version: '3.12'
          cache: pip
          cache-dependency-path: |
            **/requirements*.txt
            .pre-commit-config.yaml

      - name: Set up Node.js
        if: steps.check_author.outputs.authorized == 'true'
        uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
        with:
          node-version: '20'
          cache: 'npm'
          cache-dependency-path: 'llama_stack/ui/'

      - name: Install npm dependencies
        if: steps.check_author.outputs.authorized == 'true'
        run: npm ci
        working-directory: llama_stack/ui

      - name: Run pre-commit
        if: steps.check_author.outputs.authorized == 'true'
        id: precommit
        uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
        continue-on-error: true
        env:
          SKIP: no-commit-to-branch
          RUFF_OUTPUT_FORMAT: github

      - name: Check for changes
        if: steps.check_author.outputs.authorized == 'true'
        id: changes
        run: |
          if ! git diff --exit-code || [ -n "$(git ls-files --others --exclude-standard)" ]; then
            echo "has_changes=true" >> $GITHUB_OUTPUT
            echo "Changes detected after pre-commit"
          else
            echo "has_changes=false" >> $GITHUB_OUTPUT
            echo "No changes after pre-commit"
          fi

      - name: Commit and push changes
        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"

          git add -A
          git commit -m "style: apply pre-commit fixes

          🤖 Applied by @github-actions bot via pre-commit workflow"

          # Push changes
          git push origin HEAD:${{ steps.check_author.outputs.pr_head_ref }}

      - name: Comment success with changes
        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: ${{ steps.check_author.outputs.pr_number }},
              body: `✅ Pre-commit hooks completed successfully!\n\n🔧 Changes have been committed and pushed to the PR branch.`
            });

      - name: Comment success without changes
        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'false' && steps.precommit.outcome == 'success'
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: ${{ steps.check_author.outputs.pr_number }},
              body: `✅ Pre-commit hooks passed!\n\n✨ No changes needed - your code is already formatted correctly.`
            });

      - name: Comment failure
        if: failure()
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: ${{ steps.check_author.outputs.pr_number }},
              body: `❌ Pre-commit workflow failed!\n\nPlease check the [workflow logs](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) for details.`
            });
.github/workflows/providers-build.yml (4 changed lines)

@@ -112,7 +112,7 @@ jobs:
           fi
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi

@@ -150,7 +150,7 @@ jobs:
           fi
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi
.github/workflows/python-build-test.yml (2 changed lines)

@@ -24,7 +24,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

       - name: Install uv
-        uses: astral-sh/setup-uv@b75a909f75acd358c2196fb9a5f1299a9a8868a4 # v6.7.0
+        uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6.8.0
         with:
           python-version: ${{ matrix.python-version }}
           activate-environment: true
@@ -7,7 +7,7 @@
 [](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
 [](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)

-[**Quick Start**](https://llamastack.github.io/latest/getting_started/index.html) | [**Documentation**](https://llamastack.github.io/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
+[**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)

 ### ✨🎉 Llama 4 Support 🎉✨
@@ -187,21 +187,21 @@ Configure telemetry behavior using environment variables:
 - **`OTEL_SERVICE_NAME`**: Service name for telemetry (default: empty string)
 - **`TELEMETRY_SINKS`**: Comma-separated list of sinks (default: `console,sqlite`)

-## Visualization with Jaeger
+### Quick Setup: Complete Telemetry Stack

-The `otel_trace` sink works with any service compatible with the OpenTelemetry collector. Traces and metrics use separate endpoints but can share the same collector.
-
-### Starting Jaeger
-
-Start a Jaeger instance with OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686:
+Use the automated setup script to launch the complete telemetry stack (Jaeger, OpenTelemetry Collector, Prometheus, and Grafana):

 ```bash
-docker run --pull always --rm --name jaeger \
-  -p 16686:16686 -p 4318:4318 \
-  jaegertracing/jaeger:2.1.0
+./scripts/telemetry/setup_telemetry.sh
 ```

-Once running, you can visualize traces by navigating to [http://localhost:16686/](http://localhost:16686/).
+This sets up:
+- **Jaeger UI**: http://localhost:16686 (traces visualization)
+- **Prometheus**: http://localhost:9090 (metrics)
+- **Grafana**: http://localhost:3000 (dashboards with auto-configured data sources)
+- **OTEL Collector**: http://localhost:4318 (OTLP endpoint)
+
+Once running, you can visualize traces by navigating to [Grafana](http://localhost:3000/) and login with login `admin` and password `admin`.

 ## Querying Metrics

@@ -357,7 +357,7 @@ server:
 8. Run the server:

 ```bash
-python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
+llama stack run ~/.llama/run-byoa.yaml
 ```

 9. Test the API:

@@ -170,7 +170,7 @@ spec:
       - name: llama-stack
         image: localhost/llama-stack-run-k8s:latest
         imagePullPolicy: IfNotPresent
-        command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
+        command: ["llama", "stack", "run", "/app/config.yaml"]
         ports:
         - containerPort: 5000
         volumeMounts:

@@ -52,7 +52,7 @@ spec:
           value: "${SAFETY_MODEL}"
         - name: TAVILY_SEARCH_API_KEY
           value: "${TAVILY_SEARCH_API_KEY}"
-        command: ["python", "-m", "llama_stack.core.server.server", "/etc/config/stack_run_config.yaml", "--port", "8321"]
+        command: ["llama", "stack", "run", "/etc/config/stack_run_config.yaml", "--port", "8321"]
        ports:
        - containerPort: 8321
        volumeMounts:

@@ -1,4 +1,7 @@
 ---
+description: "Files
+
+  This API is used to upload documents that can be used with other Llama Stack APIs."
 sidebar_label: Files
 title: Files
 ---

@@ -7,4 +10,8 @@ title: Files

 ## Overview

+Files
+
+This API is used to upload documents that can be used with other Llama Stack APIs.
+
 This section contains documentation for all available providers for the **files** API.

@@ -1,5 +1,7 @@
 ---
-description: "Llama Stack Inference API for generating completions, chat completions, and embeddings.
+description: "Inference
+
+  Llama Stack Inference API for generating completions, chat completions, and embeddings.

   This API provides the raw interface to the underlying models. Two kinds of models are supported:
   - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.

@@ -12,7 +14,9 @@ title: Inference

 ## Overview

-Llama Stack Inference API for generating completions, chat completions, and embeddings.
+Inference
+
+Llama Stack Inference API for generating completions, chat completions, and embeddings.

 This API provides the raw interface to the underlying models. Two kinds of models are supported:
 - LLM models: these models generate "raw" and "chat" (conversational) completions.

@@ -15,7 +15,7 @@ Databricks inference provider for running models on Databricks' unified analytic
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `url` | `<class 'str'>` | No | | The URL for the Databricks model serving endpoint |
+| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
 | `api_token` | `<class 'pydantic.types.SecretStr'>` | No | | The Databricks API token |

 ## Sample Configuration

@@ -1,4 +1,7 @@
 ---
+description: "Safety
+
+  OpenAI-compatible Moderations API."
 sidebar_label: Safety
 title: Safety
 ---

@@ -7,4 +10,8 @@ title: Safety

 ## Overview

+Safety
+
+OpenAI-compatible Moderations API.
+
 This section contains documentation for all available providers for the **safety** API.

@@ -50,6 +50,7 @@ from .specification import (
     Document,
     Example,
     ExampleRef,
+    ExtraBodyParameter,
     MediaType,
     Operation,
     Parameter,

@@ -677,6 +678,27 @@ class Generator:
         # parameters passed anywhere
         parameters = path_parameters + query_parameters

+        # Build extra body parameters documentation
+        extra_body_parameters = []
+        for param_name, param_type, description in op.extra_body_params:
+            if is_type_optional(param_type):
+                inner_type: type = unwrap_optional_type(param_type)
+                required = False
+            else:
+                inner_type = param_type
+                required = True
+
+            # Use description from ExtraBodyField if available, otherwise from docstring
+            param_description = description or doc_params.get(param_name)
+
+            extra_body_param = ExtraBodyParameter(
+                name=param_name,
+                schema=self.schema_builder.classdef_to_ref(inner_type),
+                description=param_description,
+                required=required,
+            )
+            extra_body_parameters.append(extra_body_param)
+
         webmethod = getattr(op.func_ref, "__webmethod__", None)
         raw_bytes_request_body = False
         if webmethod:

@@ -898,6 +920,7 @@ class Generator:
             deprecated=getattr(op.webmethod, "deprecated", False)
             or "DEPRECATED" in op.func_name,
             security=[] if op.public else None,
+            extraBodyParameters=extra_body_parameters if extra_body_parameters else None,
         )

     def _get_api_stability_priority(self, api_level: str) -> int:

@@ -19,10 +19,12 @@ from llama_stack.strong_typing.inspection import get_signature

 from typing import get_origin, get_args

 from fastapi import UploadFile
 from fastapi.params import File, Form
 from typing import Annotated

+from llama_stack.schema_utils import ExtraBodyField
+

 def split_prefix(
     s: str, sep: str, prefix: Union[str, Iterable[str]]

@@ -89,6 +91,7 @@ class EndpointOperation:
     :param query_params: Parameters of the operation signature that are passed in the query string as `key=value` pairs.
     :param request_params: The parameter that corresponds to the data transmitted in the request body.
     :param multipart_params: Parameters that indicate multipart/form-data request body.
+    :param extra_body_params: Parameters that arrive via extra_body and are documented but not in SDK.
     :param event_type: The Python type of the data that is transmitted out-of-band (e.g. via websockets) while the operation is in progress.
     :param response_type: The Python type of the data that is transmitted in the response body.
     :param http_method: The HTTP method used to invoke the endpoint such as POST, GET or PUT.

@@ -106,6 +109,7 @@ class EndpointOperation:
     query_params: List[OperationParameter]
     request_params: Optional[OperationParameter]
     multipart_params: List[OperationParameter]
+    extra_body_params: List[tuple[str, type, str | None]]
     event_type: Optional[type]
     response_type: type
     http_method: HTTPMethod

@@ -265,6 +269,7 @@ def get_endpoint_operations(
         query_params = []
         request_params = []
         multipart_params = []
+        extra_body_params = []

         for param_name, parameter in signature.parameters.items():
             param_type = _get_annotation_type(parameter.annotation, func_ref)

@@ -279,6 +284,13 @@ def get_endpoint_operations(
                     f"parameter '{param_name}' in function '{func_name}' has no type annotation"
                 )

+            # Check if this is an extra_body parameter
+            is_extra_body, extra_body_desc = _is_extra_body_param(param_type)
+            if is_extra_body:
+                # Store in a separate list for documentation
+                extra_body_params.append((param_name, param_type, extra_body_desc))
+                continue  # Skip adding to request_params
+
             is_multipart = _is_multipart_param(param_type)

             if prefix in ["get", "delete"]:

@@ -351,6 +363,7 @@ def get_endpoint_operations(
                 query_params=query_params,
                 request_params=request_params,
                 multipart_params=multipart_params,
+                extra_body_params=extra_body_params,
                 event_type=event_type,
                 response_type=response_type,
                 http_method=http_method,

@@ -403,7 +416,7 @@ def get_endpoint_events(endpoint: type) -> Dict[str, type]:
 def _is_multipart_param(param_type: type) -> bool:
     """
     Check if a parameter type indicates multipart form data.

     Returns True if the type is:
     - UploadFile
     - Annotated[UploadFile, File()]

@@ -413,19 +426,38 @@ def _is_multipart_param(param_type: type) -> bool:
     """
     if param_type is UploadFile:
         return True

     # Check for Annotated types
     origin = get_origin(param_type)
     if origin is None:
         return False

     if origin is Annotated:
         args = get_args(param_type)
         if len(args) < 2:
             return False

         # Check the annotations for File() or Form()
         for annotation in args[1:]:
             if isinstance(annotation, (File, Form)):
                 return True
     return False
+
+
+def _is_extra_body_param(param_type: type) -> tuple[bool, str | None]:
+    """
+    Check if parameter is marked as coming from extra_body.
+
+    Returns:
+        (is_extra_body, description): Tuple of boolean and optional description
+    """
+    origin = get_origin(param_type)
+    if origin is Annotated:
+        args = get_args(param_type)
+        for annotation in args[1:]:
+            if isinstance(annotation, ExtraBodyField):
+                return True, annotation.description
+            # Also check by type name for cases where import matters
+            if type(annotation).__name__ == 'ExtraBodyField':
+                return True, getattr(annotation, 'description', None)
+    return False, None

@@ -106,6 +106,15 @@ class Parameter:
     example: Optional[Any] = None


+@dataclass
+class ExtraBodyParameter:
+    """Represents a parameter that arrives via extra_body in the request."""
+    name: str
+    schema: SchemaOrRef
+    description: Optional[str] = None
+    required: Optional[bool] = None
+
+
 @dataclass
 class Operation:
     responses: Dict[str, Union[Response, ResponseRef]]

@@ -118,6 +127,7 @@ class Operation:
     callbacks: Optional[Dict[str, "Callback"]] = None
     security: Optional[List["SecurityRequirement"]] = None
     deprecated: Optional[bool] = None
+    extraBodyParameters: Optional[List[ExtraBodyParameter]] = None


 @dataclass

@@ -52,6 +52,17 @@ class Specification:
             if display_name:
                 tag["x-displayName"] = display_name

+        # Handle operations to rename extraBodyParameters -> x-llama-stack-extra-body-params
+        paths = json_doc.get("paths", {})
+        for path_item in paths.values():
+            if isinstance(path_item, dict):
+                for method in ["get", "post", "put", "delete", "patch"]:
+                    operation = path_item.get(method)
+                    if operation and isinstance(operation, dict):
+                        extra_body_params = operation.pop("extraBodyParameters", None)
+                        if extra_body_params:
+                            operation["x-llama-stack-extra-body-params"] = extra_body_params
+
         return json_doc

     def get_json_string(self, pretty_print: bool = False) -> str:
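For orientation, a minimal sketch of how an endpoint could declare such a parameter so that the generator above picks it up. The endpoint and parameter names and the `ExtraBodyField("...")` constructor call are illustrative assumptions, not code from this commit; only the `Annotated[..., ExtraBodyField(...)]` convention and the annotation's `description` attribute are implied by the changes shown above.

```python
# Hypothetical endpoint signature (names are assumptions for illustration).
# _is_extra_body_param() spots the ExtraBodyField annotation inside
# Annotated[...], records (name, type, description), and the generator emits
# it as x-llama-stack-extra-body-params instead of putting it in the
# request-body schema.
from typing import Annotated

from llama_stack.schema_utils import ExtraBodyField


async def create_openai_response(
    self,
    input: str,
    model: str,
    shields: Annotated[
        list[str] | None,
        ExtraBodyField("List of shields to apply during response generation."),
    ] = None,
):
    ...
```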
docs/static/deprecated-llama-stack-spec.html (111 changed lines)

@@ -1443,8 +1443,8 @@
       "tags": [
         "Inference"
       ],
-      "summary": "List all chat completions.",
-      "description": "List all chat completions.",
+      "summary": "List chat completions.",
+      "description": "List chat completions.",
       "parameters": [

@@ -1520,8 +1520,8 @@
       "tags": [
         "Inference"
       ],
-      "summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
-      "description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
+      "summary": "Create chat completions.",
+      "description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
       "parameters": [],

@@ -1565,8 +1565,8 @@
       "tags": [
         "Inference"
       ],
-      "summary": "Describe a chat completion by its ID.",
-      "description": "Describe a chat completion by its ID.",
+      "summary": "Get chat completion.",
+      "description": "Get chat completion.\nDescribe a chat completion by its ID.",
       "parameters": [

@@ -1610,8 +1610,8 @@
       "tags": [
         "Inference"
       ],
-      "summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
-      "description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
+      "summary": "Create completion.",
+      "description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
       "parameters": [],

@@ -1655,8 +1655,8 @@
       "tags": [
         "Inference"
       ],
-      "summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
-      "description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
+      "summary": "Create embeddings.",
+      "description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
       "parameters": [],

@@ -1700,8 +1700,8 @@
       "tags": [
         "Files"
       ],
-      "summary": "Returns a list of files that belong to the user's organization.",
-      "description": "Returns a list of files that belong to the user's organization.",
+      "summary": "List files.",
+      "description": "List files.\nReturns a list of files that belong to the user's organization.",
       "parameters": [

@@ -1770,8 +1770,8 @@
       "tags": [
         "Files"
       ],
-      "summary": "Upload a file that can be used across various endpoints.",
-      "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
+      "summary": "Upload file.",
+      "description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
       "parameters": [],

@@ -1831,8 +1831,8 @@
       "tags": [
         "Files"
       ],
-      "summary": "Returns information about a specific file.",
-      "description": "Returns information about a specific file.",
+      "summary": "Retrieve file.",
+      "description": "Retrieve file.\nReturns information about a specific file.",
       "parameters": [

@@ -1874,8 +1874,8 @@
       "tags": [
         "Files"
       ],
-      "summary": "Delete a file.",
-      "description": "Delete a file.",
+      "summary": "Delete file.",
+      "description": "Delete file.",
       "parameters": [

@@ -1919,8 +1919,8 @@
       "tags": [
         "Files"
       ],
-      "summary": "Returns the contents of the specified file.",
-      "description": "Returns the contents of the specified file.",
+      "summary": "Retrieve file content.",
+      "description": "Retrieve file content.\nReturns the contents of the specified file.",
       "parameters": [

@@ -1999,8 +1999,8 @@
       "tags": [
         "Safety"
       ],
-      "summary": "Classifies if text and/or image inputs are potentially harmful.",
-      "description": "Classifies if text and/or image inputs are potentially harmful.",
+      "summary": "Create moderation.",
+      "description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
       "parameters": [],

@@ -2044,8 +2044,8 @@
       "tags": [
         "Agents"
       ],
-      "summary": "List all OpenAI responses.",
-      "description": "List all OpenAI responses.",
+      "summary": "List all responses.",
+      "description": "List all responses.",
       "parameters": [

@@ -2119,8 +2119,8 @@
       "tags": [
         "Agents"
       ],
-      "summary": "Create a new OpenAI response.",
-      "description": "Create a new OpenAI response.",
+      "summary": "Create a model response.",
+      "description": "Create a model response.",
       "parameters": [],

@@ -2132,7 +2132,27 @@
           },
           "required": true
         },
-        "deprecated": true
+        "deprecated": true,
+        "x-llama-stack-extra-body-params": [
+          {
+            "name": "shields",
+            "schema": {
+              "type": "array",
+              "items": {
+                "oneOf": [
+                  {
+                    "type": "string"
+                  },
+                  {
+                    "$ref": "#/components/schemas/ResponseShieldSpec"
+                  }
+                ]
+              }
+            },
+            "description": "List of shields to apply during response generation. Shields provide safety and content moderation.",
+            "required": false
+          }
+        ]
       }
     },
     "/v1/openai/v1/responses/{response_id}": {

@@ -2164,8 +2184,8 @@
       "tags": [
         "Agents"
       ],
-      "summary": "Retrieve an OpenAI response by its ID.",
-      "description": "Retrieve an OpenAI response by its ID.",
+      "summary": "Get a model response.",
+      "description": "Get a model response.",
       "parameters": [

@@ -2207,8 +2227,8 @@
       "tags": [
         "Agents"
       ],
-      "summary": "Delete an OpenAI response by its ID.",
-      "description": "Delete an OpenAI response by its ID.",
+      "summary": "Delete a response.",
+      "description": "Delete a response.",
       "parameters": [

@@ -2252,8 +2272,8 @@
       "tags": [
         "Agents"
       ],
-      "summary": "List input items for a given OpenAI response.",
-      "description": "List input items for a given OpenAI response.",
+      "summary": "List input items.",
+      "description": "List input items.",
       "parameters": [

@@ -9521,6 +9541,21 @@
         "title": "OpenAIResponseText",
         "description": "Text response configuration for OpenAI responses."
       },
+      "ResponseShieldSpec": {
+        "type": "object",
+        "properties": {
+          "type": {
+            "type": "string",
+            "description": "The type/identifier of the shield."
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "type"
+        ],
+        "title": "ResponseShieldSpec",
+        "description": "Specification for a shield to apply during response generation."
+      },
       "OpenAIResponseInputTool": {
         "oneOf": [

@@ -13331,12 +13366,13 @@
     },
     {
       "name": "Files",
-      "description": ""
+      "description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
+      "x-displayName": "Files"
     },
     {
       "name": "Inference",
-      "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
-      "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
+      "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+      "x-displayName": "Inference"
     },
     {
       "name": "Models",

@@ -13348,7 +13384,8 @@
     },
     {
       "name": "Safety",
-      "description": ""
+      "description": "OpenAI-compatible Moderations API.",
+      "x-displayName": "Safety"
     },
     {
       "name": "Telemetry",
121
docs/static/deprecated-llama-stack-spec.yaml
vendored
121
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
|
@ -1033,8 +1033,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
summary: List all chat completions.
|
summary: List chat completions.
|
||||||
description: List all chat completions.
|
description: List chat completions.
|
||||||
parameters:
|
parameters:
|
||||||
- name: after
|
- name: after
|
||||||
in: query
|
in: query
|
||||||
|
|
@ -1087,10 +1087,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
summary: >-
|
summary: Create chat completions.
|
||||||
Generate an OpenAI-compatible chat completion for the given messages using
|
|
||||||
the specified model.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Create chat completions.
|
||||||
|
|
||||||
Generate an OpenAI-compatible chat completion for the given messages using
|
Generate an OpenAI-compatible chat completion for the given messages using
|
||||||
the specified model.
|
the specified model.
|
||||||
parameters: []
|
parameters: []
|
||||||
|
|
@ -1122,8 +1122,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
summary: Describe a chat completion by its ID.
|
summary: Get chat completion.
|
||||||
description: Describe a chat completion by its ID.
|
description: >-
|
||||||
|
Get chat completion.
|
||||||
|
|
||||||
|
Describe a chat completion by its ID.
|
||||||
parameters:
|
parameters:
|
||||||
- name: completion_id
|
- name: completion_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1153,10 +1156,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
summary: >-
|
summary: Create completion.
|
||||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
|
||||||
model.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Create completion.
|
||||||
|
|
||||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||||
model.
|
model.
|
||||||
parameters: []
|
parameters: []
|
||||||
|
|
@ -1189,10 +1192,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
summary: >-
|
summary: Create embeddings.
|
||||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
|
||||||
model.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Create embeddings.
|
||||||
|
|
||||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||||
model.
|
model.
|
||||||
parameters: []
|
parameters: []
|
||||||
|
|
@ -1225,9 +1228,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: >-
|
summary: List files.
|
||||||
Returns a list of files that belong to the user's organization.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
List files.
|
||||||
|
|
||||||
Returns a list of files that belong to the user's organization.
|
Returns a list of files that belong to the user's organization.
|
||||||
parameters:
|
parameters:
|
||||||
- name: after
|
- name: after
|
||||||
|
|
@ -1285,11 +1289,13 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: >-
|
summary: Upload file.
|
||||||
Upload a file that can be used across various endpoints.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Upload file.
|
||||||
|
|
||||||
Upload a file that can be used across various endpoints.
|
Upload a file that can be used across various endpoints.
|
||||||
|
|
||||||
|
|
||||||
The file upload should be a multipart form request with:
|
The file upload should be a multipart form request with:
|
||||||
|
|
||||||
- file: The File object (not file name) to be uploaded.
|
- file: The File object (not file name) to be uploaded.
|
||||||
|
|
@ -1338,9 +1344,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: >-
|
summary: Retrieve file.
|
||||||
Returns information about a specific file.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Retrieve file.
|
||||||
|
|
||||||
Returns information about a specific file.
|
Returns information about a specific file.
|
||||||
parameters:
|
parameters:
|
||||||
- name: file_id
|
- name: file_id
|
||||||
|
|
@ -1372,8 +1379,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: Delete a file.
|
summary: Delete file.
|
||||||
description: Delete a file.
|
description: Delete file.
|
||||||
parameters:
|
parameters:
|
||||||
- name: file_id
|
- name: file_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1405,9 +1412,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: >-
|
summary: Retrieve file content.
|
||||||
Returns the contents of the specified file.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Retrieve file content.
|
||||||
|
|
||||||
Returns the contents of the specified file.
|
Returns the contents of the specified file.
|
||||||
parameters:
|
parameters:
|
||||||
- name: file_id
|
- name: file_id
|
||||||
|
|
@ -1464,9 +1472,10 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Safety
-     summary: >-
-       Classifies if text and/or image inputs are potentially harmful.
+     summary: Create moderation.
      description: >-
+       Create moderation.
+
        Classifies if text and/or image inputs are potentially harmful.
      parameters: []
      requestBody:

@ -1497,8 +1506,8 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-     summary: List all OpenAI responses.
-     description: List all OpenAI responses.
+     summary: List all responses.
+     description: List all responses.
      parameters:
        - name: after
          in: query

@ -1549,8 +1558,8 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-     summary: Create a new OpenAI response.
-     description: Create a new OpenAI response.
+     summary: Create a model response.
+     description: Create a model response.
      parameters: []
      requestBody:
        content:

@ -1559,6 +1568,18 @@ paths:
              $ref: '#/components/schemas/CreateOpenaiResponseRequest'
        required: true
      deprecated: true
+     x-llama-stack-extra-body-params:
+       - name: shields
+         schema:
+           type: array
+           items:
+             oneOf:
+               - type: string
+               - $ref: '#/components/schemas/ResponseShieldSpec'
+         description: >-
+           List of shields to apply during response generation. Shields provide safety
+           and content moderation.
+         required: false
  /v1/openai/v1/responses/{response_id}:
    get:
      responses:

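The hunk above declares a "shields" extra-body parameter (through the
x-llama-stack-extra-body-params vendor extension) that clients send inside the
request body of the response-creation endpoint, even though it is not part of
the standard OpenAI request schema. As a rough, hypothetical sketch of how a
client might supply it: the base URL, model id, and shield values below are
placeholders, and relying on the OpenAI Python client's extra_body hook is an
assumption about how such a nonstandard field would be passed.

    # Hypothetical sketch: pass the new "shields" extra-body parameter when
    # creating a response. Model id, base URL, and shield values are placeholders.
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

    response = client.responses.create(
        model="example-model",                 # placeholder model id
        input="Summarize the weekly report.",
        extra_body={
            "shields": [
                "example-shield",              # plain string form
                {"type": "content_safety"},    # ResponseShieldSpec object form
            ]
        },
    )
    print(response.output_text)
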
@ -1580,8 +1601,8 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-     summary: Retrieve an OpenAI response by its ID.
-     description: Retrieve an OpenAI response by its ID.
+     summary: Get a model response.
+     description: Get a model response.
      parameters:
        - name: response_id
          in: path

@ -1611,8 +1632,8 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-     summary: Delete an OpenAI response by its ID.
-     description: Delete an OpenAI response by its ID.
+     summary: Delete a response.
+     description: Delete a response.
      parameters:
        - name: response_id
          in: path

@ -1642,10 +1663,8 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-     summary: >-
-       List input items for a given OpenAI response.
-     description: >-
-       List input items for a given OpenAI response.
+     summary: List input items.
+     description: List input items.
      parameters:
        - name: response_id
          in: path

@ -7076,6 +7095,18 @@ components:
      title: OpenAIResponseText
      description: >-
        Text response configuration for OpenAI responses.
+   ResponseShieldSpec:
+     type: object
+     properties:
+       type:
+         type: string
+         description: The type/identifier of the shield.
+     additionalProperties: false
+     required:
+       - type
+     title: ResponseShieldSpec
+     description: >-
+       Specification for a shield to apply during response generation.
    OpenAIResponseInputTool:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'

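The ResponseShieldSpec schema added above is deliberately small: "type" is the
only property, it is required, and no additional keys are accepted. A rough
illustration of that shape (the shield values are invented; only the structure
comes from the schema above), using the jsonschema package:

    # Mirror of the ResponseShieldSpec shape from the spec; values are made up.
    from jsonschema import ValidationError, validate

    response_shield_spec = {
        "type": "object",
        "properties": {"type": {"type": "string"}},
        "additionalProperties": False,
        "required": ["type"],
    }

    validate({"type": "content_safety"}, response_shield_spec)  # accepted

    try:
        validate({"type": "content_safety", "threshold": 0.8}, response_shield_spec)
    except ValidationError:
        print("extra keys are rejected because additionalProperties is false")
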
@ -9987,9 +10018,16 @@ tags:
    x-displayName: >-
      Llama Stack Evaluation API for running evaluations on model and agent candidates.
  - name: Files
-   description: ''
+   description: >-
+     This API is used to upload documents that can be used with other Llama Stack
+     APIs.
+   x-displayName: Files
  - name: Inference
    description: >-
+     Llama Stack Inference API for generating completions, chat completions, and
+     embeddings.
+
+
      This API provides the raw interface to the underlying models. Two kinds of models
      are supported:

@ -9997,15 +10035,14 @@ tags:

      - Embedding models: these models generate embeddings to be used for semantic
        search.
-   x-displayName: >-
-     Llama Stack Inference API for generating completions, chat completions, and
-     embeddings.
+   x-displayName: Inference
  - name: Models
    description: ''
  - name: PostTraining (Coming Soon)
    description: ''
  - name: Safety
-   description: ''
+   description: OpenAI-compatible Moderations API.
+   x-displayName: Safety
  - name: Telemetry
    description: ''
  - name: VectorIO

182 docs/static/llama-stack-spec.html vendored

@ -69,8 +69,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
"summary": "List all chat completions.",
|
"summary": "List chat completions.",
|
||||||
"description": "List all chat completions.",
|
"description": "List chat completions.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "after",
|
"name": "after",
|
||||||
|
|
@ -146,8 +146,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
"summary": "Create chat completions.",
|
||||||
"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -191,8 +191,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
"summary": "Describe a chat completion by its ID.",
|
"summary": "Get chat completion.",
|
||||||
"description": "Describe a chat completion by its ID.",
|
"description": "Get chat completion.\nDescribe a chat completion by its ID.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "completion_id",
|
"name": "completion_id",
|
||||||
|
|
@ -236,8 +236,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
"summary": "Create completion.",
|
||||||
"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -758,8 +758,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
"summary": "Create embeddings.",
|
||||||
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -803,8 +803,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Files"
|
"Files"
|
||||||
],
|
],
|
||||||
"summary": "Returns a list of files that belong to the user's organization.",
|
"summary": "List files.",
|
||||||
"description": "Returns a list of files that belong to the user's organization.",
|
"description": "List files.\nReturns a list of files that belong to the user's organization.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "after",
|
"name": "after",
|
||||||
|
|
@ -873,8 +873,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Files"
|
"Files"
|
||||||
],
|
],
|
||||||
"summary": "Upload a file that can be used across various endpoints.",
|
"summary": "Upload file.",
|
||||||
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -934,8 +934,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Files"
|
"Files"
|
||||||
],
|
],
|
||||||
"summary": "Returns information about a specific file.",
|
"summary": "Retrieve file.",
|
||||||
"description": "Returns information about a specific file.",
|
"description": "Retrieve file.\nReturns information about a specific file.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "file_id",
|
"name": "file_id",
|
||||||
|
|
@ -977,8 +977,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Files"
|
"Files"
|
||||||
],
|
],
|
||||||
"summary": "Delete a file.",
|
"summary": "Delete file.",
|
||||||
"description": "Delete a file.",
|
"description": "Delete file.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "file_id",
|
"name": "file_id",
|
||||||
|
|
@ -1022,8 +1022,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Files"
|
"Files"
|
||||||
],
|
],
|
||||||
"summary": "Returns the contents of the specified file.",
|
"summary": "Retrieve file content.",
|
||||||
"description": "Returns the contents of the specified file.",
|
"description": "Retrieve file content.\nReturns the contents of the specified file.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "file_id",
|
"name": "file_id",
|
||||||
|
|
@ -1067,8 +1067,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inspect"
|
"Inspect"
|
||||||
],
|
],
|
||||||
"summary": "Get the current health status of the service.",
|
"summary": "Get health status.",
|
||||||
"description": "Get the current health status of the service.",
|
"description": "Get health status.\nGet the current health status of the service.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"deprecated": false
|
"deprecated": false
|
||||||
}
|
}
|
||||||
|
|
@ -1102,8 +1102,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inspect"
|
"Inspect"
|
||||||
],
|
],
|
||||||
"summary": "List all available API routes with their methods and implementing providers.",
|
"summary": "List routes.",
|
||||||
"description": "List all available API routes with their methods and implementing providers.",
|
"description": "List routes.\nList all available API routes with their methods and implementing providers.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"deprecated": false
|
"deprecated": false
|
||||||
}
|
}
|
||||||
|
|
@ -1170,8 +1170,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Models"
|
"Models"
|
||||||
],
|
],
|
||||||
"summary": "Register a model.",
|
"summary": "Register model.",
|
||||||
"description": "Register a model.",
|
"description": "Register model.\nRegister a model.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -1215,8 +1215,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Models"
|
"Models"
|
||||||
],
|
],
|
||||||
"summary": "Get a model by its identifier.",
|
"summary": "Get model.",
|
||||||
"description": "Get a model by its identifier.",
|
"description": "Get model.\nGet a model by its identifier.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "model_id",
|
"name": "model_id",
|
||||||
|
|
@ -1251,8 +1251,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Models"
|
"Models"
|
||||||
],
|
],
|
||||||
"summary": "Unregister a model.",
|
"summary": "Unregister model.",
|
||||||
"description": "Unregister a model.",
|
"description": "Unregister model.\nUnregister a model.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "model_id",
|
"name": "model_id",
|
||||||
|
|
@ -1296,8 +1296,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Safety"
|
"Safety"
|
||||||
],
|
],
|
||||||
"summary": "Classifies if text and/or image inputs are potentially harmful.",
|
"summary": "Create moderation.",
|
||||||
"description": "Classifies if text and/or image inputs are potentially harmful.",
|
"description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -1374,8 +1374,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Prompts"
|
"Prompts"
|
||||||
],
|
],
|
||||||
"summary": "Create a new prompt.",
|
"summary": "Create prompt.",
|
||||||
"description": "Create a new prompt.",
|
"description": "Create prompt.\nCreate a new prompt.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -1419,8 +1419,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Prompts"
|
"Prompts"
|
||||||
],
|
],
|
||||||
"summary": "Get a prompt by its identifier and optional version.",
|
"summary": "Get prompt.",
|
||||||
"description": "Get a prompt by its identifier and optional version.",
|
"description": "Get prompt.\nGet a prompt by its identifier and optional version.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "prompt_id",
|
"name": "prompt_id",
|
||||||
|
|
@ -1471,8 +1471,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Prompts"
|
"Prompts"
|
||||||
],
|
],
|
||||||
"summary": "Update an existing prompt (increments version).",
|
"summary": "Update prompt.",
|
||||||
"description": "Update an existing prompt (increments version).",
|
"description": "Update prompt.\nUpdate an existing prompt (increments version).",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "prompt_id",
|
"name": "prompt_id",
|
||||||
|
|
@ -1517,8 +1517,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Prompts"
|
"Prompts"
|
||||||
],
|
],
|
||||||
"summary": "Delete a prompt.",
|
"summary": "Delete prompt.",
|
||||||
"description": "Delete a prompt.",
|
"description": "Delete prompt.\nDelete a prompt.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "prompt_id",
|
"name": "prompt_id",
|
||||||
|
|
@ -1562,8 +1562,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Prompts"
|
"Prompts"
|
||||||
],
|
],
|
||||||
"summary": "Set which version of a prompt should be the default in get_prompt (latest).",
|
"summary": "Set prompt version.",
|
||||||
"description": "Set which version of a prompt should be the default in get_prompt (latest).",
|
"description": "Set prompt version.\nSet which version of a prompt should be the default in get_prompt (latest).",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "prompt_id",
|
"name": "prompt_id",
|
||||||
|
|
@ -1617,8 +1617,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Prompts"
|
"Prompts"
|
||||||
],
|
],
|
||||||
"summary": "List all versions of a specific prompt.",
|
"summary": "List prompt versions.",
|
||||||
"description": "List all versions of a specific prompt.",
|
"description": "List prompt versions.\nList all versions of a specific prompt.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "prompt_id",
|
"name": "prompt_id",
|
||||||
|
|
@ -1662,8 +1662,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Providers"
|
"Providers"
|
||||||
],
|
],
|
||||||
"summary": "List all available providers.",
|
"summary": "List providers.",
|
||||||
"description": "List all available providers.",
|
"description": "List providers.\nList all available providers.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"deprecated": false
|
"deprecated": false
|
||||||
}
|
}
|
||||||
|
|
@ -1697,8 +1697,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Providers"
|
"Providers"
|
||||||
],
|
],
|
||||||
"summary": "Get detailed information about a specific provider.",
|
"summary": "Get provider.",
|
||||||
"description": "Get detailed information about a specific provider.",
|
"description": "Get provider.\nGet detailed information about a specific provider.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "provider_id",
|
"name": "provider_id",
|
||||||
|
|
@ -1742,8 +1742,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"summary": "List all OpenAI responses.",
|
"summary": "List all responses.",
|
||||||
"description": "List all OpenAI responses.",
|
"description": "List all responses.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "after",
|
"name": "after",
|
||||||
|
|
@ -1817,8 +1817,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"summary": "Create a new OpenAI response.",
|
"summary": "Create a model response.",
|
||||||
"description": "Create a new OpenAI response.",
|
"description": "Create a model response.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -1830,7 +1830,27 @@
|
||||||
},
|
},
|
||||||
"required": true
|
"required": true
|
||||||
},
|
},
|
||||||
"deprecated": false
|
"deprecated": false,
|
||||||
|
"x-llama-stack-extra-body-params": [
|
||||||
|
{
|
||||||
|
"name": "shields",
|
||||||
|
"schema": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/ResponseShieldSpec"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "List of shields to apply during response generation. Shields provide safety and content moderation.",
|
||||||
|
"required": false
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/v1/responses/{response_id}": {
|
"/v1/responses/{response_id}": {
|
||||||
|
|
@ -1862,8 +1882,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"summary": "Retrieve an OpenAI response by its ID.",
|
"summary": "Get a model response.",
|
||||||
"description": "Retrieve an OpenAI response by its ID.",
|
"description": "Get a model response.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "response_id",
|
"name": "response_id",
|
||||||
|
|
@ -1905,8 +1925,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"summary": "Delete an OpenAI response by its ID.",
|
"summary": "Delete a response.",
|
||||||
"description": "Delete an OpenAI response by its ID.",
|
"description": "Delete a response.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "response_id",
|
"name": "response_id",
|
||||||
|
|
@ -1950,8 +1970,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"summary": "List input items for a given OpenAI response.",
|
"summary": "List input items.",
|
||||||
"description": "List input items for a given OpenAI response.",
|
"description": "List input items.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "response_id",
|
"name": "response_id",
|
||||||
|
|
@ -2043,8 +2063,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Safety"
|
"Safety"
|
||||||
],
|
],
|
||||||
"summary": "Run a shield.",
|
"summary": "Run shield.",
|
||||||
"description": "Run a shield.",
|
"description": "Run shield.\nRun a shield.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -4176,8 +4196,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inspect"
|
"Inspect"
|
||||||
],
|
],
|
||||||
"summary": "Get the version of the service.",
|
"summary": "Get version.",
|
||||||
"description": "Get the version of the service.",
|
"description": "Get version.\nGet the version of the service.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"deprecated": false
|
"deprecated": false
|
||||||
}
|
}
|
||||||
|
|
@ -7616,6 +7636,21 @@
|
||||||
"title": "OpenAIResponseText",
|
"title": "OpenAIResponseText",
|
||||||
"description": "Text response configuration for OpenAI responses."
|
"description": "Text response configuration for OpenAI responses."
|
||||||
},
|
},
|
||||||
|
"ResponseShieldSpec": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The type/identifier of the shield."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type"
|
||||||
|
],
|
||||||
|
"title": "ResponseShieldSpec",
|
||||||
|
"description": "Specification for a shield to apply during response generation."
|
||||||
|
},
|
||||||
"OpenAIResponseInputTool": {
|
"OpenAIResponseInputTool": {
|
||||||
"oneOf": [
|
"oneOf": [
|
||||||
{
|
{
|
||||||
|
|
@ -12879,16 +12914,18 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Files",
|
"name": "Files",
|
||||||
"description": ""
|
"description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
|
||||||
|
"x-displayName": "Files"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Inference",
|
"name": "Inference",
|
||||||
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||||
"x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
|
"x-displayName": "Inference"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Inspect",
|
"name": "Inspect",
|
||||||
"description": ""
|
"description": "APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.",
|
||||||
|
"x-displayName": "Inspect"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Models",
|
"name": "Models",
|
||||||
|
|
@ -12896,17 +12933,18 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Prompts",
|
"name": "Prompts",
|
||||||
"description": "",
|
"description": "Protocol for prompt management operations.",
|
||||||
"x-displayName": "Protocol for prompt management operations."
|
"x-displayName": "Prompts"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Providers",
|
"name": "Providers",
|
||||||
"description": "",
|
"description": "Providers API for inspecting, listing, and modifying providers and their configurations.",
|
||||||
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
|
"x-displayName": "Providers"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Safety",
|
"name": "Safety",
|
||||||
"description": ""
|
"description": "OpenAI-compatible Moderations API.",
|
||||||
|
"x-displayName": "Safety"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Scoring",
|
"name": "Scoring",
|
||||||
|
|
|
||||||
227 docs/static/llama-stack-spec.yaml vendored

@ -33,8 +33,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
summary: List all chat completions.
|
summary: List chat completions.
|
||||||
description: List all chat completions.
|
description: List chat completions.
|
||||||
parameters:
|
parameters:
|
||||||
- name: after
|
- name: after
|
||||||
in: query
|
in: query
|
||||||
|
|
@ -87,10 +87,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
summary: >-
|
summary: Create chat completions.
|
||||||
Generate an OpenAI-compatible chat completion for the given messages using
|
|
||||||
the specified model.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Create chat completions.
|
||||||
|
|
||||||
Generate an OpenAI-compatible chat completion for the given messages using
|
Generate an OpenAI-compatible chat completion for the given messages using
|
||||||
the specified model.
|
the specified model.
|
||||||
parameters: []
|
parameters: []
|
||||||
|
|
@ -122,8 +122,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
summary: Describe a chat completion by its ID.
|
summary: Get chat completion.
|
||||||
description: Describe a chat completion by its ID.
|
description: >-
|
||||||
|
Get chat completion.
|
||||||
|
|
||||||
|
Describe a chat completion by its ID.
|
||||||
parameters:
|
parameters:
|
||||||
- name: completion_id
|
- name: completion_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -153,10 +156,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
summary: >-
|
summary: Create completion.
|
||||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
|
||||||
model.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Create completion.
|
||||||
|
|
||||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||||
model.
|
model.
|
||||||
parameters: []
|
parameters: []
|
||||||
|
|
@ -603,10 +606,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inference
|
- Inference
|
||||||
summary: >-
|
summary: Create embeddings.
|
||||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
|
||||||
model.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Create embeddings.
|
||||||
|
|
||||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||||
model.
|
model.
|
||||||
parameters: []
|
parameters: []
|
||||||
|
|
@ -639,9 +642,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: >-
|
summary: List files.
|
||||||
Returns a list of files that belong to the user's organization.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
List files.
|
||||||
|
|
||||||
Returns a list of files that belong to the user's organization.
|
Returns a list of files that belong to the user's organization.
|
||||||
parameters:
|
parameters:
|
||||||
- name: after
|
- name: after
|
||||||
|
|
@ -699,11 +703,13 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: >-
|
summary: Upload file.
|
||||||
Upload a file that can be used across various endpoints.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Upload file.
|
||||||
|
|
||||||
Upload a file that can be used across various endpoints.
|
Upload a file that can be used across various endpoints.
|
||||||
|
|
||||||
|
|
||||||
The file upload should be a multipart form request with:
|
The file upload should be a multipart form request with:
|
||||||
|
|
||||||
- file: The File object (not file name) to be uploaded.
|
- file: The File object (not file name) to be uploaded.
|
||||||
|
|
@ -752,9 +758,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: >-
|
summary: Retrieve file.
|
||||||
Returns information about a specific file.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Retrieve file.
|
||||||
|
|
||||||
Returns information about a specific file.
|
Returns information about a specific file.
|
||||||
parameters:
|
parameters:
|
||||||
- name: file_id
|
- name: file_id
|
||||||
|
|
@ -786,8 +793,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: Delete a file.
|
summary: Delete file.
|
||||||
description: Delete a file.
|
description: Delete file.
|
||||||
parameters:
|
parameters:
|
||||||
- name: file_id
|
- name: file_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -819,9 +826,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: >-
|
summary: Retrieve file content.
|
||||||
Returns the contents of the specified file.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Retrieve file content.
|
||||||
|
|
||||||
Returns the contents of the specified file.
|
Returns the contents of the specified file.
|
||||||
parameters:
|
parameters:
|
||||||
- name: file_id
|
- name: file_id
|
||||||
|
|
@ -854,9 +862,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inspect
|
- Inspect
|
||||||
summary: >-
|
summary: Get health status.
|
||||||
Get the current health status of the service.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Get health status.
|
||||||
|
|
||||||
Get the current health status of the service.
|
Get the current health status of the service.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
|
|
@ -882,9 +891,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inspect
|
- Inspect
|
||||||
summary: >-
|
summary: List routes.
|
||||||
List all available API routes with their methods and implementing providers.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
List routes.
|
||||||
|
|
||||||
List all available API routes with their methods and implementing providers.
|
List all available API routes with their methods and implementing providers.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
|
|
@ -933,8 +943,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Models
|
- Models
|
||||||
summary: Register a model.
|
summary: Register model.
|
||||||
description: Register a model.
|
description: >-
|
||||||
|
Register model.
|
||||||
|
|
||||||
|
Register a model.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
|
@ -964,8 +977,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Models
|
- Models
|
||||||
summary: Get a model by its identifier.
|
summary: Get model.
|
||||||
description: Get a model by its identifier.
|
description: >-
|
||||||
|
Get model.
|
||||||
|
|
||||||
|
Get a model by its identifier.
|
||||||
parameters:
|
parameters:
|
||||||
- name: model_id
|
- name: model_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -990,8 +1006,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Models
|
- Models
|
||||||
summary: Unregister a model.
|
summary: Unregister model.
|
||||||
description: Unregister a model.
|
description: >-
|
||||||
|
Unregister model.
|
||||||
|
|
||||||
|
Unregister a model.
|
||||||
parameters:
|
parameters:
|
||||||
- name: model_id
|
- name: model_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1022,9 +1041,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Safety
|
- Safety
|
||||||
summary: >-
|
summary: Create moderation.
|
||||||
Classifies if text and/or image inputs are potentially harmful.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Create moderation.
|
||||||
|
|
||||||
Classifies if text and/or image inputs are potentially harmful.
|
Classifies if text and/or image inputs are potentially harmful.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
|
|
@ -1080,8 +1100,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Prompts
|
- Prompts
|
||||||
summary: Create a new prompt.
|
summary: Create prompt.
|
||||||
description: Create a new prompt.
|
description: >-
|
||||||
|
Create prompt.
|
||||||
|
|
||||||
|
Create a new prompt.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
|
@ -1111,9 +1134,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Prompts
|
- Prompts
|
||||||
summary: >-
|
summary: Get prompt.
|
||||||
Get a prompt by its identifier and optional version.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Get prompt.
|
||||||
|
|
||||||
Get a prompt by its identifier and optional version.
|
Get a prompt by its identifier and optional version.
|
||||||
parameters:
|
parameters:
|
||||||
- name: prompt_id
|
- name: prompt_id
|
||||||
|
|
@ -1151,9 +1175,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Prompts
|
- Prompts
|
||||||
summary: >-
|
summary: Update prompt.
|
||||||
Update an existing prompt (increments version).
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Update prompt.
|
||||||
|
|
||||||
Update an existing prompt (increments version).
|
Update an existing prompt (increments version).
|
||||||
parameters:
|
parameters:
|
||||||
- name: prompt_id
|
- name: prompt_id
|
||||||
|
|
@ -1185,8 +1210,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Prompts
|
- Prompts
|
||||||
summary: Delete a prompt.
|
summary: Delete prompt.
|
||||||
description: Delete a prompt.
|
description: >-
|
||||||
|
Delete prompt.
|
||||||
|
|
||||||
|
Delete a prompt.
|
||||||
parameters:
|
parameters:
|
||||||
- name: prompt_id
|
- name: prompt_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1217,9 +1245,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Prompts
|
- Prompts
|
||||||
summary: >-
|
summary: Set prompt version.
|
||||||
Set which version of a prompt should be the default in get_prompt (latest).
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Set prompt version.
|
||||||
|
|
||||||
Set which version of a prompt should be the default in get_prompt (latest).
|
Set which version of a prompt should be the default in get_prompt (latest).
|
||||||
parameters:
|
parameters:
|
||||||
- name: prompt_id
|
- name: prompt_id
|
||||||
|
|
@ -1257,8 +1286,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Prompts
|
- Prompts
|
||||||
summary: List all versions of a specific prompt.
|
summary: List prompt versions.
|
||||||
description: List all versions of a specific prompt.
|
description: >-
|
||||||
|
List prompt versions.
|
||||||
|
|
||||||
|
List all versions of a specific prompt.
|
||||||
parameters:
|
parameters:
|
||||||
- name: prompt_id
|
- name: prompt_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1290,8 +1322,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Providers
|
- Providers
|
||||||
summary: List all available providers.
|
summary: List providers.
|
||||||
description: List all available providers.
|
description: >-
|
||||||
|
List providers.
|
||||||
|
|
||||||
|
List all available providers.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
/v1/providers/{provider_id}:
|
/v1/providers/{provider_id}:
|
||||||
|
|
@ -1316,9 +1351,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Providers
|
- Providers
|
||||||
summary: >-
|
summary: Get provider.
|
||||||
Get detailed information about a specific provider.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Get provider.
|
||||||
|
|
||||||
Get detailed information about a specific provider.
|
Get detailed information about a specific provider.
|
||||||
parameters:
|
parameters:
|
||||||
- name: provider_id
|
- name: provider_id
|
||||||
|
|
@ -1349,8 +1385,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
summary: List all OpenAI responses.
|
summary: List all responses.
|
||||||
description: List all OpenAI responses.
|
description: List all responses.
|
||||||
parameters:
|
parameters:
|
||||||
- name: after
|
- name: after
|
||||||
in: query
|
in: query
|
||||||
|
|
@ -1401,8 +1437,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
summary: Create a new OpenAI response.
|
summary: Create a model response.
|
||||||
description: Create a new OpenAI response.
|
description: Create a model response.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
|
@ -1411,6 +1447,18 @@ paths:
|
||||||
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
|
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
|
||||||
required: true
|
required: true
|
||||||
deprecated: false
|
deprecated: false
|
||||||
|
x-llama-stack-extra-body-params:
|
||||||
|
- name: shields
|
||||||
|
schema:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
oneOf:
|
||||||
|
- type: string
|
||||||
|
- $ref: '#/components/schemas/ResponseShieldSpec'
|
||||||
|
description: >-
|
||||||
|
List of shields to apply during response generation. Shields provide safety
|
||||||
|
and content moderation.
|
||||||
|
required: false
|
||||||
/v1/responses/{response_id}:
|
/v1/responses/{response_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
|
@ -1432,8 +1480,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
summary: Retrieve an OpenAI response by its ID.
|
summary: Get a model response.
|
||||||
description: Retrieve an OpenAI response by its ID.
|
description: Get a model response.
|
||||||
parameters:
|
parameters:
|
||||||
- name: response_id
|
- name: response_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1463,8 +1511,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
summary: Delete an OpenAI response by its ID.
|
summary: Delete a response.
|
||||||
description: Delete an OpenAI response by its ID.
|
description: Delete a response.
|
||||||
parameters:
|
parameters:
|
||||||
- name: response_id
|
- name: response_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1494,10 +1542,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
summary: >-
|
summary: List input items.
|
||||||
List input items for a given OpenAI response.
|
description: List input items.
|
||||||
description: >-
|
|
||||||
List input items for a given OpenAI response.
|
|
||||||
parameters:
|
parameters:
|
||||||
- name: response_id
|
- name: response_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1566,8 +1612,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Safety
|
- Safety
|
||||||
summary: Run a shield.
|
summary: Run shield.
|
||||||
description: Run a shield.
|
description: >-
|
||||||
|
Run shield.
|
||||||
|
|
||||||
|
Run a shield.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
|
@ -3123,8 +3172,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inspect
|
- Inspect
|
||||||
summary: Get the version of the service.
|
summary: Get version.
|
||||||
description: Get the version of the service.
|
description: >-
|
||||||
|
Get version.
|
||||||
|
|
||||||
|
Get the version of the service.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
jsonSchemaDialect: >-
|
jsonSchemaDialect: >-
|
||||||
|
|
@ -5739,6 +5791,18 @@ components:
|
||||||
title: OpenAIResponseText
|
title: OpenAIResponseText
|
||||||
description: >-
|
description: >-
|
||||||
Text response configuration for OpenAI responses.
|
Text response configuration for OpenAI responses.
|
||||||
|
ResponseShieldSpec:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
description: The type/identifier of the shield.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
title: ResponseShieldSpec
|
||||||
|
description: >-
|
||||||
|
Specification for a shield to apply during response generation.
|
||||||
OpenAIResponseInputTool:
|
OpenAIResponseInputTool:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
|
- $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
|
||||||
|
|
@ -9725,9 +9789,16 @@ tags:
|
||||||
x-displayName: >-
|
x-displayName: >-
|
||||||
Protocol for conversation management operations.
|
Protocol for conversation management operations.
|
||||||
- name: Files
|
- name: Files
|
||||||
description: ''
|
description: >-
|
||||||
|
This API is used to upload documents that can be used with other Llama Stack
|
||||||
|
APIs.
|
||||||
|
x-displayName: Files
|
||||||
- name: Inference
|
- name: Inference
|
||||||
description: >-
|
description: >-
|
||||||
|
Llama Stack Inference API for generating completions, chat completions, and
|
||||||
|
embeddings.
|
||||||
|
|
||||||
|
|
||||||
This API provides the raw interface to the underlying models. Two kinds of models
|
This API provides the raw interface to the underlying models. Two kinds of models
|
||||||
are supported:
|
are supported:
|
||||||
|
|
||||||
|
|
@ -9735,23 +9806,25 @@ tags:
|
||||||
|
|
||||||
- Embedding models: these models generate embeddings to be used for semantic
|
- Embedding models: these models generate embeddings to be used for semantic
|
||||||
search.
|
search.
|
||||||
x-displayName: >-
|
x-displayName: Inference
|
||||||
Llama Stack Inference API for generating completions, chat completions, and
|
|
||||||
embeddings.
|
|
||||||
- name: Inspect
|
- name: Inspect
|
||||||
description: ''
|
description: >-
|
||||||
|
APIs for inspecting the Llama Stack service, including health status, available
|
||||||
|
API routes with methods and implementing providers.
|
||||||
|
x-displayName: Inspect
|
||||||
- name: Models
|
- name: Models
|
||||||
description: ''
|
description: ''
|
||||||
- name: Prompts
|
- name: Prompts
|
||||||
description: ''
|
description: >-
|
||||||
x-displayName: >-
|
|
||||||
Protocol for prompt management operations.
|
Protocol for prompt management operations.
|
||||||
|
x-displayName: Prompts
|
||||||
- name: Providers
|
- name: Providers
|
||||||
description: ''
|
description: >-
|
||||||
x-displayName: >-
|
|
||||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||||
|
x-displayName: Providers
|
||||||
- name: Safety
|
- name: Safety
|
||||||
description: ''
|
description: OpenAI-compatible Moderations API.
|
||||||
|
x-displayName: Safety
|
||||||
- name: Scoring
|
- name: Scoring
|
||||||
description: ''
|
description: ''
|
||||||
- name: ScoringFunctions
|
- name: ScoringFunctions
|
||||||
|
|
|
||||||
182 docs/static/stainless-llama-stack-spec.html vendored

@ -69,8 +69,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
"summary": "List all chat completions.",
|
"summary": "List chat completions.",
|
||||||
"description": "List all chat completions.",
|
"description": "List chat completions.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "after",
|
"name": "after",
|
||||||
|
|
@ -146,8 +146,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
"summary": "Create chat completions.",
|
||||||
"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -191,8 +191,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
"summary": "Describe a chat completion by its ID.",
|
"summary": "Get chat completion.",
|
||||||
"description": "Describe a chat completion by its ID.",
|
"description": "Get chat completion.\nDescribe a chat completion by its ID.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "completion_id",
|
"name": "completion_id",
|
||||||
|
|
@ -236,8 +236,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
"summary": "Create completion.",
|
||||||
"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -758,8 +758,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inference"
|
"Inference"
|
||||||
],
|
],
|
||||||
"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
"summary": "Create embeddings.",
|
||||||
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -803,8 +803,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Files"
|
"Files"
|
||||||
],
|
],
|
||||||
"summary": "Returns a list of files that belong to the user's organization.",
|
"summary": "List files.",
|
||||||
"description": "Returns a list of files that belong to the user's organization.",
|
"description": "List files.\nReturns a list of files that belong to the user's organization.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "after",
|
"name": "after",
|
||||||
|
|
@ -873,8 +873,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Files"
|
"Files"
|
||||||
],
|
],
|
||||||
"summary": "Upload a file that can be used across various endpoints.",
|
"summary": "Upload file.",
|
||||||
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
|
@ -934,8 +934,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Files"
|
"Files"
|
||||||
],
|
],
|
||||||
"summary": "Returns information about a specific file.",
|
"summary": "Retrieve file.",
|
||||||
"description": "Returns information about a specific file.",
|
"description": "Retrieve file.\nReturns information about a specific file.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "file_id",
|
"name": "file_id",
|
||||||
|
|
@ -977,8 +977,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Files"
|
"Files"
|
||||||
],
|
],
|
||||||
"summary": "Delete a file.",
|
"summary": "Delete file.",
|
||||||
"description": "Delete a file.",
|
"description": "Delete file.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "file_id",
|
"name": "file_id",
|
||||||
|
|
@ -1022,8 +1022,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Files"
|
"Files"
|
||||||
],
|
],
|
||||||
"summary": "Returns the contents of the specified file.",
|
"summary": "Retrieve file content.",
|
||||||
"description": "Returns the contents of the specified file.",
|
"description": "Retrieve file content.\nReturns the contents of the specified file.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "file_id",
|
"name": "file_id",
|
||||||
|
|
@ -1067,8 +1067,8 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Inspect"
|
"Inspect"
|
         ],
-        "summary": "Get the current health status of the service.",
+        "summary": "Get health status.",
-        "description": "Get the current health status of the service.",
+        "description": "Get health status.\nGet the current health status of the service.",
         "parameters": [],
         "deprecated": false
       }

@@ -1102,8 +1102,8 @@
         "tags": [ "Inspect" ],
-        "summary": "List all available API routes with their methods and implementing providers.",
+        "summary": "List routes.",
-        "description": "List all available API routes with their methods and implementing providers.",
+        "description": "List routes.\nList all available API routes with their methods and implementing providers.",

@@ -1170,8 +1170,8 @@
         "tags": [ "Models" ],
-        "summary": "Register a model.",
+        "summary": "Register model.",
-        "description": "Register a model.",
+        "description": "Register model.\nRegister a model.",

@@ -1215,8 +1215,8 @@
         "tags": [ "Models" ],
-        "summary": "Get a model by its identifier.",
+        "summary": "Get model.",
-        "description": "Get a model by its identifier.",
+        "description": "Get model.\nGet a model by its identifier.",

@@ -1251,8 +1251,8 @@
         "tags": [ "Models" ],
-        "summary": "Unregister a model.",
+        "summary": "Unregister model.",
-        "description": "Unregister a model.",
+        "description": "Unregister model.\nUnregister a model.",

@@ -1296,8 +1296,8 @@
         "tags": [ "Safety" ],
-        "summary": "Classifies if text and/or image inputs are potentially harmful.",
+        "summary": "Create moderation.",
-        "description": "Classifies if text and/or image inputs are potentially harmful.",
+        "description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",

@@ -1374,8 +1374,8 @@
         "tags": [ "Prompts" ],
-        "summary": "Create a new prompt.",
+        "summary": "Create prompt.",
-        "description": "Create a new prompt.",
+        "description": "Create prompt.\nCreate a new prompt.",

@@ -1419,8 +1419,8 @@
         "tags": [ "Prompts" ],
-        "summary": "Get a prompt by its identifier and optional version.",
+        "summary": "Get prompt.",
-        "description": "Get a prompt by its identifier and optional version.",
+        "description": "Get prompt.\nGet a prompt by its identifier and optional version.",

@@ -1471,8 +1471,8 @@
         "tags": [ "Prompts" ],
-        "summary": "Update an existing prompt (increments version).",
+        "summary": "Update prompt.",
-        "description": "Update an existing prompt (increments version).",
+        "description": "Update prompt.\nUpdate an existing prompt (increments version).",

@@ -1517,8 +1517,8 @@
         "tags": [ "Prompts" ],
-        "summary": "Delete a prompt.",
+        "summary": "Delete prompt.",
-        "description": "Delete a prompt.",
+        "description": "Delete prompt.\nDelete a prompt.",

@@ -1562,8 +1562,8 @@
         "tags": [ "Prompts" ],
-        "summary": "Set which version of a prompt should be the default in get_prompt (latest).",
+        "summary": "Set prompt version.",
-        "description": "Set which version of a prompt should be the default in get_prompt (latest).",
+        "description": "Set prompt version.\nSet which version of a prompt should be the default in get_prompt (latest).",

@@ -1617,8 +1617,8 @@
         "tags": [ "Prompts" ],
-        "summary": "List all versions of a specific prompt.",
+        "summary": "List prompt versions.",
-        "description": "List all versions of a specific prompt.",
+        "description": "List prompt versions.\nList all versions of a specific prompt.",

@@ -1662,8 +1662,8 @@
         "tags": [ "Providers" ],
-        "summary": "List all available providers.",
+        "summary": "List providers.",
-        "description": "List all available providers.",
+        "description": "List providers.\nList all available providers.",

@@ -1697,8 +1697,8 @@
         "tags": [ "Providers" ],
-        "summary": "Get detailed information about a specific provider.",
+        "summary": "Get provider.",
-        "description": "Get detailed information about a specific provider.",
+        "description": "Get provider.\nGet detailed information about a specific provider.",

@@ -1742,8 +1742,8 @@
         "tags": [ "Agents" ],
-        "summary": "List all OpenAI responses.",
+        "summary": "List all responses.",
-        "description": "List all OpenAI responses.",
+        "description": "List all responses.",

@@ -1817,8 +1817,8 @@
         "tags": [ "Agents" ],
-        "summary": "Create a new OpenAI response.",
+        "summary": "Create a model response.",
-        "description": "Create a new OpenAI response.",
+        "description": "Create a model response.",

@@ -1830,7 +1830,27 @@
           },
           "required": true
         },
-        "deprecated": false
+        "deprecated": false,
+        "x-llama-stack-extra-body-params": [
+          {
+            "name": "shields",
+            "schema": {
+              "type": "array",
+              "items": {
+                "oneOf": [
+                  {
+                    "type": "string"
+                  },
+                  {
+                    "$ref": "#/components/schemas/ResponseShieldSpec"
+                  }
+                ]
+              }
+            },
+            "description": "List of shields to apply during response generation. Shields provide safety and content moderation.",
+            "required": false
+          }
+        ]
       }
     },
     "/v1/responses/{response_id}": {
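To make the new extra-body parameter concrete, a minimal sketch of a request that supplies shields alongside the standard response fields follows. The base URL, model id, and shield identifiers are illustrative assumptions, not values taken from this change.

# Sketch only: endpoint, model id, and shield ids are assumptions.
import requests

payload = {
    "model": "llama3.2:3b",                  # any model registered with the stack
    "input": "How do I make a fruit salad?",
    # Extra-body parameter documented via x-llama-stack-extra-body-params:
    # each entry is either a shield id string or a ResponseShieldSpec object.
    "shields": ["llama-guard", {"type": "llama-guard"}],
}

resp = requests.post("http://localhost:8321/v1/responses", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json()["id"])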
@@ -1862,8 +1882,8 @@
         "tags": [ "Agents" ],
-        "summary": "Retrieve an OpenAI response by its ID.",
+        "summary": "Get a model response.",
-        "description": "Retrieve an OpenAI response by its ID.",
+        "description": "Get a model response.",

@@ -1905,8 +1925,8 @@
         "tags": [ "Agents" ],
-        "summary": "Delete an OpenAI response by its ID.",
+        "summary": "Delete a response.",
-        "description": "Delete an OpenAI response by its ID.",
+        "description": "Delete a response.",

@@ -1950,8 +1970,8 @@
         "tags": [ "Agents" ],
-        "summary": "List input items for a given OpenAI response.",
+        "summary": "List input items.",
-        "description": "List input items for a given OpenAI response.",
+        "description": "List input items.",

@@ -2043,8 +2063,8 @@
         "tags": [ "Safety" ],
-        "summary": "Run a shield.",
+        "summary": "Run shield.",
-        "description": "Run a shield.",
+        "description": "Run shield.\nRun a shield.",

@@ -4176,8 +4196,8 @@
         "tags": [ "Inspect" ],
-        "summary": "Get the version of the service.",
+        "summary": "Get version.",
-        "description": "Get the version of the service.",
+        "description": "Get version.\nGet the version of the service.",

@@ -9625,6 +9645,21 @@
         "title": "OpenAIResponseText",
         "description": "Text response configuration for OpenAI responses."
       },
+      "ResponseShieldSpec": {
+        "type": "object",
+        "properties": {
+          "type": {
+            "type": "string",
+            "description": "The type/identifier of the shield."
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "type"
+        ],
+        "title": "ResponseShieldSpec",
+        "description": "Specification for a shield to apply during response generation."
+      },
       "OpenAIResponseInputTool": {
         "oneOf": [
           {

@@ -18452,16 +18487,18 @@
     },
     {
       "name": "Files",
-      "description": ""
+      "description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
+      "x-displayName": "Files"
     },
     {
       "name": "Inference",
-      "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
-      "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
+      "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+      "x-displayName": "Inference"
     },
     {
       "name": "Inspect",
-      "description": ""
+      "description": "APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.",
+      "x-displayName": "Inspect"
     },
     {
       "name": "Models",

@@ -18473,17 +18510,18 @@
     },
     {
       "name": "Prompts",
-      "description": "",
-      "x-displayName": "Protocol for prompt management operations."
+      "description": "Protocol for prompt management operations.",
+      "x-displayName": "Prompts"
     },
     {
       "name": "Providers",
-      "description": "",
-      "x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
+      "description": "Providers API for inspecting, listing, and modifying providers and their configurations.",
+      "x-displayName": "Providers"
    },
     {
       "name": "Safety",
-      "description": ""
+      "description": "OpenAI-compatible Moderations API.",
+      "x-displayName": "Safety"
     },
     {
       "name": "Scoring",
227  docs/static/stainless-llama-stack-spec.yaml  (vendored)
@@ -36,8 +36,8 @@ paths:
       tags:
         - Inference
-      summary: List all chat completions.
+      summary: List chat completions.
-      description: List all chat completions.
+      description: List chat completions.

@@ -90,10 +90,10 @@ paths:
       tags:
         - Inference
-      summary: >-
-        Generate an OpenAI-compatible chat completion for the given messages using
-        the specified model.
+      summary: Create chat completions.
       description: >-
+        Create chat completions.
+
         Generate an OpenAI-compatible chat completion for the given messages using
         the specified model.

@@ -125,8 +125,11 @@ paths:
       tags:
         - Inference
-      summary: Describe a chat completion by its ID.
+      summary: Get chat completion.
-      description: Describe a chat completion by its ID.
+      description: >-
+        Get chat completion.
+
+        Describe a chat completion by its ID.

@@ -156,10 +159,10 @@ paths:
       tags:
         - Inference
-      summary: >-
-        Generate an OpenAI-compatible completion for the given prompt using the specified
-        model.
+      summary: Create completion.
       description: >-
+        Create completion.
+
         Generate an OpenAI-compatible completion for the given prompt using the specified
         model.

@@ -606,10 +609,10 @@ paths:
       tags:
         - Inference
-      summary: >-
-        Generate OpenAI-compatible embeddings for the given input using the specified
-        model.
+      summary: Create embeddings.
       description: >-
+        Create embeddings.
+
         Generate OpenAI-compatible embeddings for the given input using the specified
         model.

@@ -642,9 +645,10 @@ paths:
       tags:
         - Files
-      summary: >-
-        Returns a list of files that belong to the user's organization.
+      summary: List files.
       description: >-
+        List files.
+
         Returns a list of files that belong to the user's organization.

@@ -702,11 +706,13 @@ paths:
       tags:
         - Files
-      summary: >-
-        Upload a file that can be used across various endpoints.
+      summary: Upload file.
       description: >-
+        Upload file.
+
         Upload a file that can be used across various endpoints.

         The file upload should be a multipart form request with:

         - file: The File object (not file name) to be uploaded.

@@ -755,9 +761,10 @@ paths:
       tags:
         - Files
-      summary: >-
-        Returns information about a specific file.
+      summary: Retrieve file.
       description: >-
+        Retrieve file.
+
         Returns information about a specific file.

@@ -789,8 +796,8 @@ paths:
       tags:
         - Files
-      summary: Delete a file.
+      summary: Delete file.
-      description: Delete a file.
+      description: Delete file.

@@ -822,9 +829,10 @@ paths:
       tags:
         - Files
-      summary: >-
-        Returns the contents of the specified file.
+      summary: Retrieve file content.
       description: >-
+        Retrieve file content.
+
         Returns the contents of the specified file.

@@ -857,9 +865,10 @@ paths:
       tags:
         - Inspect
-      summary: >-
-        Get the current health status of the service.
+      summary: Get health status.
       description: >-
+        Get health status.
+
         Get the current health status of the service.

@@ -885,9 +894,10 @@ paths:
       tags:
         - Inspect
-      summary: >-
-        List all available API routes with their methods and implementing providers.
+      summary: List routes.
       description: >-
+        List routes.
+
         List all available API routes with their methods and implementing providers.
@@ -936,8 +946,11 @@ paths:
       tags:
         - Models
-      summary: Register a model.
+      summary: Register model.
-      description: Register a model.
+      description: >-
+        Register model.
+
+        Register a model.

@@ -967,8 +980,11 @@ paths:
       tags:
         - Models
-      summary: Get a model by its identifier.
+      summary: Get model.
-      description: Get a model by its identifier.
+      description: >-
+        Get model.
+
+        Get a model by its identifier.

@@ -993,8 +1009,11 @@ paths:
       tags:
         - Models
-      summary: Unregister a model.
+      summary: Unregister model.
-      description: Unregister a model.
+      description: >-
+        Unregister model.
+
+        Unregister a model.

@@ -1025,9 +1044,10 @@ paths:
       tags:
         - Safety
-      summary: >-
-        Classifies if text and/or image inputs are potentially harmful.
+      summary: Create moderation.
       description: >-
+        Create moderation.
+
         Classifies if text and/or image inputs are potentially harmful.

@@ -1083,8 +1103,11 @@ paths:
       tags:
         - Prompts
-      summary: Create a new prompt.
+      summary: Create prompt.
-      description: Create a new prompt.
+      description: >-
+        Create prompt.
+
+        Create a new prompt.

@@ -1114,9 +1137,10 @@ paths:
       tags:
         - Prompts
-      summary: >-
-        Get a prompt by its identifier and optional version.
+      summary: Get prompt.
       description: >-
+        Get prompt.
+
         Get a prompt by its identifier and optional version.

@@ -1154,9 +1178,10 @@ paths:
       tags:
         - Prompts
-      summary: >-
-        Update an existing prompt (increments version).
+      summary: Update prompt.
       description: >-
+        Update prompt.
+
         Update an existing prompt (increments version).

@@ -1188,8 +1213,11 @@ paths:
       tags:
         - Prompts
-      summary: Delete a prompt.
+      summary: Delete prompt.
-      description: Delete a prompt.
+      description: >-
+        Delete prompt.
+
+        Delete a prompt.

@@ -1220,9 +1248,10 @@ paths:
       tags:
         - Prompts
-      summary: >-
-        Set which version of a prompt should be the default in get_prompt (latest).
+      summary: Set prompt version.
       description: >-
+        Set prompt version.
+
         Set which version of a prompt should be the default in get_prompt (latest).

@@ -1260,8 +1289,11 @@ paths:
       tags:
         - Prompts
-      summary: List all versions of a specific prompt.
+      summary: List prompt versions.
-      description: List all versions of a specific prompt.
+      description: >-
+        List prompt versions.
+
+        List all versions of a specific prompt.

@@ -1293,8 +1325,11 @@ paths:
       tags:
         - Providers
-      summary: List all available providers.
+      summary: List providers.
-      description: List all available providers.
+      description: >-
+        List providers.
+
+        List all available providers.

@@ -1319,9 +1354,10 @@ paths:
       tags:
         - Providers
-      summary: >-
-        Get detailed information about a specific provider.
+      summary: Get provider.
       description: >-
+        Get provider.
+
         Get detailed information about a specific provider.

@@ -1352,8 +1388,8 @@ paths:
       tags:
         - Agents
-      summary: List all OpenAI responses.
+      summary: List all responses.
-      description: List all OpenAI responses.
+      description: List all responses.

@@ -1404,8 +1440,8 @@ paths:
       tags:
         - Agents
-      summary: Create a new OpenAI response.
+      summary: Create a model response.
-      description: Create a new OpenAI response.
+      description: Create a model response.

@@ -1414,6 +1450,18 @@ paths:
               $ref: '#/components/schemas/CreateOpenaiResponseRequest'
         required: true
       deprecated: false
+      x-llama-stack-extra-body-params:
+        - name: shields
+          schema:
+            type: array
+            items:
+              oneOf:
+                - type: string
+                - $ref: '#/components/schemas/ResponseShieldSpec'
+          description: >-
+            List of shields to apply during response generation. Shields provide safety
+            and content moderation.
+          required: false
   /v1/responses/{response_id}:
@@ -1435,8 +1483,8 @@ paths:
       tags:
         - Agents
-      summary: Retrieve an OpenAI response by its ID.
+      summary: Get a model response.
-      description: Retrieve an OpenAI response by its ID.
+      description: Get a model response.

@@ -1466,8 +1514,8 @@ paths:
       tags:
         - Agents
-      summary: Delete an OpenAI response by its ID.
+      summary: Delete a response.
-      description: Delete an OpenAI response by its ID.
+      description: Delete a response.

@@ -1497,10 +1545,8 @@ paths:
       tags:
         - Agents
-      summary: >-
-        List input items for a given OpenAI response.
-      description: >-
-        List input items for a given OpenAI response.
+      summary: List input items.
+      description: List input items.

@@ -1569,8 +1615,11 @@ paths:
       tags:
         - Safety
-      summary: Run a shield.
+      summary: Run shield.
-      description: Run a shield.
+      description: >-
+        Run shield.
+
+        Run a shield.

@@ -3126,8 +3175,11 @@ paths:
       tags:
         - Inspect
-      summary: Get the version of the service.
+      summary: Get version.
-      description: Get the version of the service.
+      description: >-
+        Get version.
+
+        Get the version of the service.

@@ -7184,6 +7236,18 @@ components:
       title: OpenAIResponseText
       description: >-
         Text response configuration for OpenAI responses.
+    ResponseShieldSpec:
+      type: object
+      properties:
+        type:
+          type: string
+          description: The type/identifier of the shield.
+      additionalProperties: false
+      required:
+        - type
+      title: ResponseShieldSpec
+      description: >-
+        Specification for a shield to apply during response generation.
     OpenAIResponseInputTool:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'

@@ -13771,9 +13835,16 @@ tags:
     x-displayName: >-
       Llama Stack Evaluation API for running evaluations on model and agent candidates.
   - name: Files
-    description: ''
+    description: >-
+      This API is used to upload documents that can be used with other Llama Stack
+      APIs.
+    x-displayName: Files
   - name: Inference
     description: >-
+      Llama Stack Inference API for generating completions, chat completions, and
+      embeddings.
+
      This API provides the raw interface to the underlying models. Two kinds of models
      are supported:

@@ -13781,25 +13852,27 @@ tags:
      - Embedding models: these models generate embeddings to be used for semantic
      search.
-    x-displayName: >-
-      Llama Stack Inference API for generating completions, chat completions, and
-      embeddings.
+    x-displayName: Inference
   - name: Inspect
-    description: ''
+    description: >-
+      APIs for inspecting the Llama Stack service, including health status, available
+      API routes with methods and implementing providers.
+    x-displayName: Inspect
   - name: Models
     description: ''
   - name: PostTraining (Coming Soon)
     description: ''
   - name: Prompts
-    description: ''
-    x-displayName: >-
-      Protocol for prompt management operations.
+    description: >-
+      Protocol for prompt management operations.
+    x-displayName: Prompts
   - name: Providers
-    description: ''
-    x-displayName: >-
-      Providers API for inspecting, listing, and modifying providers and their configurations.
+    description: >-
+      Providers API for inspecting, listing, and modifying providers and their configurations.
+    x-displayName: Providers
   - name: Safety
-    description: ''
+    description: OpenAI-compatible Moderations API.
+    x-displayName: Safety
   - name: Scoring
     description: ''
   - name: ScoringFunctions
@@ -28,7 +28,7 @@ from llama_stack.apis.inference import (
 from llama_stack.apis.safety import SafetyViolation
 from llama_stack.apis.tools import ToolDef
 from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack.schema_utils import ExtraBodyField, json_schema_type, register_schema, webmethod

 from .openai_responses import (
     ListOpenAIResponseInputItem,

@@ -42,6 +42,20 @@ from .openai_responses import (
 )


+@json_schema_type
+class ResponseShieldSpec(BaseModel):
+    """Specification for a shield to apply during response generation.
+
+    :param type: The type/identifier of the shield.
+    """
+
+    type: str
+    # TODO: more fields to be added for shield configuration
+
+
+ResponseShield = str | ResponseShieldSpec
+
+
 class Attachment(BaseModel):
     """An attachment to an agent turn.
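The union above accepts either a bare shield identifier or a structured spec. A standalone sketch of how both forms validate to the same list type, written with plain Pydantic rather than the Llama Stack imports, and using an assumed shield id:

# Standalone sketch; mirrors the ResponseShieldSpec / ResponseShield definitions above.
from pydantic import BaseModel, TypeAdapter


class ResponseShieldSpec(BaseModel):
    type: str  # the shield type/identifier


ResponseShield = str | ResponseShieldSpec

adapter = TypeAdapter(list[ResponseShield])
# Both a plain shield id and a structured spec are accepted in the same list.
shields = adapter.validate_python(["llama-guard", {"type": "llama-guard"}])
print(shields)  # ['llama-guard', ResponseShieldSpec(type='llama-guard')]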
@@ -783,7 +797,7 @@ class Agents(Protocol):
         self,
         response_id: str,
     ) -> OpenAIResponseObject:
-        """Retrieve an OpenAI response by its ID.
+        """Get a model response.

         :param response_id: The ID of the OpenAI response to retrieve.
         :returns: An OpenAIResponseObject.

@@ -805,13 +819,20 @@ class Agents(Protocol):
         tools: list[OpenAIResponseInputTool] | None = None,
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
+        shields: Annotated[
+            list[ResponseShield] | None,
+            ExtraBodyField(
+                "List of shields to apply during response generation. Shields provide safety and content moderation."
+            ),
+        ] = None,
     ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
-        """Create a new OpenAI response.
+        """Create a model response.

         :param input: Input message(s) to create the response.
         :param model: The underlying LLM used for completions.
         :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
         :param include: (Optional) Additional fields to include in the response.
+        :param shields: (Optional) List of shields to apply during response generation. Can be shield IDs (strings) or shield specifications.
         :returns: An OpenAIResponseObject.
         """
         ...
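From the client side, shields travels as an extra body field rather than a standard OpenAI argument. A hedged sketch using an OpenAI-compatible Python client, assuming the client forwards extra_body verbatim and that a shield named llama-guard is registered; the base URL and model id are also illustrative:

# Sketch only: endpoint, model id, and shield id are assumptions, not confirmed values.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="llama3.2:3b",
    input="Summarize the weather risks of hiking in winter.",
    # Forwarded in the request body; the server reads it as the `shields`
    # extra-body parameter declared on create_openai_response above.
    extra_body={"shields": ["llama-guard"]},
)
print(response.id)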
@@ -825,7 +846,7 @@ class Agents(Protocol):
         order: Order | None = Order.desc,
     ) -> ListOpenAIResponseObject:
-        """List all OpenAI responses.
+        """List all responses.

         :param after: The ID of the last response to return.

@@ -848,7 +869,7 @@ class Agents(Protocol):
         order: Order | None = Order.desc,
     ) -> ListOpenAIResponseInputItem:
-        """List input items for a given OpenAI response.
+        """List input items.

         :param response_id: The ID of the response to retrieve input items for.

@@ -863,7 +884,7 @@ class Agents(Protocol):
     @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
-        """Delete an OpenAI response by its ID.
+        """Delete a response.

         :param response_id: The ID of the OpenAI response to delete.
         :returns: An OpenAIDeleteResponseObject

@@ -104,6 +104,11 @@ class OpenAIFileDeleteResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class Files(Protocol):
+    """Files
+
+    This API is used to upload documents that can be used with other Llama Stack APIs.
+    """
+
     # OpenAI Files API Endpoints
     @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)

@@ -113,7 +118,8 @@ class Files(Protocol):
         purpose: Annotated[OpenAIFilePurpose, Form()],
         expires_after: Annotated[ExpiresAfter | None, Form()] = None,
     ) -> OpenAIFileObject:
-        """
+        """Upload file.
+
         Upload a file that can be used across various endpoints.

         The file upload should be a multipart form request with:

@@ -137,7 +143,8 @@ class Files(Protocol):
         order: Order | None = Order.desc,
         purpose: OpenAIFilePurpose | None = None,
     ) -> ListOpenAIFileResponse:
-        """
+        """List files.
+
         Returns a list of files that belong to the user's organization.

@@ -154,7 +161,8 @@ class Files(Protocol):
         file_id: str,
     ) -> OpenAIFileObject:
-        """
+        """Retrieve file.
+
         Returns information about a specific file.

@@ -168,8 +176,7 @@ class Files(Protocol):
         file_id: str,
     ) -> OpenAIFileDeleteResponse:
-        """
-        Delete a file.
+        """Delete file.

         :param file_id: The ID of the file to use for this request.
         :returns: An OpenAIFileDeleteResponse indicating successful deletion.

@@ -182,7 +189,8 @@ class Files(Protocol):
         file_id: str,
     ) -> Response:
-        """
+        """Retrieve file content.
+
         Returns the contents of the specified file.

         :param file_id: The ID of the file to use for this request.
@@ -1053,7 +1053,9 @@ class InferenceProvider(Protocol):
         # for fill-in-the-middle type completion
         suffix: str | None = None,
     ) -> OpenAICompletion:
-        """Generate an OpenAI-compatible completion for the given prompt using the specified model.
+        """Create completion.
+
+        Generate an OpenAI-compatible completion for the given prompt using the specified model.

@@ -1105,7 +1107,9 @@ class InferenceProvider(Protocol):
         user: str | None = None,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        """Generate an OpenAI-compatible chat completion for the given messages using the specified model.
+        """Create chat completions.
+
+        Generate an OpenAI-compatible chat completion for the given messages using the specified model.

@@ -1144,7 +1148,9 @@ class InferenceProvider(Protocol):
         user: str | None = None,
     ) -> OpenAIEmbeddingsResponse:
-        """Generate OpenAI-compatible embeddings for the given input using the specified model.
+        """Create embeddings.
+
+        Generate OpenAI-compatible embeddings for the given input using the specified model.

@@ -1157,7 +1163,9 @@ class InferenceProvider(Protocol):
 class Inference(InferenceProvider):
-    """Llama Stack Inference API for generating completions, chat completions, and embeddings.
+    """Inference
+
+    Llama Stack Inference API for generating completions, chat completions, and embeddings.

     This API provides the raw interface to the underlying models. Two kinds of models are supported:
     - LLM models: these models generate "raw" and "chat" (conversational) completions.

@@ -1173,7 +1181,7 @@ class Inference(InferenceProvider):
         order: Order | None = Order.desc,
     ) -> ListOpenAIChatCompletionResponse:
-        """List all chat completions.
+        """List chat completions.

         :param after: The ID of the last chat completion to return.

@@ -1188,7 +1196,9 @@ class Inference(InferenceProvider):
     @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
-        """Describe a chat completion by its ID.
+        """Get chat completion.
+
+        Describe a chat completion by its ID.

         :param completion_id: ID of the chat completion.
         :returns: A OpenAICompletionWithInputMessages.

@@ -58,9 +58,16 @@ class ListRoutesResponse(BaseModel):
 @runtime_checkable
 class Inspect(Protocol):
+    """Inspect
+
+    APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.
+    """
+
     @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
     async def list_routes(self) -> ListRoutesResponse:
-        """List all available API routes with their methods and implementing providers.
+        """List routes.
+
+        List all available API routes with their methods and implementing providers.

         :returns: Response containing information about all available routes.
         """

@@ -68,7 +75,9 @@ class Inspect(Protocol):
     @webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
     async def health(self) -> HealthInfo:
-        """Get the current health status of the service.
+        """Get health status.
+
+        Get the current health status of the service.

         :returns: Health information indicating if the service is operational.
         """

@@ -76,7 +85,9 @@ class Inspect(Protocol):
     @webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
     async def version(self) -> VersionInfo:
-        """Get the version of the service.
+        """Get version.
+
+        Get the version of the service.

         :returns: Version information containing the service version number.
         """
@@ -124,7 +124,9 @@ class Models(Protocol):
         model_id: str,
     ) -> Model:
-        """Get a model by its identifier.
+        """Get model.
+
+        Get a model by its identifier.

         :param model_id: The identifier of the model to get.
         :returns: A Model.

@@ -140,7 +142,9 @@ class Models(Protocol):
         metadata: dict[str, Any] | None = None,
         model_type: ModelType | None = None,
     ) -> Model:
-        """Register a model.
+        """Register model.
+
+        Register a model.

         :param model_id: The identifier of the model to register.
         :param provider_model_id: The identifier of the model in the provider.

@@ -156,7 +160,9 @@ class Models(Protocol):
         model_id: str,
     ) -> None:
-        """Unregister a model.
+        """Unregister model.
+
+        Unregister a model.

         :param model_id: The identifier of the model to unregister.
         """

@@ -94,7 +94,9 @@ class ListPromptsResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class Prompts(Protocol):
-    """Protocol for prompt management operations."""
+    """Prompts
+
+    Protocol for prompt management operations."""

     @webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
     async def list_prompts(self) -> ListPromptsResponse:

@@ -109,7 +111,9 @@ class Prompts(Protocol):
         prompt_id: str,
     ) -> ListPromptsResponse:
-        """List all versions of a specific prompt.
+        """List prompt versions.
+
+        List all versions of a specific prompt.

         :param prompt_id: The identifier of the prompt to list versions for.
         :returns: A ListPromptsResponse containing all versions of the prompt.

@@ -122,7 +126,9 @@ class Prompts(Protocol):
         prompt_id: str,
         version: int | None = None,
     ) -> Prompt:
-        """Get a prompt by its identifier and optional version.
+        """Get prompt.
+
+        Get a prompt by its identifier and optional version.

         :param prompt_id: The identifier of the prompt to get.
         :param version: The version of the prompt to get (defaults to latest).

@@ -136,7 +142,9 @@ class Prompts(Protocol):
         prompt: str,
         variables: list[str] | None = None,
     ) -> Prompt:
-        """Create a new prompt.
+        """Create prompt.
+
+        Create a new prompt.

         :param prompt: The prompt text content with variable placeholders.
         :param variables: List of variable names that can be used in the prompt template.

@@ -153,7 +161,9 @@ class Prompts(Protocol):
         variables: list[str] | None = None,
         set_as_default: bool = True,
     ) -> Prompt:
-        """Update an existing prompt (increments version).
+        """Update prompt.
+
+        Update an existing prompt (increments version).

         :param prompt_id: The identifier of the prompt to update.
         :param prompt: The updated prompt text content.

@@ -169,7 +179,9 @@ class Prompts(Protocol):
         prompt_id: str,
     ) -> None:
-        """Delete a prompt.
+        """Delete prompt.
+
+        Delete a prompt.

         :param prompt_id: The identifier of the prompt to delete.
         """

@@ -181,7 +193,9 @@ class Prompts(Protocol):
         prompt_id: str,
         version: int,
     ) -> Prompt:
-        """Set which version of a prompt should be the default in get_prompt (latest).
+        """Set prompt version.
+
+        Set which version of a prompt should be the default in get_prompt (latest).

         :param prompt_id: The identifier of the prompt.
         :param version: The version to set as default.
@ -42,13 +42,16 @@ class ListProvidersResponse(BaseModel):
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
class Providers(Protocol):
|
class Providers(Protocol):
|
||||||
"""
|
"""Providers
|
||||||
|
|
||||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
|
||||||
async def list_providers(self) -> ListProvidersResponse:
|
async def list_providers(self) -> ListProvidersResponse:
|
||||||
"""List all available providers.
|
"""List providers.
|
||||||
|
|
||||||
|
List all available providers.
|
||||||
|
|
||||||
:returns: A ListProvidersResponse containing information about all providers.
|
:returns: A ListProvidersResponse containing information about all providers.
|
||||||
"""
|
"""
|
||||||
|
|
@ -56,7 +59,9 @@ class Providers(Protocol):
|
||||||
|
|
||||||
@webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||||
async def inspect_provider(self, provider_id: str) -> ProviderInfo:
|
async def inspect_provider(self, provider_id: str) -> ProviderInfo:
|
||||||
"""Get detailed information about a specific provider.
|
"""Get provider.
|
||||||
|
|
||||||
|
Get detailed information about a specific provider.
|
||||||
|
|
||||||
:param provider_id: The ID of the provider to inspect.
|
:param provider_id: The ID of the provider to inspect.
|
||||||
:returns: A ProviderInfo object containing the provider's details.
|
:returns: A ProviderInfo object containing the provider's details.
|
||||||
|
|
|
||||||
|
|
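A quick illustration of the two routes above; `providers` is any implementation of the Providers protocol, and the provider id "ollama" is only an example.

    async def show_providers(providers):
        listing = await providers.list_providers()        # GET /providers -> ListProvidersResponse
        print(listing)

        info = await providers.inspect_provider("ollama")  # GET /providers/{provider_id} -> ProviderInfo
        print(info)
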
@ -96,6 +96,11 @@ class ShieldStore(Protocol):
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@trace_protocol
|
@trace_protocol
|
||||||
class Safety(Protocol):
|
class Safety(Protocol):
|
||||||
|
"""Safety
|
||||||
|
|
||||||
|
OpenAI-compatible Moderations API.
|
||||||
|
"""
|
||||||
|
|
||||||
shield_store: ShieldStore
|
shield_store: ShieldStore
|
||||||
|
|
||||||
@webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
|
||||||
|
|
@ -105,7 +110,9 @@ class Safety(Protocol):
|
||||||
messages: list[Message],
|
messages: list[Message],
|
||||||
params: dict[str, Any],
|
params: dict[str, Any],
|
||||||
) -> RunShieldResponse:
|
) -> RunShieldResponse:
|
||||||
"""Run a shield.
|
"""Run shield.
|
||||||
|
|
||||||
|
Run a shield.
|
||||||
|
|
||||||
:param shield_id: The identifier of the shield to run.
|
:param shield_id: The identifier of the shield to run.
|
||||||
:param messages: The messages to run the shield on.
|
:param messages: The messages to run the shield on.
|
||||||
|
|
@ -117,7 +124,9 @@ class Safety(Protocol):
|
||||||
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||||
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
|
||||||
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
|
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
|
||||||
"""Classifies if text and/or image inputs are potentially harmful.
|
"""Create moderation.
|
||||||
|
|
||||||
|
Classifies if text and/or image inputs are potentially harmful.
|
||||||
:param input: Input (or inputs) to classify.
|
:param input: Input (or inputs) to classify.
|
||||||
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
|
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
|
||||||
:param model: The content moderation model you would like to use.
|
:param model: The content moderation model you would like to use.
|
||||||
|
|
|
||||||
|
|
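A sketch of how the two Safety entry points above are typically called. The model and shield identifiers are placeholders, and `safety` is any implementation of the protocol; `UserMessage` is used as the message type on the assumption that it matches the `Message` union expected by run_shield.

    from llama_stack.apis.inference import UserMessage

    async def check_content(safety):
        # POST /moderations (OpenAI-compatible): classify raw text for harm
        moderation = await safety.run_moderation(
            input=["how do I pick a lock?"],
            model="llama-guard",                 # hypothetical moderation model id
        )
        print(moderation)

        # POST /safety/run-shield: run a registered shield over chat messages
        result = await safety.run_shield(
            shield_id="content-safety",          # hypothetical registered shield id
            messages=[UserMessage(content="hello world")],
            params={},
        )
        print(result)
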
@ -6,11 +6,18 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
|
import ssl
|
||||||
import subprocess
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import uvicorn
|
||||||
|
import yaml
|
||||||
|
|
||||||
from llama_stack.cli.stack.utils import ImageType
|
from llama_stack.cli.stack.utils import ImageType
|
||||||
from llama_stack.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
|
from llama_stack.core.datatypes import LoggingConfig, StackRunConfig
|
||||||
|
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars, validate_env_pair
|
||||||
|
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
|
|
||||||
REPO_ROOT = Path(__file__).parent.parent.parent.parent
|
REPO_ROOT = Path(__file__).parent.parent.parent.parent
|
||||||
|
|
@ -146,23 +153,7 @@ class StackRun(Subcommand):
|
||||||
# using the current environment packages.
|
# using the current environment packages.
|
||||||
if not image_type and not image_name:
|
if not image_type and not image_name:
|
||||||
logger.info("No image type or image name provided. Assuming environment packages.")
|
logger.info("No image type or image name provided. Assuming environment packages.")
|
||||||
from llama_stack.core.server.server import main as server_main
|
self._uvicorn_run(config_file, args)
|
||||||
|
|
||||||
# Build the server args from the current args passed to the CLI
|
|
||||||
server_args = argparse.Namespace()
|
|
||||||
for arg in vars(args):
|
|
||||||
# If this is a function, avoid passing it
|
|
||||||
# "args" contains:
|
|
||||||
# func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
|
|
||||||
if callable(getattr(args, arg)):
|
|
||||||
continue
|
|
||||||
if arg == "config":
|
|
||||||
server_args.config = str(config_file)
|
|
||||||
else:
|
|
||||||
setattr(server_args, arg, getattr(args, arg))
|
|
||||||
|
|
||||||
# Run the server
|
|
||||||
server_main(server_args)
|
|
||||||
else:
|
else:
|
||||||
run_args = formulate_run_args(image_type, image_name)
|
run_args = formulate_run_args(image_type, image_name)
|
||||||
|
|
||||||
|
|
@ -184,6 +175,76 @@ class StackRun(Subcommand):
|
||||||
|
|
||||||
run_command(run_args)
|
run_command(run_args)
|
||||||
|
|
||||||
|
def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
|
||||||
|
if not config_file:
|
||||||
|
self.parser.error("Config file is required")
|
||||||
|
|
||||||
|
# Set environment variables if provided
|
||||||
|
if args.env:
|
||||||
|
for env_pair in args.env:
|
||||||
|
try:
|
||||||
|
key, value = validate_env_pair(env_pair)
|
||||||
|
logger.info(f"Setting environment variable {key} => {value}")
|
||||||
|
os.environ[key] = value
|
||||||
|
except ValueError as e:
|
||||||
|
logger.error(f"Error: {str(e)}")
|
||||||
|
self.parser.error(f"Invalid environment variable format: {env_pair}")
|
||||||
|
|
||||||
|
config_file = resolve_config_or_distro(str(config_file), Mode.RUN)
|
||||||
|
with open(config_file) as fp:
|
||||||
|
config_contents = yaml.safe_load(fp)
|
||||||
|
if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
|
||||||
|
logger_config = LoggingConfig(**cfg)
|
||||||
|
else:
|
||||||
|
logger_config = None
|
||||||
|
config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
|
||||||
|
|
||||||
|
port = args.port or config.server.port
|
||||||
|
host = config.server.host or ["::", "0.0.0.0"]
|
||||||
|
|
||||||
|
# Set the config file in environment so create_app can find it
|
||||||
|
os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
|
||||||
|
|
||||||
|
uvicorn_config = {
|
||||||
|
"factory": True,
|
||||||
|
"host": host,
|
||||||
|
"port": port,
|
||||||
|
"lifespan": "on",
|
||||||
|
"log_level": logger.getEffectiveLevel(),
|
||||||
|
"log_config": logger_config,
|
||||||
|
}
|
||||||
|
|
||||||
|
keyfile = config.server.tls_keyfile
|
||||||
|
certfile = config.server.tls_certfile
|
||||||
|
if keyfile and certfile:
|
||||||
|
uvicorn_config["ssl_keyfile"] = config.server.tls_keyfile
|
||||||
|
uvicorn_config["ssl_certfile"] = config.server.tls_certfile
|
||||||
|
if config.server.tls_cafile:
|
||||||
|
uvicorn_config["ssl_ca_certs"] = config.server.tls_cafile
|
||||||
|
uvicorn_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
|
||||||
|
|
||||||
|
logger.info(f"Listening on {host}:{port}")
|
||||||
|
|
||||||
|
# We need to catch KeyboardInterrupt because uvicorn's signal handling
|
||||||
|
# re-raises SIGINT signals using signal.raise_signal(), which Python
|
||||||
|
# converts to KeyboardInterrupt. Without this catch, we'd get a confusing
|
||||||
|
# stack trace when using Ctrl+C or kill -2 (SIGINT).
|
||||||
|
# SIGTERM (kill -15) works fine without this because Python doesn't
|
||||||
|
# have a default handler for it.
|
||||||
|
#
|
||||||
|
# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
|
||||||
|
# signal handling but this is quite intrusive and not worth the effort.
|
||||||
|
try:
|
||||||
|
uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)
|
||||||
|
except (KeyboardInterrupt, SystemExit):
|
||||||
|
logger.info("Received interrupt signal, shutting down gracefully...")
|
||||||
|
|
||||||
def _start_ui_development_server(self, stack_server_port: int):
|
def _start_ui_development_server(self, stack_server_port: int):
|
||||||
logger.info("Attempting to start UI development server...")
|
logger.info("Attempting to start UI development server...")
|
||||||
# Check if npm is available
|
# Check if npm is available
|
||||||
|
|
|
||||||
|
|
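The core of the new `_uvicorn_run` path, reduced to a few lines. This only restates the flow shown above; the config path and port are placeholders.

    import os

    import uvicorn

    # Equivalent to `llama stack run <config>` without an image type: create_app() is
    # resolved lazily in the server process from the import string below.
    os.environ["LLAMA_STACK_CONFIG"] = "/path/to/run.yaml"
    uvicorn.run(
        "llama_stack.core.server.server:create_app",
        factory=True,       # call create_app() to build the ASGI app
        host="0.0.0.0",
        port=8321,
        lifespan="on",
    )
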
@ -324,14 +324,14 @@ fi
|
||||||
RUN pip uninstall -y uv
|
RUN pip uninstall -y uv
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
# If a run config is provided, we use the --config flag
|
# If a run config is provided, we use the llama stack CLI
|
||||||
if [[ -n "$run_config" ]]; then
|
if [[ -n "$run_config" ]]; then
|
||||||
add_to_container << EOF
|
add_to_container << EOF
|
||||||
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$RUN_CONFIG_PATH"]
|
ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"]
|
||||||
EOF
|
EOF
|
||||||
elif [[ "$distro_or_config" != *.yaml ]]; then
|
elif [[ "$distro_or_config" != *.yaml ]]; then
|
||||||
add_to_container << EOF
|
add_to_container << EOF
|
||||||
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$distro_or_config"]
|
ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"]
|
||||||
EOF
|
EOF
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -243,6 +243,7 @@ def get_external_providers_from_module(
|
||||||
spec = module.get_provider_spec()
|
spec = module.get_provider_spec()
|
||||||
else:
|
else:
|
||||||
# pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon build and run
|
# pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon build and run
|
||||||
|
# in the case we are building we CANNOT import this module of course because it has not been installed.
|
||||||
spec = ProviderSpec(
|
spec = ProviderSpec(
|
||||||
api=Api(provider_api),
|
api=Api(provider_api),
|
||||||
provider_type=provider.provider_type,
|
provider_type=provider.provider_type,
|
||||||
|
|
@ -251,9 +252,20 @@ def get_external_providers_from_module(
|
||||||
config_class="",
|
config_class="",
|
||||||
)
|
)
|
||||||
provider_type = provider.provider_type
|
provider_type = provider.provider_type
|
||||||
# in the case we are building we CANNOT import this module of course because it has not been installed.
|
if isinstance(spec, list):
|
||||||
# return a partially filled out spec that the build script will populate.
|
# optionally allow people to pass inline and remote provider specs as a returned list.
|
||||||
registry[Api(provider_api)][provider_type] = spec
|
# with the old method, users could pass in directories of specs using overlapping code
|
||||||
|
# we want to ensure we preserve that flexibility in this method.
|
||||||
|
logger.info(
|
||||||
|
f"Detected a list of external provider specs from {provider.module} adding all to the registry"
|
||||||
|
)
|
||||||
|
for provider_spec in spec:
|
||||||
|
if provider_spec.provider_type != provider.provider_type:
|
||||||
|
continue
|
||||||
|
logger.info(f"Adding {provider.provider_type} to registry")
|
||||||
|
registry[Api(provider_api)][provider.provider_type] = provider_spec
|
||||||
|
else:
|
||||||
|
registry[Api(provider_api)][provider_type] = spec
|
||||||
except ModuleNotFoundError as exc:
|
except ModuleNotFoundError as exc:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"get_provider_spec not found. If specifying an external provider via `module` in the Provider spec, the Provider must have the `provider.get_provider_spec` module available"
|
"get_provider_spec not found. If specifying an external provider via `module` in the Provider spec, the Provider must have the `provider.get_provider_spec` module available"
|
||||||
|
|
|
||||||
|
|
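To make the new list-handling branch concrete, a sketch of an external provider module whose `get_provider_spec()` returns several specs at once. The module path, provider types, and config classes are hypothetical, and the spec fields shown are only those used elsewhere in this file; treat the exact field list as illustrative.

    # my_provider/provider.py -- hypothetical external provider module
    from llama_stack.providers.datatypes import Api, ProviderSpec

    def get_provider_spec() -> list[ProviderSpec]:
        # Returning a list lets one module ship several related providers; the loader above
        # keeps only the entry whose provider_type matches the configured provider.
        return [
            ProviderSpec(
                api=Api.inference,
                provider_type="remote::my-inference",
                module="my_provider.remote",
                config_class="my_provider.remote.MyRemoteConfig",
            ),
            ProviderSpec(
                api=Api.inference,
                provider_type="inline::my-inference",
                module="my_provider.inline",
                config_class="my_provider.inline.MyInlineConfig",
            ),
        ]
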
@ -374,6 +374,10 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
||||||
body = options.params or {}
|
body = options.params or {}
|
||||||
body |= options.json_data or {}
|
body |= options.json_data or {}
|
||||||
|
|
||||||
|
# Merge extra_json parameters (extra_body from SDK is converted to extra_json)
|
||||||
|
if hasattr(options, "extra_json") and options.extra_json:
|
||||||
|
body |= options.extra_json
|
||||||
|
|
||||||
matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
|
matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
|
||||||
body |= path_params
|
body |= path_params
|
||||||
|
|
||||||
|
|
|
||||||
|
|
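A sketch of what this merge enables from the caller's side: keyword arguments passed through `extra_body` are not part of the typed method signature, so they arrive here as `options.extra_json` and are folded into the request body unchanged. The call shape is an assumption, and the `shields` key is just an example of such a passthrough field (it is wired up later in this change set).

    async def call_with_extra_body(client):
        # client: AsyncLlamaStackAsLibraryClient or an equivalent SDK client
        return await client.responses.create(
            model="meta-llama/Llama-3.2-3B-Instruct",
            input="ping",
            extra_body={"shields": ["content-safety"]},  # reaches the provider via extra_json
        )
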
@ -4,7 +4,6 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
import argparse
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import functools
|
import functools
|
||||||
|
|
@ -12,7 +11,6 @@ import inspect
|
||||||
import json
|
import json
|
||||||
import logging # allow-direct-logging
|
import logging # allow-direct-logging
|
||||||
import os
|
import os
|
||||||
import ssl
|
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
import warnings
|
import warnings
|
||||||
|
|
@ -35,7 +33,6 @@ from pydantic import BaseModel, ValidationError
|
||||||
|
|
||||||
from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
|
from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
|
||||||
from llama_stack.apis.common.responses import PaginatedResponse
|
from llama_stack.apis.common.responses import PaginatedResponse
|
||||||
from llama_stack.cli.utils import add_config_distro_args, get_config_from_args
|
|
||||||
from llama_stack.core.access_control.access_control import AccessDeniedError
|
from llama_stack.core.access_control.access_control import AccessDeniedError
|
||||||
from llama_stack.core.datatypes import (
|
from llama_stack.core.datatypes import (
|
||||||
AuthenticationRequiredError,
|
AuthenticationRequiredError,
|
||||||
|
|
@ -55,7 +52,6 @@ from llama_stack.core.stack import (
|
||||||
Stack,
|
Stack,
|
||||||
cast_image_name_to_string,
|
cast_image_name_to_string,
|
||||||
replace_env_vars,
|
replace_env_vars,
|
||||||
validate_env_pair,
|
|
||||||
)
|
)
|
||||||
from llama_stack.core.utils.config import redact_sensitive_fields
|
from llama_stack.core.utils.config import redact_sensitive_fields
|
||||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||||
|
|
@ -333,23 +329,18 @@ class ClientVersionMiddleware:
|
||||||
return await self.app(scope, receive, send)
|
return await self.app(scope, receive, send)
|
||||||
|
|
||||||
|
|
||||||
def create_app(
|
def create_app() -> StackApp:
|
||||||
config_file: str | None = None,
|
|
||||||
env_vars: list[str] | None = None,
|
|
||||||
) -> StackApp:
|
|
||||||
"""Create and configure the FastAPI application.
|
"""Create and configure the FastAPI application.
|
||||||
|
|
||||||
Args:
|
This factory function reads configuration from environment variables:
|
||||||
config_file: Path to config file. If None, uses LLAMA_STACK_CONFIG env var or default resolution.
|
- LLAMA_STACK_CONFIG: Path to config file (required)
|
||||||
env_vars: List of environment variables in KEY=value format.
|
|
||||||
disable_version_check: Whether to disable version checking. If None, uses LLAMA_STACK_DISABLE_VERSION_CHECK env var.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Configured StackApp instance.
|
Configured StackApp instance.
|
||||||
"""
|
"""
|
||||||
config_file = config_file or os.getenv("LLAMA_STACK_CONFIG")
|
config_file = os.getenv("LLAMA_STACK_CONFIG")
|
||||||
if config_file is None:
|
if config_file is None:
|
||||||
raise ValueError("No config file provided and LLAMA_STACK_CONFIG env var is not set")
|
raise ValueError("LLAMA_STACK_CONFIG environment variable is required")
|
||||||
|
|
||||||
config_file = resolve_config_or_distro(config_file, Mode.RUN)
|
config_file = resolve_config_or_distro(config_file, Mode.RUN)
|
||||||
|
|
||||||
|
|
@ -361,16 +352,6 @@ def create_app(
|
||||||
logger_config = LoggingConfig(**cfg)
|
logger_config = LoggingConfig(**cfg)
|
||||||
logger = get_logger(name=__name__, category="core::server", config=logger_config)
|
logger = get_logger(name=__name__, category="core::server", config=logger_config)
|
||||||
|
|
||||||
if env_vars:
|
|
||||||
for env_pair in env_vars:
|
|
||||||
try:
|
|
||||||
key, value = validate_env_pair(env_pair)
|
|
||||||
logger.info(f"Setting environment variable {key} => {value}")
|
|
||||||
os.environ[key] = value
|
|
||||||
except ValueError as e:
|
|
||||||
logger.error(f"Error: {str(e)}")
|
|
||||||
raise ValueError(f"Invalid environment variable format: {env_pair}") from e
|
|
||||||
|
|
||||||
config = replace_env_vars(config_contents)
|
config = replace_env_vars(config_contents)
|
||||||
config = StackRunConfig(**cast_image_name_to_string(config))
|
config = StackRunConfig(**cast_image_name_to_string(config))
|
||||||
|
|
||||||
|
|
@ -494,101 +475,6 @@ def create_app(
|
||||||
return app
|
return app
|
||||||
|
|
||||||
|
|
||||||
def main(args: argparse.Namespace | None = None):
|
|
||||||
"""Start the LlamaStack server."""
|
|
||||||
parser = argparse.ArgumentParser(description="Start the LlamaStack server.")
|
|
||||||
|
|
||||||
add_config_distro_args(parser)
|
|
||||||
parser.add_argument(
|
|
||||||
"--port",
|
|
||||||
type=int,
|
|
||||||
default=int(os.getenv("LLAMA_STACK_PORT", 8321)),
|
|
||||||
help="Port to listen on",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--env",
|
|
||||||
action="append",
|
|
||||||
help="Environment variables in KEY=value format. Can be specified multiple times.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Determine whether the server args are being passed by the "run" command, if this is the case
|
|
||||||
# the args will be passed as a Namespace object to the main function, otherwise they will be
|
|
||||||
# parsed from the command line
|
|
||||||
if args is None:
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
config_or_distro = get_config_from_args(args)
|
|
||||||
|
|
||||||
try:
|
|
||||||
app = create_app(
|
|
||||||
config_file=config_or_distro,
|
|
||||||
env_vars=args.env,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error creating app: {str(e)}")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
config_file = resolve_config_or_distro(config_or_distro, Mode.RUN)
|
|
||||||
with open(config_file) as fp:
|
|
||||||
config_contents = yaml.safe_load(fp)
|
|
||||||
if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
|
|
||||||
logger_config = LoggingConfig(**cfg)
|
|
||||||
else:
|
|
||||||
logger_config = None
|
|
||||||
config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
|
|
||||||
|
|
||||||
import uvicorn
|
|
||||||
|
|
||||||
# Configure SSL if certificates are provided
|
|
||||||
port = args.port or config.server.port
|
|
||||||
|
|
||||||
ssl_config = None
|
|
||||||
keyfile = config.server.tls_keyfile
|
|
||||||
certfile = config.server.tls_certfile
|
|
||||||
|
|
||||||
if keyfile and certfile:
|
|
||||||
ssl_config = {
|
|
||||||
"ssl_keyfile": keyfile,
|
|
||||||
"ssl_certfile": certfile,
|
|
||||||
}
|
|
||||||
if config.server.tls_cafile:
|
|
||||||
ssl_config["ssl_ca_certs"] = config.server.tls_cafile
|
|
||||||
ssl_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
|
|
||||||
logger.info(
|
|
||||||
f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
|
|
||||||
|
|
||||||
listen_host = config.server.host or ["::", "0.0.0.0"]
|
|
||||||
logger.info(f"Listening on {listen_host}:{port}")
|
|
||||||
|
|
||||||
uvicorn_config = {
|
|
||||||
"app": app,
|
|
||||||
"host": listen_host,
|
|
||||||
"port": port,
|
|
||||||
"lifespan": "on",
|
|
||||||
"log_level": logger.getEffectiveLevel(),
|
|
||||||
"log_config": logger_config,
|
|
||||||
}
|
|
||||||
if ssl_config:
|
|
||||||
uvicorn_config.update(ssl_config)
|
|
||||||
|
|
||||||
# We need to catch KeyboardInterrupt because uvicorn's signal handling
|
|
||||||
# re-raises SIGINT signals using signal.raise_signal(), which Python
|
|
||||||
# converts to KeyboardInterrupt. Without this catch, we'd get a confusing
|
|
||||||
# stack trace when using Ctrl+C or kill -2 (SIGINT).
|
|
||||||
# SIGTERM (kill -15) works fine without this because Python doesn't
|
|
||||||
# have a default handler for it.
|
|
||||||
#
|
|
||||||
# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
|
|
||||||
# signal handling but this is quite intrusive and not worth the effort.
|
|
||||||
try:
|
|
||||||
asyncio.run(uvicorn.Server(uvicorn.Config(**uvicorn_config)).serve())
|
|
||||||
except (KeyboardInterrupt, SystemExit):
|
|
||||||
logger.info("Received interrupt signal, shutting down gracefully...")
|
|
||||||
|
|
||||||
|
|
||||||
def _log_run_config(run_config: StackRunConfig):
|
def _log_run_config(run_config: StackRunConfig):
|
||||||
"""Logs the run config with redacted fields and disabled providers removed."""
|
"""Logs the run config with redacted fields and disabled providers removed."""
|
||||||
logger.info("Run configuration:")
|
logger.info("Run configuration:")
|
||||||
|
|
@ -615,7 +501,3 @@ def remove_disabled_providers(obj):
|
||||||
return [item for item in (remove_disabled_providers(i) for i in obj) if item is not None]
|
return [item for item in (remove_disabled_providers(i) for i in obj) if item is not None]
|
||||||
else:
|
else:
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
|
||||||
|
|
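Since `create_app()` now takes no arguments, embedding or serving the app only requires the environment variable. A small sketch; the config value is a placeholder.

    import os

    from llama_stack.core.server.server import create_app

    os.environ["LLAMA_STACK_CONFIG"] = "starter"   # config file path or distro name
    app = create_app()                             # raises ValueError if LLAMA_STACK_CONFIG is unset
    # `app` is a regular ASGI application and can be handed to any server, e.g.
    # uvicorn.Server(uvicorn.Config(app, host="::", port=8321, lifespan="on"))
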
@ -116,7 +116,7 @@ if [[ "$env_type" == "venv" ]]; then
|
||||||
yaml_config_arg=""
|
yaml_config_arg=""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
$PYTHON_BINARY -m llama_stack.core.server.server \
|
llama stack run \
|
||||||
$yaml_config_arg \
|
$yaml_config_arg \
|
||||||
--port "$port" \
|
--port "$port" \
|
||||||
$env_vars \
|
$env_vars \
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ from pathlib import Path
|
||||||
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
|
|
||||||
logger = get_logger(__name__, "tokenizer_utils")
|
logger = get_logger(__name__, "models")
|
||||||
|
|
||||||
|
|
||||||
def load_bpe_file(model_path: Path) -> dict[bytes, int]:
|
def load_bpe_file(model_path: Path) -> dict[bytes, int]:
|
||||||
|
|
|
||||||
|
|
@ -329,6 +329,7 @@ class MetaReferenceAgentsImpl(Agents):
|
||||||
tools: list[OpenAIResponseInputTool] | None = None,
|
tools: list[OpenAIResponseInputTool] | None = None,
|
||||||
include: list[str] | None = None,
|
include: list[str] | None = None,
|
||||||
max_infer_iters: int | None = 10,
|
max_infer_iters: int | None = 10,
|
||||||
|
shields: list | None = None,
|
||||||
) -> OpenAIResponseObject:
|
) -> OpenAIResponseObject:
|
||||||
return await self.openai_responses_impl.create_openai_response(
|
return await self.openai_responses_impl.create_openai_response(
|
||||||
input,
|
input,
|
||||||
|
|
@ -342,6 +343,7 @@ class MetaReferenceAgentsImpl(Agents):
|
||||||
tools,
|
tools,
|
||||||
include,
|
include,
|
||||||
max_infer_iters,
|
max_infer_iters,
|
||||||
|
shields,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def list_openai_responses(
|
async def list_openai_responses(
|
||||||
|
|
|
||||||
|
|
@ -208,10 +208,15 @@ class OpenAIResponsesImpl:
|
||||||
tools: list[OpenAIResponseInputTool] | None = None,
|
tools: list[OpenAIResponseInputTool] | None = None,
|
||||||
include: list[str] | None = None,
|
include: list[str] | None = None,
|
||||||
max_infer_iters: int | None = 10,
|
max_infer_iters: int | None = 10,
|
||||||
|
shields: list | None = None,
|
||||||
):
|
):
|
||||||
stream = bool(stream)
|
stream = bool(stream)
|
||||||
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
|
text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
|
||||||
|
|
||||||
|
# Shields parameter received via extra_body - not yet implemented
|
||||||
|
if shields is not None:
|
||||||
|
raise NotImplementedError("Shields parameter is not yet implemented in the meta-reference provider")
|
||||||
|
|
||||||
stream_gen = self._create_streaming_response(
|
stream_gen = self._create_streaming_response(
|
||||||
input=input,
|
input=input,
|
||||||
model=model,
|
model=model,
|
||||||
|
|
|
||||||
|
|
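The new `shields` argument (delivered through extra_body, per the comment above) is accepted by the signatures but rejected at runtime for now. A sketch of the observable behavior; the call shape and model id are assumptions.

    async def try_shields(impl):
        # impl: the meta-reference Agents/Responses implementation
        try:
            await impl.create_openai_response(
                input="Hello",
                model="meta-llama/Llama-3.2-3B-Instruct",
                shields=["content-safety"],   # hypothetical shield id
            )
        except NotImplementedError as err:
            print(err)  # "Shields parameter is not yet implemented in the meta-reference provider"
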
@ -52,9 +52,7 @@ def available_providers() -> list[ProviderSpec]:
|
||||||
api=Api.inference,
|
api=Api.inference,
|
||||||
adapter_type="cerebras",
|
adapter_type="cerebras",
|
||||||
provider_type="remote::cerebras",
|
provider_type="remote::cerebras",
|
||||||
pip_packages=[
|
pip_packages=[],
|
||||||
"cerebras_cloud_sdk",
|
|
||||||
],
|
|
||||||
module="llama_stack.providers.remote.inference.cerebras",
|
module="llama_stack.providers.remote.inference.cerebras",
|
||||||
config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
|
config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
|
||||||
description="Cerebras inference provider for running models on Cerebras Cloud platform.",
|
description="Cerebras inference provider for running models on Cerebras Cloud platform.",
|
||||||
|
|
@ -169,7 +167,7 @@ def available_providers() -> list[ProviderSpec]:
|
||||||
api=Api.inference,
|
api=Api.inference,
|
||||||
adapter_type="openai",
|
adapter_type="openai",
|
||||||
provider_type="remote::openai",
|
provider_type="remote::openai",
|
||||||
pip_packages=["litellm"],
|
pip_packages=[],
|
||||||
module="llama_stack.providers.remote.inference.openai",
|
module="llama_stack.providers.remote.inference.openai",
|
||||||
config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig",
|
config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig",
|
||||||
provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
|
provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
|
||||||
|
|
@ -179,7 +177,7 @@ def available_providers() -> list[ProviderSpec]:
|
||||||
api=Api.inference,
|
api=Api.inference,
|
||||||
adapter_type="anthropic",
|
adapter_type="anthropic",
|
||||||
provider_type="remote::anthropic",
|
provider_type="remote::anthropic",
|
||||||
pip_packages=["litellm"],
|
pip_packages=["anthropic"],
|
||||||
module="llama_stack.providers.remote.inference.anthropic",
|
module="llama_stack.providers.remote.inference.anthropic",
|
||||||
config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
|
config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
|
||||||
provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
|
provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
|
||||||
|
|
@ -189,9 +187,7 @@ def available_providers() -> list[ProviderSpec]:
|
||||||
api=Api.inference,
|
api=Api.inference,
|
||||||
adapter_type="gemini",
|
adapter_type="gemini",
|
||||||
provider_type="remote::gemini",
|
provider_type="remote::gemini",
|
||||||
pip_packages=[
|
pip_packages=[],
|
||||||
"litellm",
|
|
||||||
],
|
|
||||||
module="llama_stack.providers.remote.inference.gemini",
|
module="llama_stack.providers.remote.inference.gemini",
|
||||||
config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
|
config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
|
||||||
provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
|
provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
|
||||||
|
|
@ -202,7 +198,6 @@ def available_providers() -> list[ProviderSpec]:
|
||||||
adapter_type="vertexai",
|
adapter_type="vertexai",
|
||||||
provider_type="remote::vertexai",
|
provider_type="remote::vertexai",
|
||||||
pip_packages=[
|
pip_packages=[
|
||||||
"litellm",
|
|
||||||
"google-cloud-aiplatform",
|
"google-cloud-aiplatform",
|
||||||
],
|
],
|
||||||
module="llama_stack.providers.remote.inference.vertexai",
|
module="llama_stack.providers.remote.inference.vertexai",
|
||||||
|
|
@ -233,9 +228,7 @@ Available Models:
|
||||||
api=Api.inference,
|
api=Api.inference,
|
||||||
adapter_type="groq",
|
adapter_type="groq",
|
||||||
provider_type="remote::groq",
|
provider_type="remote::groq",
|
||||||
pip_packages=[
|
pip_packages=[],
|
||||||
"litellm",
|
|
||||||
],
|
|
||||||
module="llama_stack.providers.remote.inference.groq",
|
module="llama_stack.providers.remote.inference.groq",
|
||||||
config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
|
config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
|
||||||
provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
|
provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
|
||||||
|
|
@ -245,7 +238,7 @@ Available Models:
|
||||||
api=Api.inference,
|
api=Api.inference,
|
||||||
adapter_type="llama-openai-compat",
|
adapter_type="llama-openai-compat",
|
||||||
provider_type="remote::llama-openai-compat",
|
provider_type="remote::llama-openai-compat",
|
||||||
pip_packages=["litellm"],
|
pip_packages=[],
|
||||||
module="llama_stack.providers.remote.inference.llama_openai_compat",
|
module="llama_stack.providers.remote.inference.llama_openai_compat",
|
||||||
config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
|
config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
|
||||||
provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
|
provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
|
||||||
|
|
@ -255,9 +248,7 @@ Available Models:
|
||||||
api=Api.inference,
|
api=Api.inference,
|
||||||
adapter_type="sambanova",
|
adapter_type="sambanova",
|
||||||
provider_type="remote::sambanova",
|
provider_type="remote::sambanova",
|
||||||
pip_packages=[
|
pip_packages=[],
|
||||||
"litellm",
|
|
||||||
],
|
|
||||||
module="llama_stack.providers.remote.inference.sambanova",
|
module="llama_stack.providers.remote.inference.sambanova",
|
||||||
config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
|
config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
|
||||||
provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
|
provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
|
||||||
|
|
@ -287,7 +278,7 @@ Available Models:
|
||||||
api=Api.inference,
|
api=Api.inference,
|
||||||
provider_type="remote::azure",
|
provider_type="remote::azure",
|
||||||
adapter_type="azure",
|
adapter_type="azure",
|
||||||
pip_packages=["litellm"],
|
pip_packages=[],
|
||||||
module="llama_stack.providers.remote.inference.azure",
|
module="llama_stack.providers.remote.inference.azure",
|
||||||
config_class="llama_stack.providers.remote.inference.azure.AzureConfig",
|
config_class="llama_stack.providers.remote.inference.azure.AzureConfig",
|
||||||
provider_data_validator="llama_stack.providers.remote.inference.azure.config.AzureProviderDataValidator",
|
provider_data_validator="llama_stack.providers.remote.inference.azure.config.AzureProviderDataValidator",
|
||||||
|
|
|
||||||
|
|
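The net effect of these registry edits is that providers now served through the OpenAI-compatible path stop declaring litellm or vendor SDKs, while a few keep a package they still need (for example `anthropic` for model listing). A simplified illustration, not the actual build code, of how `pip_packages` feeds dependency collection for a distro:

    def collect_pip_packages(provider_specs) -> set[str]:
        # Illustration only: the real collection logic lives in the build tooling.
        deps: set[str] = set()
        for spec in provider_specs:
            deps.update(spec.pip_packages)  # e.g. {"anthropic"} for remote::anthropic, empty for remote::groq
        return deps
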
@ -10,6 +10,6 @@ from .config import AnthropicConfig
|
||||||
async def get_adapter_impl(config: AnthropicConfig, _deps):
|
async def get_adapter_impl(config: AnthropicConfig, _deps):
|
||||||
from .anthropic import AnthropicInferenceAdapter
|
from .anthropic import AnthropicInferenceAdapter
|
||||||
|
|
||||||
impl = AnthropicInferenceAdapter(config)
|
impl = AnthropicInferenceAdapter(config=config)
|
||||||
await impl.initialize()
|
await impl.initialize()
|
||||||
return impl
|
return impl
|
||||||
|
|
|
||||||
|
|
@ -4,13 +4,19 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
from collections.abc import Iterable
|
||||||
|
|
||||||
|
from anthropic import AsyncAnthropic
|
||||||
|
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
from .config import AnthropicConfig
|
from .config import AnthropicConfig
|
||||||
|
|
||||||
|
|
||||||
class AnthropicInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
class AnthropicInferenceAdapter(OpenAIMixin):
|
||||||
|
config: AnthropicConfig
|
||||||
|
|
||||||
|
provider_data_api_key_field: str = "anthropic_api_key"
|
||||||
# source: https://docs.claude.com/en/docs/build-with-claude/embeddings
|
# source: https://docs.claude.com/en/docs/build-with-claude/embeddings
|
||||||
# TODO: add support for voyageai, which is where these models are hosted
|
# TODO: add support for voyageai, which is where these models are hosted
|
||||||
# embedding_model_metadata = {
|
# embedding_model_metadata = {
|
||||||
|
|
@ -23,22 +29,11 @@ class AnthropicInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||||
# "voyage-multimodal-3": {"embedding_dimension": 1024, "context_length": 32000},
|
# "voyage-multimodal-3": {"embedding_dimension": 1024, "context_length": 32000},
|
||||||
# }
|
# }
|
||||||
|
|
||||||
def __init__(self, config: AnthropicConfig) -> None:
|
def get_api_key(self) -> str:
|
||||||
LiteLLMOpenAIMixin.__init__(
|
return self.config.api_key or ""
|
||||||
self,
|
|
||||||
litellm_provider_name="anthropic",
|
|
||||||
api_key_from_config=config.api_key,
|
|
||||||
provider_data_api_key_field="anthropic_api_key",
|
|
||||||
)
|
|
||||||
self.config = config
|
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
|
||||||
await super().initialize()
|
|
||||||
|
|
||||||
async def shutdown(self) -> None:
|
|
||||||
await super().shutdown()
|
|
||||||
|
|
||||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
|
||||||
|
|
||||||
def get_base_url(self):
|
def get_base_url(self):
|
||||||
return "https://api.anthropic.com/v1"
|
return "https://api.anthropic.com/v1"
|
||||||
|
|
||||||
|
async def list_provider_model_ids(self) -> Iterable[str]:
|
||||||
|
return [m.id async for m in AsyncAnthropic(api_key=self.get_api_key()).models.list()]
|
||||||
|
|
|
||||||
|
|
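The Anthropic rewrite above shows the new adapter shape: a declarative config field plus a couple of overrides on `OpenAIMixin`, instead of a LiteLLM-based __init__. A condensed template for a similar provider; every name here is a placeholder, and the config class is a stand-in for a real RemoteInferenceProviderConfig subclass.

    from collections.abc import Iterable

    from pydantic import BaseModel

    from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin


    class ExampleConfig(BaseModel):          # stand-in for a real provider config
        api_key: str | None = None


    class ExampleInferenceAdapter(OpenAIMixin):
        config: ExampleConfig
        provider_data_api_key_field: str = "example_api_key"  # per-request key via provider-data headers

        def get_api_key(self) -> str:
            return self.config.api_key or ""

        def get_base_url(self) -> str:
            return "https://api.example.com/v1"

        async def list_provider_model_ids(self) -> Iterable[str]:
            # optional override: enumerate models with the provider's own SDK, as the Anthropic adapter does
            return ["example-model-small", "example-model-large"]
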
@ -10,6 +10,6 @@ from .config import AzureConfig
|
||||||
async def get_adapter_impl(config: AzureConfig, _deps):
|
async def get_adapter_impl(config: AzureConfig, _deps):
|
||||||
from .azure import AzureInferenceAdapter
|
from .azure import AzureInferenceAdapter
|
||||||
|
|
||||||
impl = AzureInferenceAdapter(config)
|
impl = AzureInferenceAdapter(config=config)
|
||||||
await impl.initialize()
|
await impl.initialize()
|
||||||
return impl
|
return impl
|
||||||
|
|
|
||||||
|
|
@ -4,31 +4,20 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
from llama_stack.apis.inference import ChatCompletionRequest
|
|
||||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
|
|
||||||
LiteLLMOpenAIMixin,
|
|
||||||
)
|
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
from .config import AzureConfig
|
from .config import AzureConfig
|
||||||
|
|
||||||
|
|
||||||
class AzureInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
class AzureInferenceAdapter(OpenAIMixin):
|
||||||
def __init__(self, config: AzureConfig) -> None:
|
config: AzureConfig
|
||||||
LiteLLMOpenAIMixin.__init__(
|
|
||||||
self,
|
|
||||||
litellm_provider_name="azure",
|
|
||||||
api_key_from_config=config.api_key.get_secret_value(),
|
|
||||||
provider_data_api_key_field="azure_api_key",
|
|
||||||
openai_compat_api_base=str(config.api_base),
|
|
||||||
)
|
|
||||||
self.config = config
|
|
||||||
|
|
||||||
# Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
|
provider_data_api_key_field: str = "azure_api_key"
|
||||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
|
||||||
|
def get_api_key(self) -> str:
|
||||||
|
return self.config.api_key.get_secret_value()
|
||||||
|
|
||||||
def get_base_url(self) -> str:
|
def get_base_url(self) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
@ -37,26 +26,3 @@ class AzureInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||||
Returns the Azure API base URL from the configuration.
|
Returns the Azure API base URL from the configuration.
|
||||||
"""
|
"""
|
||||||
return urljoin(str(self.config.api_base), "/openai/v1")
|
return urljoin(str(self.config.api_base), "/openai/v1")
|
||||||
|
|
||||||
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
|
|
||||||
# Get base parameters from parent
|
|
||||||
params = await super()._get_params(request)
|
|
||||||
|
|
||||||
# Add Azure specific parameters
|
|
||||||
provider_data = self.get_request_provider_data()
|
|
||||||
if provider_data:
|
|
||||||
if getattr(provider_data, "azure_api_key", None):
|
|
||||||
params["api_key"] = provider_data.azure_api_key
|
|
||||||
if getattr(provider_data, "azure_api_base", None):
|
|
||||||
params["api_base"] = provider_data.azure_api_base
|
|
||||||
if getattr(provider_data, "azure_api_version", None):
|
|
||||||
params["api_version"] = provider_data.azure_api_version
|
|
||||||
if getattr(provider_data, "azure_api_type", None):
|
|
||||||
params["api_type"] = provider_data.azure_api_type
|
|
||||||
else:
|
|
||||||
params["api_key"] = self.config.api_key.get_secret_value()
|
|
||||||
params["api_base"] = str(self.config.api_base)
|
|
||||||
params["api_version"] = self.config.api_version
|
|
||||||
params["api_type"] = self.config.api_type
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
|
||||||
|
|
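One detail worth noting in the Azure get_base_url above: because the second argument to urljoin is an absolute path, it replaces whatever path is already on api_base, so trailing segments or slashes on the configured value do not leak into the endpoint. A quick check; the resource host is a placeholder.

    from urllib.parse import urljoin

    print(urljoin("https://my-resource.openai.azure.com", "/openai/v1"))
    # -> https://my-resource.openai.azure.com/openai/v1
    print(urljoin("https://my-resource.openai.azure.com/some/base/", "/openai/v1"))
    # -> https://my-resource.openai.azure.com/openai/v1
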
@ -12,7 +12,7 @@ async def get_adapter_impl(config: CerebrasImplConfig, _deps):
|
||||||
|
|
||||||
assert isinstance(config, CerebrasImplConfig), f"Unexpected config type: {type(config)}"
|
assert isinstance(config, CerebrasImplConfig), f"Unexpected config type: {type(config)}"
|
||||||
|
|
||||||
impl = CerebrasInferenceAdapter(config)
|
impl = CerebrasInferenceAdapter(config=config)
|
||||||
|
|
||||||
await impl.initialize()
|
await impl.initialize()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,39 +6,14 @@
|
||||||
|
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
from cerebras.cloud.sdk import AsyncCerebras
|
from llama_stack.apis.inference import OpenAIEmbeddingsResponse
|
||||||
|
|
||||||
from llama_stack.apis.inference import (
|
|
||||||
ChatCompletionRequest,
|
|
||||||
CompletionRequest,
|
|
||||||
Inference,
|
|
||||||
OpenAIEmbeddingsResponse,
|
|
||||||
TopKSamplingStrategy,
|
|
||||||
)
|
|
||||||
from llama_stack.providers.utils.inference.openai_compat import (
|
|
||||||
get_sampling_options,
|
|
||||||
)
|
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
|
||||||
chat_completion_request_to_prompt,
|
|
||||||
completion_request_to_prompt,
|
|
||||||
)
|
|
||||||
|
|
||||||
from .config import CerebrasImplConfig
|
from .config import CerebrasImplConfig
|
||||||
|
|
||||||
|
|
||||||
class CerebrasInferenceAdapter(
|
class CerebrasInferenceAdapter(OpenAIMixin):
|
||||||
OpenAIMixin,
|
config: CerebrasImplConfig
|
||||||
Inference,
|
|
||||||
):
|
|
||||||
def __init__(self, config: CerebrasImplConfig) -> None:
|
|
||||||
self.config = config
|
|
||||||
|
|
||||||
# TODO: make this use provider data, etc. like other providers
|
|
||||||
self._cerebras_client = AsyncCerebras(
|
|
||||||
base_url=self.config.base_url,
|
|
||||||
api_key=self.config.api_key.get_secret_value(),
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_api_key(self) -> str:
|
def get_api_key(self) -> str:
|
||||||
return self.config.api_key.get_secret_value()
|
return self.config.api_key.get_secret_value()
|
||||||
|
|
@ -46,31 +21,6 @@ class CerebrasInferenceAdapter(
|
||||||
def get_base_url(self) -> str:
|
def get_base_url(self) -> str:
|
||||||
return urljoin(self.config.base_url, "v1")
|
return urljoin(self.config.base_url, "v1")
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
|
||||||
return
|
|
||||||
|
|
||||||
async def shutdown(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict:
|
|
||||||
if request.sampling_params and isinstance(request.sampling_params.strategy, TopKSamplingStrategy):
|
|
||||||
raise ValueError("`top_k` not supported by Cerebras")
|
|
||||||
|
|
||||||
prompt = ""
|
|
||||||
if isinstance(request, ChatCompletionRequest):
|
|
||||||
prompt = await chat_completion_request_to_prompt(request, self.get_llama_model(request.model))
|
|
||||||
elif isinstance(request, CompletionRequest):
|
|
||||||
prompt = await completion_request_to_prompt(request)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unknown request type {type(request)}")
|
|
||||||
|
|
||||||
return {
|
|
||||||
"model": request.model,
|
|
||||||
"prompt": prompt,
|
|
||||||
"stream": request.stream,
|
|
||||||
**get_sampling_options(request.sampling_params),
|
|
||||||
}
|
|
||||||
|
|
||||||
async def openai_embeddings(
|
async def openai_embeddings(
|
||||||
self,
|
self,
|
||||||
model: str,
|
model: str,
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ class CerebrasImplConfig(RemoteInferenceProviderConfig):
|
||||||
description="Base URL for the Cerebras API",
|
description="Base URL for the Cerebras API",
|
||||||
)
|
)
|
||||||
api_key: SecretStr = Field(
|
api_key: SecretStr = Field(
|
||||||
default=SecretStr(os.environ.get("CEREBRAS_API_KEY")),
|
default=SecretStr(os.environ.get("CEREBRAS_API_KEY")), # type: ignore[arg-type]
|
||||||
description="Cerebras API Key",
|
description="Cerebras API Key",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,6 @@ async def get_adapter_impl(config: DatabricksImplConfig, _deps):
|
||||||
from .databricks import DatabricksInferenceAdapter
|
from .databricks import DatabricksInferenceAdapter
|
||||||
|
|
||||||
assert isinstance(config, DatabricksImplConfig), f"Unexpected config type: {type(config)}"
|
assert isinstance(config, DatabricksImplConfig), f"Unexpected config type: {type(config)}"
|
||||||
impl = DatabricksInferenceAdapter(config)
|
impl = DatabricksInferenceAdapter(config=config)
|
||||||
await impl.initialize()
|
await impl.initialize()
|
||||||
return impl
|
return impl
|
||||||
|
|
|
||||||
|
|
@ -14,12 +14,12 @@ from llama_stack.schema_utils import json_schema_type
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class DatabricksImplConfig(RemoteInferenceProviderConfig):
|
class DatabricksImplConfig(RemoteInferenceProviderConfig):
|
||||||
url: str = Field(
|
url: str | None = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="The URL for the Databricks model serving endpoint",
|
description="The URL for the Databricks model serving endpoint",
|
||||||
)
|
)
|
||||||
api_token: SecretStr = Field(
|
api_token: SecretStr = Field(
|
||||||
default=SecretStr(None),
|
default=SecretStr(None), # type: ignore[arg-type]
|
||||||
description="The Databricks API token",
|
description="The Databricks API token",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,16 +4,12 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
from collections.abc import Iterable
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from databricks.sdk import WorkspaceClient
|
from databricks.sdk import WorkspaceClient
|
||||||
|
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import OpenAICompletion
|
||||||
Inference,
|
|
||||||
Model,
|
|
||||||
OpenAICompletion,
|
|
||||||
)
|
|
||||||
from llama_stack.apis.models import ModelType
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
|
|
@ -22,30 +18,31 @@ from .config import DatabricksImplConfig
|
||||||
logger = get_logger(name=__name__, category="inference::databricks")
|
logger = get_logger(name=__name__, category="inference::databricks")
|
||||||
|
|
||||||
|
|
||||||
class DatabricksInferenceAdapter(
|
class DatabricksInferenceAdapter(OpenAIMixin):
|
||||||
OpenAIMixin,
|
config: DatabricksImplConfig
|
||||||
Inference,
|
|
||||||
):
|
|
||||||
# source: https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/supported-models
|
# source: https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/supported-models
|
||||||
embedding_model_metadata = {
|
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||||
"databricks-gte-large-en": {"embedding_dimension": 1024, "context_length": 8192},
|
"databricks-gte-large-en": {"embedding_dimension": 1024, "context_length": 8192},
|
||||||
"databricks-bge-large-en": {"embedding_dimension": 1024, "context_length": 512},
|
"databricks-bge-large-en": {"embedding_dimension": 1024, "context_length": 512},
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, config: DatabricksImplConfig) -> None:
|
|
||||||
self.config = config
|
|
||||||
|
|
||||||
def get_api_key(self) -> str:
|
def get_api_key(self) -> str:
|
||||||
return self.config.api_token.get_secret_value()
|
return self.config.api_token.get_secret_value()
|
||||||
|
|
||||||
def get_base_url(self) -> str:
|
def get_base_url(self) -> str:
|
||||||
return f"{self.config.url}/serving-endpoints"
|
return f"{self.config.url}/serving-endpoints"
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
async def list_provider_model_ids(self) -> Iterable[str]:
|
||||||
return
|
return [
|
||||||
|
endpoint.name
|
||||||
|
for endpoint in WorkspaceClient(
|
||||||
|
host=self.config.url, token=self.get_api_key()
|
||||||
|
).serving_endpoints.list() # TODO: this is not async
|
||||||
|
]
|
||||||
|
|
||||||
async def shutdown(self) -> None:
|
async def should_refresh_models(self) -> bool:
|
||||||
pass
|
return False
|
||||||
|
|
||||||
async def openai_completion(
|
async def openai_completion(
|
||||||
self,
|
self,
|
||||||
|
|
@ -71,32 +68,3 @@ class DatabricksInferenceAdapter(
|
||||||
suffix: str | None = None,
|
suffix: str | None = None,
|
||||||
) -> OpenAICompletion:
|
) -> OpenAICompletion:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
async def list_models(self) -> list[Model] | None:
|
|
||||||
self._model_cache = {} # from OpenAIMixin
|
|
||||||
ws_client = WorkspaceClient(host=self.config.url, token=self.get_api_key()) # TODO: this is not async
|
|
||||||
endpoints = ws_client.serving_endpoints.list()
|
|
||||||
for endpoint in endpoints:
|
|
||||||
model = Model(
|
|
||||||
provider_id=self.__provider_id__,
|
|
||||||
provider_resource_id=endpoint.name,
|
|
||||||
identifier=endpoint.name,
|
|
||||||
)
|
|
||||||
if endpoint.task == "llm/v1/chat":
|
|
||||||
model.model_type = ModelType.llm # this is redundant, but informative
|
|
||||||
elif endpoint.task == "llm/v1/embeddings":
|
|
||||||
if endpoint.name not in self.embedding_model_metadata:
|
|
||||||
logger.warning(f"No metadata information available for embedding model {endpoint.name}, skipping.")
|
|
||||||
continue
|
|
||||||
model.model_type = ModelType.embedding
|
|
||||||
model.metadata = self.embedding_model_metadata[endpoint.name]
|
|
||||||
else:
|
|
||||||
logger.warning(f"Unknown model type, skipping: {endpoint}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
self._model_cache[endpoint.name] = model
|
|
||||||
|
|
||||||
return list(self._model_cache.values())
|
|
||||||
|
|
||||||
async def should_refresh_models(self) -> bool:
|
|
||||||
return False
|
|
||||||
|
|
|
||||||
|
|
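With the bespoke list_models gone, Databricks embedding models are declared up front and matched against the endpoint names returned by list_provider_model_ids; endpoints without an entry are presumably registered as LLMs by the mixin. The shape of that metadata, with a hypothetical extra entry showing how a newly supported embedding endpoint would be added:

    embedding_model_metadata: dict[str, dict[str, int]] = {
        "databricks-gte-large-en": {"embedding_dimension": 1024, "context_length": 8192},
        "databricks-bge-large-en": {"embedding_dimension": 1024, "context_length": 512},
        "databricks-new-embedding": {"embedding_dimension": 768, "context_length": 4096},  # hypothetical
    }
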
@ -17,6 +17,6 @@ async def get_adapter_impl(config: FireworksImplConfig, _deps):
|
||||||
from .fireworks import FireworksInferenceAdapter
|
from .fireworks import FireworksInferenceAdapter
|
||||||
|
|
||||||
assert isinstance(config, FireworksImplConfig), f"Unexpected config type: {type(config)}"
|
assert isinstance(config, FireworksImplConfig), f"Unexpected config type: {type(config)}"
|
||||||
impl = FireworksInferenceAdapter(config)
|
impl = FireworksInferenceAdapter(config=config)
|
||||||
await impl.initialize()
|
await impl.initialize()
|
||||||
return impl
|
return impl
|
||||||
|
|
|
||||||
|
|
@ -5,124 +5,26 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
|
||||||
from fireworks.client import Fireworks
|
|
||||||
|
|
||||||
from llama_stack.apis.inference import (
|
|
||||||
ChatCompletionRequest,
|
|
||||||
Inference,
|
|
||||||
LogProbConfig,
|
|
||||||
ResponseFormat,
|
|
||||||
ResponseFormatType,
|
|
||||||
SamplingParams,
|
|
||||||
)
|
|
||||||
from llama_stack.core.request_headers import NeedsRequestProviderData
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.providers.utils.inference.model_registry import (
|
|
||||||
ModelRegistryHelper,
|
|
||||||
)
|
|
||||||
from llama_stack.providers.utils.inference.openai_compat import (
|
|
||||||
convert_message_to_openai_dict,
|
|
||||||
get_sampling_options,
|
|
||||||
)
|
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
|
||||||
chat_completion_request_to_prompt,
|
|
||||||
request_has_media,
|
|
||||||
)
|
|
||||||
|
|
||||||
from .config import FireworksImplConfig
|
from .config import FireworksImplConfig
|
||||||
|
|
||||||
logger = get_logger(name=__name__, category="inference::fireworks")
|
logger = get_logger(name=__name__, category="inference::fireworks")
|
||||||
|
|
||||||
|
|
||||||
class FireworksInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData):
|
class FireworksInferenceAdapter(OpenAIMixin):
|
||||||
embedding_model_metadata = {
|
config: FireworksImplConfig
|
||||||
|
|
||||||
|
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||||
"nomic-ai/nomic-embed-text-v1.5": {"embedding_dimension": 768, "context_length": 8192},
|
"nomic-ai/nomic-embed-text-v1.5": {"embedding_dimension": 768, "context_length": 8192},
|
||||||
"accounts/fireworks/models/qwen3-embedding-8b": {"embedding_dimension": 4096, "context_length": 40960},
|
"accounts/fireworks/models/qwen3-embedding-8b": {"embedding_dimension": 4096, "context_length": 40960},
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, config: FireworksImplConfig) -> None:
|
provider_data_api_key_field: str = "fireworks_api_key"
|
||||||
ModelRegistryHelper.__init__(self)
|
|
||||||
self.config = config
|
|
||||||
self.allowed_models = config.allowed_models
|
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def shutdown(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def get_api_key(self) -> str:
|
def get_api_key(self) -> str:
|
||||||
config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None
|
return self.config.api_key.get_secret_value() if self.config.api_key else None # type: ignore[return-value]
|
||||||
if config_api_key:
|
|
||||||
return config_api_key
|
|
||||||
else:
|
|
||||||
provider_data = self.get_request_provider_data()
|
|
||||||
if provider_data is None or not provider_data.fireworks_api_key:
|
|
||||||
raise ValueError(
|
|
||||||
'Pass Fireworks API Key in the header X-LlamaStack-Provider-Data as { "fireworks_api_key": <your api key>}'
|
|
||||||
)
|
|
||||||
return provider_data.fireworks_api_key
|
|
||||||
|
|
||||||
def get_base_url(self) -> str:
|
def get_base_url(self) -> str:
|
||||||
return "https://api.fireworks.ai/inference/v1"
|
return "https://api.fireworks.ai/inference/v1"
|
||||||
|
|
||||||
def _get_client(self) -> Fireworks:
|
|
||||||
fireworks_api_key = self.get_api_key()
|
|
||||||
return Fireworks(api_key=fireworks_api_key)
|
|
||||||
|
|
||||||
def _build_options(
|
|
||||||
self,
|
|
||||||
sampling_params: SamplingParams | None,
|
|
||||||
fmt: ResponseFormat | None,
|
|
||||||
logprobs: LogProbConfig | None,
|
|
||||||
) -> dict:
|
|
||||||
options = get_sampling_options(sampling_params)
|
|
||||||
options.setdefault("max_tokens", 512)
|
|
||||||
|
|
||||||
if fmt:
|
|
||||||
if fmt.type == ResponseFormatType.json_schema.value:
|
|
||||||
options["response_format"] = {
|
|
||||||
"type": "json_object",
|
|
||||||
"schema": fmt.json_schema,
|
|
||||||
}
|
|
||||||
elif fmt.type == ResponseFormatType.grammar.value:
|
|
||||||
options["response_format"] = {
|
|
||||||
"type": "grammar",
|
|
||||||
"grammar": fmt.bnf,
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unknown response format {fmt.type}")
|
|
||||||
|
|
||||||
if logprobs and logprobs.top_k:
|
|
||||||
options["logprobs"] = logprobs.top_k
|
|
||||||
if options["logprobs"] <= 0 or options["logprobs"] >= 5:
|
|
||||||
raise ValueError("Required range: 0 < top_k < 5")
|
|
||||||
|
|
||||||
return options
|
|
||||||
|
|
||||||
async def _get_params(self, request: ChatCompletionRequest) -> dict:
|
|
||||||
input_dict = {}
|
|
||||||
media_present = request_has_media(request)
|
|
||||||
|
|
||||||
llama_model = self.get_llama_model(request.model)
|
|
||||||
# TODO: tools are never added to the request, so we need to add them here
|
|
||||||
if media_present or not llama_model:
|
|
||||||
input_dict["messages"] = [await convert_message_to_openai_dict(m, download=True) for m in request.messages]
|
|
||||||
else:
|
|
||||||
input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model)
|
|
||||||
|
|
||||||
# Fireworks always prepends with BOS
|
|
||||||
if "prompt" in input_dict:
|
|
||||||
if input_dict["prompt"].startswith("<|begin_of_text|>"):
|
|
||||||
input_dict["prompt"] = input_dict["prompt"][len("<|begin_of_text|>") :]
|
|
||||||
|
|
||||||
params = {
|
|
||||||
"model": request.model,
|
|
||||||
**input_dict,
|
|
||||||
"stream": bool(request.stream),
|
|
||||||
**self._build_options(request.sampling_params, request.response_format, request.logprobs),
|
|
||||||
}
|
|
||||||
logger.debug(f"params to fireworks: {params}")
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
|
||||||
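The Fireworks adapter above keeps per-request key support through provider_data_api_key_field = "fireworks_api_key": a caller can still send the key in the X-LlamaStack-Provider-Data header, as the removed error message describes. A rough client-side sketch follows; the stack URL, endpoint path, and model id are placeholder assumptions, not values from this patch.

# Rough sketch only: pass the Fireworks key per request via provider data.
# The stack URL, endpoint path, and model id below are assumed placeholders.
import json

import httpx

headers = {
    "X-LlamaStack-Provider-Data": json.dumps({"fireworks_api_key": "fw-..."}),
    "Content-Type": "application/json",
}
body = {
    "model": "accounts/fireworks/models/llama-v3p1-8b-instruct",  # placeholder model id
    "messages": [{"role": "user", "content": "Hello"}],
}
resp = httpx.post("http://localhost:8321/v1/openai/v1/chat/completions", headers=headers, json=body)
print(resp.json())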
|
|
@@ -10,6 +10,6 @@ from .config import GeminiConfig
 async def get_adapter_impl(config: GeminiConfig, _deps):
     from .gemini import GeminiInferenceAdapter

-    impl = GeminiInferenceAdapter(config)
+    impl = GeminiInferenceAdapter(config=config)
     await impl.initialize()
     return impl
@@ -4,33 +4,21 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import GeminiConfig


-class GeminiInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
-    embedding_model_metadata = {
+class GeminiInferenceAdapter(OpenAIMixin):
+    config: GeminiConfig
+
+    provider_data_api_key_field: str = "gemini_api_key"
+
+    embedding_model_metadata: dict[str, dict[str, int]] = {
         "text-embedding-004": {"embedding_dimension": 768, "context_length": 2048},
     }

-    def __init__(self, config: GeminiConfig) -> None:
-        LiteLLMOpenAIMixin.__init__(
-            self,
-            litellm_provider_name="gemini",
-            api_key_from_config=config.api_key,
-            provider_data_api_key_field="gemini_api_key",
-        )
-        self.config = config
-
-    get_api_key = LiteLLMOpenAIMixin.get_api_key
+    def get_api_key(self) -> str:
+        return self.config.api_key or ""

     def get_base_url(self):
         return "https://generativelanguage.googleapis.com/v1beta/openai/"
-
-    async def initialize(self) -> None:
-        await super().initialize()
-
-    async def shutdown(self) -> None:
-        await super().shutdown()
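The Gemini change above is representative of the pattern this patch applies across providers: drop LiteLLMOpenAIMixin and the hand-written __init__, and declare the config, the provider-data key field, and any embedding metadata directly on an OpenAIMixin subclass. A minimal sketch for a hypothetical provider follows; the "Example" names, config class, and base URL are invented, and a real adapter may need more than shown here.

# Minimal sketch of the OpenAIMixin-only adapter shape; "Example*" names and the
# URL are invented, and real adapters may need more than is shown here.
from pydantic import BaseModel

from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin


class ExampleConfig(BaseModel):
    api_key: str | None = None


class ExampleInferenceAdapter(OpenAIMixin):
    config: ExampleConfig

    # header field a client can use to pass a per-request key
    provider_data_api_key_field: str = "example_api_key"

    embedding_model_metadata: dict[str, dict[str, int]] = {
        "example-embedding": {"embedding_dimension": 768, "context_length": 2048},
    }

    def get_api_key(self) -> str:
        return self.config.api_key or ""

    def get_base_url(self) -> str:
        return "https://api.example.com/v1"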
|
|
@@ -11,5 +11,5 @@ async def get_adapter_impl(config: GroqConfig, _deps):
     # import dynamically so the import is used only when it is needed
     from .groq import GroqInferenceAdapter

-    adapter = GroqInferenceAdapter(config)
+    adapter = GroqInferenceAdapter(config=config)
     return adapter
|
|
|
||||||
|
|
@ -6,30 +6,16 @@
|
||||||
|
|
||||||
|
|
||||||
from llama_stack.providers.remote.inference.groq.config import GroqConfig
|
from llama_stack.providers.remote.inference.groq.config import GroqConfig
|
||||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
|
|
||||||
class GroqInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
class GroqInferenceAdapter(OpenAIMixin):
|
||||||
_config: GroqConfig
|
config: GroqConfig
|
||||||
|
|
||||||
def __init__(self, config: GroqConfig):
|
provider_data_api_key_field: str = "groq_api_key"
|
||||||
LiteLLMOpenAIMixin.__init__(
|
|
||||||
self,
|
|
||||||
litellm_provider_name="groq",
|
|
||||||
api_key_from_config=config.api_key,
|
|
||||||
provider_data_api_key_field="groq_api_key",
|
|
||||||
)
|
|
||||||
self.config = config
|
|
||||||
|
|
||||||
# Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
|
def get_api_key(self) -> str:
|
||||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
return self.config.api_key or ""
|
||||||
|
|
||||||
def get_base_url(self) -> str:
|
def get_base_url(self) -> str:
|
||||||
return f"{self.config.url}/openai/v1"
|
return f"{self.config.url}/openai/v1"
|
||||||
|
|
||||||
async def initialize(self):
|
|
||||||
await super().initialize()
|
|
||||||
|
|
||||||
async def shutdown(self):
|
|
||||||
await super().shutdown()
|
|
||||||
|
|
|
||||||
|
|
@@ -4,14 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.inference import InferenceProvider
-
 from .config import LlamaCompatConfig


-async def get_adapter_impl(config: LlamaCompatConfig, _deps) -> InferenceProvider:
+async def get_adapter_impl(config: LlamaCompatConfig, _deps):
     # import dynamically so the import is used only when it is needed
     from .llama import LlamaCompatInferenceAdapter

-    adapter = LlamaCompatInferenceAdapter(config)
+    adapter = LlamaCompatInferenceAdapter(config=config)
     return adapter
|
|
|
||||||
|
|
@ -3,40 +3,26 @@
|
||||||
#
|
#
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from llama_stack.apis.inference.inference import OpenAICompletion, OpenAIEmbeddingsResponse
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
|
from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
|
||||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
logger = get_logger(name=__name__, category="inference::llama_openai_compat")
|
logger = get_logger(name=__name__, category="inference::llama_openai_compat")
|
||||||
|
|
||||||
|
|
||||||
class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
class LlamaCompatInferenceAdapter(OpenAIMixin):
|
||||||
|
config: LlamaCompatConfig
|
||||||
|
|
||||||
|
provider_data_api_key_field: str = "llama_api_key"
|
||||||
"""
|
"""
|
||||||
Llama API Inference Adapter for Llama Stack.
|
Llama API Inference Adapter for Llama Stack.
|
||||||
|
|
||||||
Note: The inheritance order is important here. OpenAIMixin must come before
|
|
||||||
LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
|
|
||||||
is used instead of ModelRegistryHelper.check_model_availability().
|
|
||||||
|
|
||||||
- OpenAIMixin.check_model_availability() queries the Llama API to check if a model exists
|
|
||||||
- ModelRegistryHelper.check_model_availability() (inherited by LiteLLMOpenAIMixin) just returns False and shows a warning
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_config: LlamaCompatConfig
|
def get_api_key(self) -> str:
|
||||||
|
return self.config.api_key or ""
|
||||||
def __init__(self, config: LlamaCompatConfig):
|
|
||||||
LiteLLMOpenAIMixin.__init__(
|
|
||||||
self,
|
|
||||||
litellm_provider_name="meta_llama",
|
|
||||||
api_key_from_config=config.api_key,
|
|
||||||
provider_data_api_key_field="llama_api_key",
|
|
||||||
openai_compat_api_base=config.openai_compat_api_base,
|
|
||||||
)
|
|
||||||
self.config = config
|
|
||||||
|
|
||||||
# Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
|
|
||||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
|
||||||
|
|
||||||
def get_base_url(self) -> str:
|
def get_base_url(self) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
@ -46,8 +32,37 @@ class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||||
"""
|
"""
|
||||||
return self.config.openai_compat_api_base
|
return self.config.openai_compat_api_base
|
||||||
|
|
||||||
async def initialize(self):
|
async def openai_completion(
|
||||||
await super().initialize()
|
self,
|
||||||
|
model: str,
|
||||||
|
prompt: str | list[str] | list[int] | list[list[int]],
|
||||||
|
best_of: int | None = None,
|
||||||
|
echo: bool | None = None,
|
||||||
|
frequency_penalty: float | None = None,
|
||||||
|
logit_bias: dict[str, float] | None = None,
|
||||||
|
logprobs: bool | None = None,
|
||||||
|
max_tokens: int | None = None,
|
||||||
|
n: int | None = None,
|
||||||
|
presence_penalty: float | None = None,
|
||||||
|
seed: int | None = None,
|
||||||
|
stop: str | list[str] | None = None,
|
||||||
|
stream: bool | None = None,
|
||||||
|
stream_options: dict[str, Any] | None = None,
|
||||||
|
temperature: float | None = None,
|
||||||
|
top_p: float | None = None,
|
||||||
|
user: str | None = None,
|
||||||
|
guided_choice: list[str] | None = None,
|
||||||
|
prompt_logprobs: int | None = None,
|
||||||
|
suffix: str | None = None,
|
||||||
|
) -> OpenAICompletion:
|
||||||
|
raise NotImplementedError()
|
||||||
|
|
||||||
async def shutdown(self):
|
async def openai_embeddings(
|
||||||
await super().shutdown()
|
self,
|
||||||
|
model: str,
|
||||||
|
input: str | list[str],
|
||||||
|
encoding_format: str | None = "float",
|
||||||
|
dimensions: int | None = None,
|
||||||
|
user: str | None = None,
|
||||||
|
) -> OpenAIEmbeddingsResponse:
|
||||||
|
raise NotImplementedError()
|
||||||
|
|
|
||||||
|
|
@@ -15,7 +15,8 @@ async def get_adapter_impl(config: NVIDIAConfig, _deps) -> Inference:

     if not isinstance(config, NVIDIAConfig):
         raise RuntimeError(f"Unexpected config type: {type(config)}")
-    adapter = NVIDIAInferenceAdapter(config)
+    adapter = NVIDIAInferenceAdapter(config=config)
+    await adapter.initialize()
     return adapter
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,6 @@
|
||||||
from openai import NOT_GIVEN
|
from openai import NOT_GIVEN
|
||||||
|
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
Inference,
|
|
||||||
OpenAIEmbeddingData,
|
OpenAIEmbeddingData,
|
||||||
OpenAIEmbeddingsResponse,
|
OpenAIEmbeddingsResponse,
|
||||||
OpenAIEmbeddingUsage,
|
OpenAIEmbeddingUsage,
|
||||||
|
|
@ -22,7 +21,9 @@ from .utils import _is_nvidia_hosted
|
||||||
logger = get_logger(name=__name__, category="inference::nvidia")
|
logger = get_logger(name=__name__, category="inference::nvidia")
|
||||||
|
|
||||||
|
|
||||||
class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
|
class NVIDIAInferenceAdapter(OpenAIMixin):
|
||||||
|
config: NVIDIAConfig
|
||||||
|
|
||||||
"""
|
"""
|
||||||
NVIDIA Inference Adapter for Llama Stack.
|
NVIDIA Inference Adapter for Llama Stack.
|
||||||
|
|
||||||
|
|
@ -37,32 +38,21 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
|
# source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
|
||||||
embedding_model_metadata = {
|
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||||
"nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192},
|
"nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192},
|
||||||
"nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024},
|
"nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024},
|
||||||
"nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096},
|
"nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096},
|
||||||
"snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024},
|
"snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024},
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, config: NVIDIAConfig) -> None:
|
async def initialize(self) -> None:
|
||||||
logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")
|
logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...")
|
||||||
|
|
||||||
if _is_nvidia_hosted(config):
|
if _is_nvidia_hosted(self.config):
|
||||||
if not config.api_key:
|
if not self.config.api_key:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM."
|
"API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM."
|
||||||
)
|
)
|
||||||
# elif self._config.api_key:
|
|
||||||
#
|
|
||||||
# we don't raise this warning because a user may have deployed their
|
|
||||||
# self-hosted NIM with an API key requirement.
|
|
||||||
#
|
|
||||||
# warnings.warn(
|
|
||||||
# "API key is not required for self-hosted NVIDIA NIM. "
|
|
||||||
# "Consider removing the api_key from the configuration."
|
|
||||||
# )
|
|
||||||
|
|
||||||
self._config = config
|
|
||||||
|
|
||||||
def get_api_key(self) -> str:
|
def get_api_key(self) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
@ -70,7 +60,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
|
||||||
|
|
||||||
:return: The NVIDIA API key
|
:return: The NVIDIA API key
|
||||||
"""
|
"""
|
||||||
return self._config.api_key.get_secret_value() if self._config.api_key else "NO KEY"
|
return self.config.api_key.get_secret_value() if self.config.api_key else "NO KEY"
|
||||||
|
|
||||||
def get_base_url(self) -> str:
|
def get_base_url(self) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
@ -78,7 +68,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
|
||||||
|
|
||||||
:return: The NVIDIA API base URL
|
:return: The NVIDIA API base URL
|
||||||
"""
|
"""
|
||||||
return f"{self._config.url}/v1" if self._config.append_api_version else self._config.url
|
return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
|
||||||
|
|
||||||
async def openai_embeddings(
|
async def openai_embeddings(
|
||||||
self,
|
self,
|
||||||
|
|
|
||||||
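The NVIDIA adapter's initialize() and get_base_url() above distinguish the hosted NIM endpoint (API key required, /v1 appended) from a self-hosted NIM. A small standalone sketch of that URL logic follows; DemoConfig only mirrors the NVIDIAConfig fields used above, and the example URLs are assumptions.

# Standalone sketch of get_base_url()'s behavior; DemoConfig only mirrors the
# NVIDIAConfig fields used above, and the example URLs are assumptions.
from pydantic import BaseModel, SecretStr


class DemoConfig(BaseModel):
    url: str = "https://integrate.api.nvidia.com"
    api_key: SecretStr | None = None
    append_api_version: bool = True


def base_url(cfg: DemoConfig) -> str:
    return f"{cfg.url}/v1" if cfg.append_api_version else cfg.url


hosted = DemoConfig(api_key=SecretStr("nvapi-..."))
self_hosted = DemoConfig(url="http://localhost:8000", append_api_version=False)
print(base_url(hosted))       # https://integrate.api.nvidia.com/v1
print(base_url(self_hosted))  # http://localhost:8000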
|
|
@@ -10,6 +10,6 @@ from .config import OllamaImplConfig
 async def get_adapter_impl(config: OllamaImplConfig, _deps):
     from .ollama import OllamaInferenceAdapter

-    impl = OllamaInferenceAdapter(config)
+    impl = OllamaInferenceAdapter(config=config)
     await impl.initialize()
     return impl
|
|
|
||||||
|
|
@ -6,58 +6,29 @@
|
||||||
|
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from ollama import AsyncClient as AsyncOllamaClient
|
from ollama import AsyncClient as AsyncOllamaClient
|
||||||
|
|
||||||
from llama_stack.apis.common.content_types import (
|
|
||||||
ImageContentItem,
|
|
||||||
TextContentItem,
|
|
||||||
)
|
|
||||||
from llama_stack.apis.common.errors import UnsupportedModelError
|
from llama_stack.apis.common.errors import UnsupportedModelError
|
||||||
from llama_stack.apis.inference import (
|
|
||||||
ChatCompletionRequest,
|
|
||||||
GrammarResponseFormat,
|
|
||||||
InferenceProvider,
|
|
||||||
JsonSchemaResponseFormat,
|
|
||||||
Message,
|
|
||||||
)
|
|
||||||
from llama_stack.apis.models import Model
|
from llama_stack.apis.models import Model
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.models.llama.sku_types import CoreModelId
|
|
||||||
from llama_stack.providers.datatypes import (
|
from llama_stack.providers.datatypes import (
|
||||||
HealthResponse,
|
HealthResponse,
|
||||||
HealthStatus,
|
HealthStatus,
|
||||||
ModelsProtocolPrivate,
|
|
||||||
)
|
)
|
||||||
from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
|
from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
|
||||||
from llama_stack.providers.utils.inference.model_registry import (
|
|
||||||
ModelRegistryHelper,
|
|
||||||
build_hf_repo_model_entry,
|
|
||||||
)
|
|
||||||
from llama_stack.providers.utils.inference.openai_compat import (
|
|
||||||
get_sampling_options,
|
|
||||||
)
|
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
|
||||||
chat_completion_request_to_prompt,
|
|
||||||
convert_image_content_to_url,
|
|
||||||
request_has_media,
|
|
||||||
)
|
|
||||||
|
|
||||||
logger = get_logger(name=__name__, category="inference::ollama")
|
logger = get_logger(name=__name__, category="inference::ollama")
|
||||||
|
|
||||||
|
|
||||||
class OllamaInferenceAdapter(
|
class OllamaInferenceAdapter(OpenAIMixin):
|
||||||
OpenAIMixin,
|
config: OllamaImplConfig
|
||||||
ModelRegistryHelper,
|
|
||||||
InferenceProvider,
|
|
||||||
ModelsProtocolPrivate,
|
|
||||||
):
|
|
||||||
# automatically set by the resolver when instantiating the provider
|
# automatically set by the resolver when instantiating the provider
|
||||||
__provider_id__: str
|
__provider_id__: str
|
||||||
|
|
||||||
embedding_model_metadata = {
|
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||||
"all-minilm:l6-v2": {
|
"all-minilm:l6-v2": {
|
||||||
"embedding_dimension": 384,
|
"embedding_dimension": 384,
|
||||||
"context_length": 512,
|
"context_length": 512,
|
||||||
|
|
@ -76,29 +47,8 @@ class OllamaInferenceAdapter(
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, config: OllamaImplConfig) -> None:
|
download_images: bool = True
|
||||||
# TODO: remove ModelRegistryHelper.__init__ when completion and
|
_clients: dict[asyncio.AbstractEventLoop, AsyncOllamaClient] = {}
|
||||||
# chat_completion are. this exists to satisfy the input /
|
|
||||||
# output processing for llama models. specifically,
|
|
||||||
# tool_calling is handled by raw template processing,
|
|
||||||
# instead of using the /api/chat endpoint w/ tools=...
|
|
||||||
ModelRegistryHelper.__init__(
|
|
||||||
self,
|
|
||||||
model_entries=[
|
|
||||||
build_hf_repo_model_entry(
|
|
||||||
"llama3.2:3b-instruct-fp16",
|
|
||||||
CoreModelId.llama3_2_3b_instruct.value,
|
|
||||||
),
|
|
||||||
build_hf_repo_model_entry(
|
|
||||||
"llama-guard3:1b",
|
|
||||||
CoreModelId.llama_guard_3_1b.value,
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
self.config = config
|
|
||||||
# Ollama does not support image urls, so we need to download the image and convert it to base64
|
|
||||||
self.download_images = True
|
|
||||||
self._clients: dict[asyncio.AbstractEventLoop, AsyncOllamaClient] = {}
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def ollama_client(self) -> AsyncOllamaClient:
|
def ollama_client(self) -> AsyncOllamaClient:
|
||||||
|
|
@ -142,50 +92,6 @@ class OllamaInferenceAdapter(
|
||||||
async def shutdown(self) -> None:
|
async def shutdown(self) -> None:
|
||||||
self._clients.clear()
|
self._clients.clear()
|
||||||
|
|
||||||
async def _get_model(self, model_id: str) -> Model:
|
|
||||||
if not self.model_store:
|
|
||||||
raise ValueError("Model store not set")
|
|
||||||
return await self.model_store.get_model(model_id)
|
|
||||||
|
|
||||||
async def _get_params(self, request: ChatCompletionRequest) -> dict:
|
|
||||||
sampling_options = get_sampling_options(request.sampling_params)
|
|
||||||
# This is needed since the Ollama API expects num_predict to be set
|
|
||||||
# for early truncation instead of max_tokens.
|
|
||||||
if sampling_options.get("max_tokens") is not None:
|
|
||||||
sampling_options["num_predict"] = sampling_options["max_tokens"]
|
|
||||||
|
|
||||||
input_dict: dict[str, Any] = {}
|
|
||||||
media_present = request_has_media(request)
|
|
||||||
llama_model = self.get_llama_model(request.model)
|
|
||||||
if media_present or not llama_model:
|
|
||||||
contents = [await convert_message_to_openai_dict_for_ollama(m) for m in request.messages]
|
|
||||||
# flatten the list of lists
|
|
||||||
input_dict["messages"] = [item for sublist in contents for item in sublist]
|
|
||||||
else:
|
|
||||||
input_dict["raw"] = True
|
|
||||||
input_dict["prompt"] = await chat_completion_request_to_prompt(
|
|
||||||
request,
|
|
||||||
llama_model,
|
|
||||||
)
|
|
||||||
|
|
||||||
if fmt := request.response_format:
|
|
||||||
if isinstance(fmt, JsonSchemaResponseFormat):
|
|
||||||
input_dict["format"] = fmt.json_schema
|
|
||||||
elif isinstance(fmt, GrammarResponseFormat):
|
|
||||||
raise NotImplementedError("Grammar response format is not supported")
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unknown response format type: {fmt.type}")
|
|
||||||
|
|
||||||
params = {
|
|
||||||
"model": request.model,
|
|
||||||
**input_dict,
|
|
||||||
"options": sampling_options,
|
|
||||||
"stream": request.stream,
|
|
||||||
}
|
|
||||||
logger.debug(f"params to ollama: {params}")
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
async def register_model(self, model: Model) -> Model:
|
async def register_model(self, model: Model) -> Model:
|
||||||
if await self.check_model_availability(model.provider_model_id):
|
if await self.check_model_availability(model.provider_model_id):
|
||||||
return model
|
return model
|
||||||
|
|
@ -197,24 +103,3 @@ class OllamaInferenceAdapter(
|
||||||
return model
|
return model
|
||||||
|
|
||||||
raise UnsupportedModelError(model.provider_model_id, list(self._model_cache.keys()))
|
raise UnsupportedModelError(model.provider_model_id, list(self._model_cache.keys()))
|
||||||
|
|
||||||
|
|
||||||
async def convert_message_to_openai_dict_for_ollama(message: Message) -> list[dict]:
|
|
||||||
async def _convert_content(content) -> dict:
|
|
||||||
if isinstance(content, ImageContentItem):
|
|
||||||
return {
|
|
||||||
"role": message.role,
|
|
||||||
"images": [await convert_image_content_to_url(content, download=True, include_format=False)],
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
text = content.text if isinstance(content, TextContentItem) else content
|
|
||||||
assert isinstance(text, str)
|
|
||||||
return {
|
|
||||||
"role": message.role,
|
|
||||||
"content": text,
|
|
||||||
}
|
|
||||||
|
|
||||||
if isinstance(message.content, list):
|
|
||||||
return [await _convert_content(c) for c in message.content]
|
|
||||||
else:
|
|
||||||
return [await _convert_content(message.content)]
|
|
||||||
|
|
|
||||||
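The Ollama adapter above keeps one AsyncOllamaClient per running event loop (the _clients mapping), so a client created on one loop is never awaited from another. A simplified sketch of that caching pattern outside the adapter; the host URL is a placeholder, and the adapter reads it from its config.

# Simplified sketch of the per-event-loop client cache used by the adapter above.
# The host URL is a placeholder; the adapter reads it from its config.
import asyncio

from ollama import AsyncClient as AsyncOllamaClient

_clients: dict[asyncio.AbstractEventLoop, AsyncOllamaClient] = {}


def get_client(host: str = "http://localhost:11434") -> AsyncOllamaClient:
    loop = asyncio.get_running_loop()
    if loop not in _clients:
        _clients[loop] = AsyncOllamaClient(host=host)
    return _clients[loop]


async def main() -> None:
    client = get_client()
    print(await client.list())  # lists models available to the local Ollama server


asyncio.run(main())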
|
|
@@ -10,6 +10,6 @@ from .config import OpenAIConfig
 async def get_adapter_impl(config: OpenAIConfig, _deps):
     from .openai import OpenAIInferenceAdapter

-    impl = OpenAIInferenceAdapter(config)
+    impl = OpenAIInferenceAdapter(config=config)
     await impl.initialize()
     return impl
|
|
|
||||||
|
|
@ -5,7 +5,6 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
from .config import OpenAIConfig
|
from .config import OpenAIConfig
|
||||||
|
|
@ -14,52 +13,24 @@ logger = get_logger(name=__name__, category="inference::openai")
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# This OpenAI adapter implements Inference methods using two mixins -
|
# This OpenAI adapter implements Inference methods using OpenAIMixin
|
||||||
#
|
#
|
||||||
# | Inference Method | Implementation Source |
|
class OpenAIInferenceAdapter(OpenAIMixin):
|
||||||
# |----------------------------|--------------------------|
|
|
||||||
# | completion | LiteLLMOpenAIMixin |
|
|
||||||
# | chat_completion | LiteLLMOpenAIMixin |
|
|
||||||
# | embedding | LiteLLMOpenAIMixin |
|
|
||||||
# | openai_completion | OpenAIMixin |
|
|
||||||
# | openai_chat_completion | OpenAIMixin |
|
|
||||||
# | openai_embeddings | OpenAIMixin |
|
|
||||||
#
|
|
||||||
class OpenAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
|
||||||
"""
|
"""
|
||||||
OpenAI Inference Adapter for Llama Stack.
|
OpenAI Inference Adapter for Llama Stack.
|
||||||
|
|
||||||
Note: The inheritance order is important here. OpenAIMixin must come before
|
|
||||||
LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
|
|
||||||
is used instead of ModelRegistryHelper.check_model_availability().
|
|
||||||
|
|
||||||
- OpenAIMixin.check_model_availability() queries the OpenAI API to check if a model exists
|
|
||||||
- ModelRegistryHelper.check_model_availability() (inherited by LiteLLMOpenAIMixin) just returns False and shows a warning
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
embedding_model_metadata = {
|
config: OpenAIConfig
|
||||||
|
|
||||||
|
provider_data_api_key_field: str = "openai_api_key"
|
||||||
|
|
||||||
|
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||||
"text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192},
|
"text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192},
|
||||||
"text-embedding-3-large": {"embedding_dimension": 3072, "context_length": 8192},
|
"text-embedding-3-large": {"embedding_dimension": 3072, "context_length": 8192},
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, config: OpenAIConfig) -> None:
|
def get_api_key(self) -> str:
|
||||||
LiteLLMOpenAIMixin.__init__(
|
return self.config.api_key or ""
|
||||||
self,
|
|
||||||
litellm_provider_name="openai",
|
|
||||||
api_key_from_config=config.api_key,
|
|
||||||
provider_data_api_key_field="openai_api_key",
|
|
||||||
)
|
|
||||||
self.config = config
|
|
||||||
# we set is_openai_compat so users can use the canonical
|
|
||||||
# openai model names like "gpt-4" or "gpt-3.5-turbo"
|
|
||||||
# and the model name will be translated to litellm's
|
|
||||||
# "openai/gpt-4" or "openai/gpt-3.5-turbo" transparently.
|
|
||||||
# if we do not set this, users will be exposed to the
|
|
||||||
# litellm specific model names, an abstraction leak.
|
|
||||||
self.is_openai_compat = True
|
|
||||||
|
|
||||||
# Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
|
|
||||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
|
||||||
|
|
||||||
def get_base_url(self) -> str:
|
def get_base_url(self) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
@ -68,9 +39,3 @@ class OpenAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||||
Returns the OpenAI API base URL from the configuration.
|
Returns the OpenAI API base URL from the configuration.
|
||||||
"""
|
"""
|
||||||
return self.config.base_url
|
return self.config.base_url
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
|
||||||
await super().initialize()
|
|
||||||
|
|
||||||
async def shutdown(self) -> None:
|
|
||||||
await super().shutdown()
|
|
||||||
|
|
|
||||||
|
|
@ -31,12 +31,6 @@ class PassthroughInferenceAdapter(Inference):
|
||||||
ModelRegistryHelper.__init__(self)
|
ModelRegistryHelper.__init__(self)
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def shutdown(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def unregister_model(self, model_id: str) -> None:
|
async def unregister_model(self, model_id: str) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -53,12 +53,6 @@ class RunpodInferenceAdapter(
|
||||||
ModelRegistryHelper.__init__(self, stack_to_provider_models_map=RUNPOD_SUPPORTED_MODELS)
|
ModelRegistryHelper.__init__(self, stack_to_provider_models_map=RUNPOD_SUPPORTED_MODELS)
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
|
||||||
return
|
|
||||||
|
|
||||||
async def shutdown(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def _get_params(self, request: ChatCompletionRequest) -> dict:
|
def _get_params(self, request: ChatCompletionRequest) -> dict:
|
||||||
return {
|
return {
|
||||||
"model": self.map_to_provider_model(request.model),
|
"model": self.map_to_provider_model(request.model),
|
||||||
|
|
|
||||||
|
|
@@ -11,6 +11,6 @@ async def get_adapter_impl(config: SambaNovaImplConfig, _deps):
     from .sambanova import SambaNovaInferenceAdapter

     assert isinstance(config, SambaNovaImplConfig), f"Unexpected config type: {type(config)}"
-    impl = SambaNovaInferenceAdapter(config)
+    impl = SambaNovaInferenceAdapter(config=config)
     await impl.initialize()
     return impl
|
|
|
||||||
|
|
@ -5,39 +5,22 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
|
||||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
from .config import SambaNovaImplConfig
|
from .config import SambaNovaImplConfig
|
||||||
|
|
||||||
|
|
||||||
class SambaNovaInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
class SambaNovaInferenceAdapter(OpenAIMixin):
|
||||||
|
config: SambaNovaImplConfig
|
||||||
|
|
||||||
|
provider_data_api_key_field: str = "sambanova_api_key"
|
||||||
|
download_images: bool = True # SambaNova does not support image downloads server-side, perform them on the client
|
||||||
"""
|
"""
|
||||||
SambaNova Inference Adapter for Llama Stack.
|
SambaNova Inference Adapter for Llama Stack.
|
||||||
|
|
||||||
Note: The inheritance order is important here. OpenAIMixin must come before
|
|
||||||
LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
|
|
||||||
is used instead of LiteLLMOpenAIMixin.check_model_availability().
|
|
||||||
|
|
||||||
- OpenAIMixin.check_model_availability() queries the /v1/models to check if a model exists
|
|
||||||
- LiteLLMOpenAIMixin.check_model_availability() checks the static registry within LiteLLM
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, config: SambaNovaImplConfig):
|
def get_api_key(self) -> str:
|
||||||
self.config = config
|
return self.config.api_key.get_secret_value() if self.config.api_key else ""
|
||||||
self.environment_available_models: list[str] = []
|
|
||||||
LiteLLMOpenAIMixin.__init__(
|
|
||||||
self,
|
|
||||||
litellm_provider_name="sambanova",
|
|
||||||
api_key_from_config=self.config.api_key.get_secret_value() if self.config.api_key else None,
|
|
||||||
provider_data_api_key_field="sambanova_api_key",
|
|
||||||
openai_compat_api_base=self.config.url,
|
|
||||||
download_images=True, # SambaNova requires base64 image encoding
|
|
||||||
json_schema_strict=False, # SambaNova doesn't support strict=True yet
|
|
||||||
)
|
|
||||||
|
|
||||||
# Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
|
|
||||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
|
||||||
|
|
||||||
def get_base_url(self) -> str:
|
def get_base_url(self) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
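download_images=True above means image content is fetched and inlined on the client side before the request reaches SambaNova. A rough sketch of that idea in isolation; the image URL and content type are assumptions, and the real conversion lives in the shared mixin.

# Rough illustration of client-side image inlining implied by download_images=True.
# The image URL and content type are assumptions; the adapter's mixin does the real work.
import base64

import httpx


def to_data_url(image_url: str) -> str:
    resp = httpx.get(image_url)
    resp.raise_for_status()
    encoded = base64.b64encode(resp.content).decode("ascii")
    return f"data:image/jpeg;base64,{encoded}"


print(to_data_url("https://example.com/cat.jpg")[:60])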
|
|
@ -5,53 +5,21 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
from huggingface_hub import AsyncInferenceClient, HfApi
|
from huggingface_hub import AsyncInferenceClient, HfApi
|
||||||
from pydantic import SecretStr
|
from pydantic import SecretStr
|
||||||
|
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import OpenAIEmbeddingsResponse
|
||||||
ChatCompletionRequest,
|
|
||||||
Inference,
|
|
||||||
OpenAIEmbeddingsResponse,
|
|
||||||
ResponseFormat,
|
|
||||||
ResponseFormatType,
|
|
||||||
SamplingParams,
|
|
||||||
)
|
|
||||||
from llama_stack.apis.models import Model
|
|
||||||
from llama_stack.apis.models.models import ModelType
|
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.models.llama.sku_list import all_registered_models
|
|
||||||
from llama_stack.providers.utils.inference.model_registry import (
|
|
||||||
ModelRegistryHelper,
|
|
||||||
build_hf_repo_model_entry,
|
|
||||||
)
|
|
||||||
from llama_stack.providers.utils.inference.openai_compat import (
|
|
||||||
get_sampling_options,
|
|
||||||
)
|
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
|
||||||
chat_completion_request_to_model_input_info,
|
|
||||||
)
|
|
||||||
|
|
||||||
from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
|
from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
|
||||||
|
|
||||||
log = get_logger(name=__name__, category="inference::tgi")
|
log = get_logger(name=__name__, category="inference::tgi")
|
||||||
|
|
||||||
|
|
||||||
def build_hf_repo_model_entries():
|
class _HfAdapter(OpenAIMixin):
|
||||||
return [
|
|
||||||
build_hf_repo_model_entry(
|
|
||||||
model.huggingface_repo,
|
|
||||||
model.descriptor(),
|
|
||||||
)
|
|
||||||
for model in all_registered_models()
|
|
||||||
if model.huggingface_repo
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
class _HfAdapter(
|
|
||||||
OpenAIMixin,
|
|
||||||
Inference,
|
|
||||||
):
|
|
||||||
url: str
|
url: str
|
||||||
api_key: SecretStr
|
api_key: SecretStr
|
||||||
|
|
||||||
|
|
@ -61,90 +29,14 @@ class _HfAdapter(
|
||||||
|
|
||||||
overwrite_completion_id = True # TGI always returns id=""
|
overwrite_completion_id = True # TGI always returns id=""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
|
|
||||||
self.huggingface_repo_to_llama_model_id = {
|
|
||||||
model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo
|
|
||||||
}
|
|
||||||
|
|
||||||
def get_api_key(self):
|
def get_api_key(self):
|
||||||
return self.api_key.get_secret_value()
|
return self.api_key.get_secret_value()
|
||||||
|
|
||||||
def get_base_url(self):
|
def get_base_url(self):
|
||||||
return self.url
|
return self.url
|
||||||
|
|
||||||
async def shutdown(self) -> None:
|
async def list_provider_model_ids(self) -> Iterable[str]:
|
||||||
pass
|
return [self.model_id]
|
||||||
|
|
||||||
async def list_models(self) -> list[Model] | None:
|
|
||||||
models = []
|
|
||||||
async for model in self.client.models.list():
|
|
||||||
models.append(
|
|
||||||
Model(
|
|
||||||
identifier=model.id,
|
|
||||||
provider_resource_id=model.id,
|
|
||||||
provider_id=self.__provider_id__,
|
|
||||||
metadata={},
|
|
||||||
model_type=ModelType.llm,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return models
|
|
||||||
|
|
||||||
async def register_model(self, model: Model) -> Model:
|
|
||||||
if model.provider_resource_id != self.model_id:
|
|
||||||
raise ValueError(
|
|
||||||
f"Model {model.provider_resource_id} does not match the model {self.model_id} served by TGI."
|
|
||||||
)
|
|
||||||
return model
|
|
||||||
|
|
||||||
async def unregister_model(self, model_id: str) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def _get_max_new_tokens(self, sampling_params, input_tokens):
|
|
||||||
return min(
|
|
||||||
sampling_params.max_tokens or (self.max_tokens - input_tokens),
|
|
||||||
self.max_tokens - input_tokens - 1,
|
|
||||||
)
|
|
||||||
|
|
||||||
def _build_options(
|
|
||||||
self,
|
|
||||||
sampling_params: SamplingParams | None = None,
|
|
||||||
fmt: ResponseFormat = None,
|
|
||||||
):
|
|
||||||
options = get_sampling_options(sampling_params)
|
|
||||||
# TGI does not support temperature=0 when using greedy sampling
|
|
||||||
# We set it to 1e-3 instead, anything lower outputs garbage from TGI
|
|
||||||
# We can use top_p sampling strategy to specify lower temperature
|
|
||||||
if abs(options["temperature"]) < 1e-10:
|
|
||||||
options["temperature"] = 1e-3
|
|
||||||
|
|
||||||
# delete key "max_tokens" from options since its not supported by the API
|
|
||||||
options.pop("max_tokens", None)
|
|
||||||
if fmt:
|
|
||||||
if fmt.type == ResponseFormatType.json_schema.value:
|
|
||||||
options["grammar"] = {
|
|
||||||
"type": "json",
|
|
||||||
"value": fmt.json_schema,
|
|
||||||
}
|
|
||||||
elif fmt.type == ResponseFormatType.grammar.value:
|
|
||||||
raise ValueError("Grammar response format not supported yet")
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unexpected response format: {fmt.type}")
|
|
||||||
|
|
||||||
return options
|
|
||||||
|
|
||||||
async def _get_params(self, request: ChatCompletionRequest) -> dict:
|
|
||||||
prompt, input_tokens = await chat_completion_request_to_model_input_info(
|
|
||||||
request, self.register_helper.get_llama_model(request.model)
|
|
||||||
)
|
|
||||||
return dict(
|
|
||||||
prompt=prompt,
|
|
||||||
stream=request.stream,
|
|
||||||
details=True,
|
|
||||||
max_new_tokens=self._get_max_new_tokens(request.sampling_params, input_tokens),
|
|
||||||
stop_sequences=["<|eom_id|>", "<|eot_id|>"],
|
|
||||||
**self._build_options(request.sampling_params, request.response_format),
|
|
||||||
)
|
|
||||||
|
|
||||||
async def openai_embeddings(
|
async def openai_embeddings(
|
||||||
self,
|
self,
|
||||||
|
|
|
||||||
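The overwrite_completion_id flag above exists because TGI returns id="" on completions, so the mixin substitutes a locally generated id. The idea in isolation looks like the sketch below; the real replacement happens inside OpenAIMixin.

# Illustration only: substitute a locally generated id when the server returns "".
import uuid


def ensure_completion_id(completion_id: str | None) -> str:
    return completion_id or f"chatcmpl-{uuid.uuid4()}"


print(ensure_completion_id(""))  # e.g. chatcmpl-1f0c...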
|
|
@@ -17,6 +17,6 @@ async def get_adapter_impl(config: TogetherImplConfig, _deps):
     from .together import TogetherInferenceAdapter

     assert isinstance(config, TogetherImplConfig), f"Unexpected config type: {type(config)}"
-    impl = TogetherInferenceAdapter(config)
+    impl = TogetherInferenceAdapter(config=config)
     await impl.initialize()
     return impl
|
|
|
||||||
|
|
@ -5,41 +5,29 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
|
||||||
from openai import AsyncOpenAI
|
from collections.abc import Iterable
|
||||||
|
|
||||||
from together import AsyncTogether
|
from together import AsyncTogether
|
||||||
from together.constants import BASE_URL
|
from together.constants import BASE_URL
|
||||||
|
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
ChatCompletionRequest,
|
|
||||||
Inference,
|
|
||||||
LogProbConfig,
|
|
||||||
OpenAIEmbeddingsResponse,
|
OpenAIEmbeddingsResponse,
|
||||||
ResponseFormat,
|
|
||||||
ResponseFormatType,
|
|
||||||
SamplingParams,
|
|
||||||
)
|
)
|
||||||
from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
|
from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
|
||||||
from llama_stack.apis.models import Model, ModelType
|
from llama_stack.apis.models import Model
|
||||||
from llama_stack.core.request_headers import NeedsRequestProviderData
|
from llama_stack.core.request_headers import NeedsRequestProviderData
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
|
|
||||||
from llama_stack.providers.utils.inference.openai_compat import (
|
|
||||||
convert_message_to_openai_dict,
|
|
||||||
get_sampling_options,
|
|
||||||
)
|
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
|
||||||
chat_completion_request_to_prompt,
|
|
||||||
request_has_media,
|
|
||||||
)
|
|
||||||
|
|
||||||
from .config import TogetherImplConfig
|
from .config import TogetherImplConfig
|
||||||
|
|
||||||
logger = get_logger(name=__name__, category="inference::together")
|
logger = get_logger(name=__name__, category="inference::together")
|
||||||
|
|
||||||
|
|
||||||
class TogetherInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData):
|
class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
|
||||||
embedding_model_metadata = {
|
config: TogetherImplConfig
|
||||||
|
|
||||||
|
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||||
"togethercomputer/m2-bert-80M-32k-retrieval": {"embedding_dimension": 768, "context_length": 32768},
|
"togethercomputer/m2-bert-80M-32k-retrieval": {"embedding_dimension": 768, "context_length": 32768},
|
||||||
"BAAI/bge-large-en-v1.5": {"embedding_dimension": 1024, "context_length": 512},
|
"BAAI/bge-large-en-v1.5": {"embedding_dimension": 1024, "context_length": 512},
|
||||||
"BAAI/bge-base-en-v1.5": {"embedding_dimension": 768, "context_length": 512},
|
"BAAI/bge-base-en-v1.5": {"embedding_dimension": 768, "context_length": 512},
|
||||||
|
|
@ -47,24 +35,16 @@ class TogetherInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData)
|
||||||
"intfloat/multilingual-e5-large-instruct": {"embedding_dimension": 1024, "context_length": 512},
|
"intfloat/multilingual-e5-large-instruct": {"embedding_dimension": 1024, "context_length": 512},
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, config: TogetherImplConfig) -> None:
|
_model_cache: dict[str, Model] = {}
|
||||||
ModelRegistryHelper.__init__(self)
|
|
||||||
self.config = config
|
provider_data_api_key_field: str = "together_api_key"
|
||||||
self.allowed_models = config.allowed_models
|
|
||||||
self._model_cache: dict[str, Model] = {}
|
|
||||||
|
|
||||||
def get_api_key(self):
|
def get_api_key(self):
|
||||||
return self.config.api_key.get_secret_value()
|
return self.config.api_key.get_secret_value() if self.config.api_key else None
|
||||||
|
|
||||||
def get_base_url(self):
|
def get_base_url(self):
|
||||||
return BASE_URL
|
return BASE_URL
|
||||||
|
|
||||||
async def initialize(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def shutdown(self) -> None:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def _get_client(self) -> AsyncTogether:
|
def _get_client(self) -> AsyncTogether:
|
||||||
together_api_key = None
|
together_api_key = None
|
||||||
config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None
|
config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None
|
||||||
|
|
@ -79,90 +59,13 @@ class TogetherInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData)
|
||||||
together_api_key = provider_data.together_api_key
|
together_api_key = provider_data.together_api_key
|
||||||
return AsyncTogether(api_key=together_api_key)
|
return AsyncTogether(api_key=together_api_key)
|
||||||
|
|
||||||
def _get_openai_client(self) -> AsyncOpenAI:
|
async def list_provider_model_ids(self) -> Iterable[str]:
|
||||||
together_client = self._get_client().client
|
|
||||||
return AsyncOpenAI(
|
|
||||||
base_url=together_client.base_url,
|
|
||||||
api_key=together_client.api_key,
|
|
||||||
)
|
|
||||||
|
|
||||||
def _build_options(
|
|
||||||
self,
|
|
||||||
sampling_params: SamplingParams | None,
|
|
||||||
logprobs: LogProbConfig | None,
|
|
||||||
fmt: ResponseFormat,
|
|
||||||
) -> dict:
|
|
||||||
options = get_sampling_options(sampling_params)
|
|
||||||
if fmt:
|
|
||||||
if fmt.type == ResponseFormatType.json_schema.value:
|
|
||||||
options["response_format"] = {
|
|
||||||
"type": "json_object",
|
|
||||||
"schema": fmt.json_schema,
|
|
||||||
}
|
|
||||||
elif fmt.type == ResponseFormatType.grammar.value:
|
|
||||||
raise NotImplementedError("Grammar response format not supported yet")
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unknown response format {fmt.type}")
|
|
||||||
|
|
||||||
if logprobs and logprobs.top_k:
|
|
||||||
if logprobs.top_k != 1:
|
|
||||||
raise ValueError(
|
|
||||||
f"Unsupported value: Together only supports logprobs top_k=1. {logprobs.top_k} was provided",
|
|
||||||
)
|
|
||||||
options["logprobs"] = 1
|
|
||||||
|
|
||||||
return options
|
|
||||||
|
|
||||||
async def _get_params(self, request: ChatCompletionRequest) -> dict:
|
|
||||||
input_dict = {}
|
|
||||||
media_present = request_has_media(request)
|
|
||||||
llama_model = self.get_llama_model(request.model)
|
|
||||||
if media_present or not llama_model:
|
|
||||||
input_dict["messages"] = [await convert_message_to_openai_dict(m) for m in request.messages]
|
|
||||||
else:
|
|
||||||
input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model)
|
|
||||||
|
|
||||||
params = {
|
|
||||||
"model": request.model,
|
|
||||||
**input_dict,
|
|
||||||
"stream": request.stream,
|
|
||||||
**self._build_options(request.sampling_params, request.logprobs, request.response_format),
|
|
||||||
}
|
|
||||||
logger.debug(f"params to together: {params}")
|
|
||||||
return params
|
|
||||||
|
|
||||||
async def list_models(self) -> list[Model] | None:
|
|
||||||
self._model_cache = {}
|
|
||||||
# Together's /v1/models is not compatible with OpenAI's /v1/models. Together support ticket #13355 -> will not fix, use Together's own client
|
# Together's /v1/models is not compatible with OpenAI's /v1/models. Together support ticket #13355 -> will not fix, use Together's own client
|
||||||
for m in await self._get_client().models.list():
|
return [m.id for m in await self._get_client().models.list()]
|
||||||
if m.type == "embedding":
|
|
||||||
if m.id not in self.embedding_model_metadata:
|
|
||||||
logger.warning(f"Unknown embedding dimension for model {m.id}, skipping.")
|
|
||||||
continue
|
|
||||||
metadata = self.embedding_model_metadata[m.id]
|
|
||||||
self._model_cache[m.id] = Model(
|
|
||||||
provider_id=self.__provider_id__,
|
|
||||||
provider_resource_id=m.id,
|
|
||||||
identifier=m.id,
|
|
||||||
model_type=ModelType.embedding,
|
|
||||||
metadata=metadata,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
self._model_cache[m.id] = Model(
|
|
||||||
provider_id=self.__provider_id__,
|
|
||||||
provider_resource_id=m.id,
|
|
||||||
identifier=m.id,
|
|
||||||
model_type=ModelType.llm,
|
|
||||||
)
|
|
||||||
|
|
||||||
return self._model_cache.values()
|
|
||||||
|
|
||||||
async def should_refresh_models(self) -> bool:
|
async def should_refresh_models(self) -> bool:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
async def check_model_availability(self, model):
|
|
||||||
return model in self._model_cache
|
|
||||||
|
|
||||||
async def openai_embeddings(
|
async def openai_embeddings(
|
||||||
self,
|
self,
|
||||||
model: str,
|
model: str,
|
||||||
|
|
@ -203,4 +106,4 @@ class TogetherInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData)
|
||||||
)
|
)
|
||||||
response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
|
response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
|
||||||
|
|
||||||
return response
|
return response # type: ignore[no-any-return]
|
||||||
|
|
|
||||||
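Because Together's /v1/models is not OpenAI-compatible (see the support-ticket comment above), list_provider_model_ids goes through Together's own client. A standalone sketch of that call; the API key is a placeholder and error handling is omitted.

# Standalone sketch of listing Together model ids with the native client, as the
# adapter's list_provider_model_ids does above. The API key is a placeholder.
import asyncio

from together import AsyncTogether


async def list_model_ids(api_key: str) -> list[str]:
    client = AsyncTogether(api_key=api_key)
    return [m.id for m in await client.models.list()]


print(asyncio.run(list_model_ids("tgp_...")))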
|
|
@@ -10,6 +10,6 @@ from .config import VertexAIConfig
 async def get_adapter_impl(config: VertexAIConfig, _deps):
     from .vertexai import VertexAIInferenceAdapter

-    impl = VertexAIInferenceAdapter(config)
+    impl = VertexAIInferenceAdapter(config=config)
     await impl.initialize()
     return impl
|
|
|
||||||
|
|
@ -4,29 +4,19 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
import google.auth.transport.requests
|
import google.auth.transport.requests
|
||||||
from google.auth import default
|
from google.auth import default
|
||||||
|
|
||||||
from llama_stack.apis.inference import ChatCompletionRequest
|
|
||||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
|
|
||||||
LiteLLMOpenAIMixin,
|
|
||||||
)
|
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
from .config import VertexAIConfig
|
from .config import VertexAIConfig
|
||||||
|
|
||||||
|
|
||||||
class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
class VertexAIInferenceAdapter(OpenAIMixin):
|
||||||
def __init__(self, config: VertexAIConfig) -> None:
|
config: VertexAIConfig
|
||||||
LiteLLMOpenAIMixin.__init__(
|
|
||||||
self,
|
provider_data_api_key_field: str = "vertex_project"
|
||||||
litellm_provider_name="vertex_ai",
|
|
||||||
api_key_from_config=None, # Vertex AI uses ADC, not API keys
|
|
||||||
provider_data_api_key_field="vertex_project", # Use project for validation
|
|
||||||
)
|
|
||||||
self.config = config
|
|
||||||
|
|
||||||
def get_api_key(self) -> str:
|
def get_api_key(self) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
@ -41,8 +31,7 @@ class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||||
credentials.refresh(google.auth.transport.requests.Request())
|
credentials.refresh(google.auth.transport.requests.Request())
|
||||||
return str(credentials.token)
|
return str(credentials.token)
|
||||||
except Exception:
|
except Exception:
|
||||||
# If we can't get credentials, return empty string to let LiteLLM handle it
|
# If we can't get credentials, return empty string to let the env work with ADC directly
|
||||||
# This allows the LiteLLM mixin to work with ADC directly
|
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def get_base_url(self) -> str:
|
def get_base_url(self) -> str:
|
||||||
|
|
@ -53,23 +42,3 @@ class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||||
Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
|
Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
|
||||||
"""
|
"""
|
||||||
return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
|
return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
|
||||||
|
|
||||||
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
|
|
||||||
# Get base parameters from parent
|
|
||||||
params = await super()._get_params(request)
|
|
||||||
|
|
||||||
# Add Vertex AI specific parameters
|
|
||||||
provider_data = self.get_request_provider_data()
|
|
||||||
if provider_data:
|
|
||||||
if getattr(provider_data, "vertex_project", None):
|
|
||||||
params["vertex_project"] = provider_data.vertex_project
|
|
||||||
if getattr(provider_data, "vertex_location", None):
|
|
||||||
params["vertex_location"] = provider_data.vertex_location
|
|
||||||
else:
|
|
||||||
params["vertex_project"] = self.config.project
|
|
||||||
params["vertex_location"] = self.config.location
|
|
||||||
|
|
||||||
# Remove api_key since Vertex AI uses ADC
|
|
||||||
params.pop("api_key", None)
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
|
||||||
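get_base_url() above builds the regional OpenAI-compatible Vertex AI endpoint from the configured project and location. The resulting shape, with placeholder values:

# Placeholder project/location; shows the URL shape produced by get_base_url() above.
project = "my-gcp-project"
location = "us-central1"
base_url = (
    f"https://{location}-aiplatform.googleapis.com/v1/projects/{project}"
    f"/locations/{location}/endpoints/openapi"
)
print(base_url)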
|
|
@@ -17,6 +17,6 @@ async def get_adapter_impl(config: VLLMInferenceAdapterConfig, _deps):
     from .vllm import VLLMInferenceAdapter

     assert isinstance(config, VLLMInferenceAdapterConfig), f"Unexpected config type: {type(config)}"
-    impl = VLLMInferenceAdapter(config)
+    impl = VLLMInferenceAdapter(config=config)
     await impl.initialize()
     return impl
|
|
|
||||||
|
|
@@ -3,56 +3,26 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-import json
-from collections.abc import AsyncGenerator, AsyncIterator
+from collections.abc import AsyncIterator
 from typing import Any
 from urllib.parse import urljoin

 import httpx
-from openai import APIConnectionError
 from openai.types.chat.chat_completion_chunk import (
     ChatCompletionChunk as OpenAIChatCompletionChunk,
 )
+from pydantic import ConfigDict

-from llama_stack.apis.common.content_types import (
-    TextDelta,
-    ToolCallDelta,
-    ToolCallParseStatus,
-)
 from llama_stack.apis.inference import (
-    ChatCompletionRequest,
-    ChatCompletionResponseEvent,
-    ChatCompletionResponseEventType,
-    ChatCompletionResponseStreamChunk,
-    GrammarResponseFormat,
-    Inference,
-    JsonSchemaResponseFormat,
-    ModelStore,
     OpenAIChatCompletion,
     OpenAIMessageParam,
     OpenAIResponseFormatParam,
     ToolChoice,
-    ToolDefinition,
 )
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
-from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import (
     HealthResponse,
     HealthStatus,
-    ModelsProtocolPrivate,
-)
-from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
-from llama_stack.providers.utils.inference.model_registry import (
-    ModelRegistryHelper,
-    build_hf_repo_model_entry,
-)
-from llama_stack.providers.utils.inference.openai_compat import (
-    UnparseableToolCall,
-    convert_message_to_openai_dict,
-    convert_tool_call,
-    get_sampling_options,
 )
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@@ -61,210 +31,15 @@ from .config import VLLMInferenceAdapterConfig
 log = get_logger(name=__name__, category="inference::vllm")


-def build_hf_repo_model_entries():
-    return [
-        build_hf_repo_model_entry(
-            model.huggingface_repo,
-            model.descriptor(),
-        )
-        for model in all_registered_models()
-        if model.huggingface_repo
-    ]
-
-
-def _convert_to_vllm_tool_calls_in_response(
-    tool_calls,
-) -> list[ToolCall]:
-    if not tool_calls:
-        return []
-
-    return [
-        ToolCall(
-            call_id=call.id,
-            tool_name=call.function.name,
-            arguments=call.function.arguments,
-        )
-        for call in tool_calls
-    ]
-
-
-def _convert_to_vllm_tools_in_request(tools: list[ToolDefinition]) -> list[dict]:
-    compat_tools = []
-
-    for tool in tools:
-        # The tool.tool_name can be a str or a BuiltinTool enum. If
-        # it's the latter, convert to a string.
-        tool_name = tool.tool_name
-        if isinstance(tool_name, BuiltinTool):
-            tool_name = tool_name.value
-
-        compat_tool = {
-            "type": "function",
-            "function": {
-                "name": tool_name,
-                "description": tool.description,
-                "parameters": tool.input_schema
-                or {
-                    "type": "object",
-                    "properties": {},
-                    "required": [],
-                },
-            },
-        }
-
-        compat_tools.append(compat_tool)
-
-    return compat_tools
-
-
-def _convert_to_vllm_finish_reason(finish_reason: str) -> StopReason:
-    return {
-        "stop": StopReason.end_of_turn,
-        "length": StopReason.out_of_tokens,
-        "tool_calls": StopReason.end_of_message,
-    }.get(finish_reason, StopReason.end_of_turn)
-
-
-def _process_vllm_chat_completion_end_of_stream(
-    finish_reason: str | None,
-    last_chunk_content: str | None,
-    current_event_type: ChatCompletionResponseEventType,
-    tool_call_bufs: dict[str, UnparseableToolCall] | None = None,
-) -> list[OpenAIChatCompletionChunk]:
-    chunks = []
-
-    if finish_reason is not None:
-        stop_reason = _convert_to_vllm_finish_reason(finish_reason)
-    else:
-        stop_reason = StopReason.end_of_message
-
-    tool_call_bufs = tool_call_bufs or {}
-    for _index, tool_call_buf in sorted(tool_call_bufs.items()):
-        args_str = tool_call_buf.arguments or "{}"
-        try:
-            chunks.append(
-                ChatCompletionResponseStreamChunk(
-                    event=ChatCompletionResponseEvent(
-                        event_type=current_event_type,
-                        delta=ToolCallDelta(
-                            tool_call=ToolCall(
-                                call_id=tool_call_buf.call_id,
-                                tool_name=tool_call_buf.tool_name,
-                                arguments=args_str,
-                            ),
-                            parse_status=ToolCallParseStatus.succeeded,
-                        ),
-                    )
-                )
-            )
-        except Exception as e:
-            log.warning(f"Failed to parse tool call buffer arguments: {args_str} \nError: {e}")
-
-            chunks.append(
-                ChatCompletionResponseStreamChunk(
-                    event=ChatCompletionResponseEvent(
-                        event_type=ChatCompletionResponseEventType.progress,
-                        delta=ToolCallDelta(
-                            tool_call=str(tool_call_buf),
-                            parse_status=ToolCallParseStatus.failed,
-                        ),
-                    )
-                )
-            )
-
-    chunks.append(
-        ChatCompletionResponseStreamChunk(
-            event=ChatCompletionResponseEvent(
-                event_type=ChatCompletionResponseEventType.complete,
-                delta=TextDelta(text=last_chunk_content or ""),
-                logprobs=None,
-                stop_reason=stop_reason,
-            )
-        )
-    )
-
-    return chunks
-
-
-async def _process_vllm_chat_completion_stream_response(
-    stream: AsyncGenerator[OpenAIChatCompletionChunk, None],
-) -> AsyncGenerator:
-    yield ChatCompletionResponseStreamChunk(
-        event=ChatCompletionResponseEvent(
-            event_type=ChatCompletionResponseEventType.start,
-            delta=TextDelta(text=""),
-        )
-    )
-    event_type = ChatCompletionResponseEventType.progress
-    tool_call_bufs: dict[str, UnparseableToolCall] = {}
-    end_of_stream_processed = False
-
-    async for chunk in stream:
-        if not chunk.choices:
-            log.warning("vLLM failed to generation any completions - check the vLLM server logs for an error.")
-            return
-        choice = chunk.choices[0]
-        if choice.delta.tool_calls:
-            for delta_tool_call in choice.delta.tool_calls:
-                tool_call = convert_tool_call(delta_tool_call)
-                if delta_tool_call.index not in tool_call_bufs:
-                    tool_call_bufs[delta_tool_call.index] = UnparseableToolCall()
-                tool_call_buf = tool_call_bufs[delta_tool_call.index]
-                tool_call_buf.tool_name += str(tool_call.tool_name)
-                tool_call_buf.call_id += tool_call.call_id
-                tool_call_buf.arguments += (
-                    tool_call.arguments if isinstance(tool_call.arguments, str) else json.dumps(tool_call.arguments)
-                )
-        if choice.finish_reason:
-            chunks = _process_vllm_chat_completion_end_of_stream(
-                finish_reason=choice.finish_reason,
-                last_chunk_content=choice.delta.content,
-                current_event_type=event_type,
-                tool_call_bufs=tool_call_bufs,
-            )
-            for c in chunks:
-                yield c
-            end_of_stream_processed = True
-        elif not choice.delta.tool_calls:
-            yield ChatCompletionResponseStreamChunk(
-                event=ChatCompletionResponseEvent(
-                    event_type=event_type,
-                    delta=TextDelta(text=choice.delta.content or ""),
-                    logprobs=None,
-                )
-            )
-            event_type = ChatCompletionResponseEventType.progress
-
-    if end_of_stream_processed:
-        return
-
-    # the stream ended without a chunk containing finish_reason - we have to generate the
-    # respective completion chunks manually
-    chunks = _process_vllm_chat_completion_end_of_stream(
-        finish_reason=None, last_chunk_content=None, current_event_type=event_type, tool_call_bufs=tool_call_bufs
-    )
-    for c in chunks:
-        yield c
-
-
-class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsProtocolPrivate):
-    # automatically set by the resolver when instantiating the provider
-    __provider_id__: str
-    model_store: ModelStore | None = None
-
-    def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
-        LiteLLMOpenAIMixin.__init__(
-            self,
-            model_entries=build_hf_repo_model_entries(),
-            litellm_provider_name="vllm",
-            api_key_from_config=config.api_token,
-            provider_data_api_key_field="vllm_api_token",
-            openai_compat_api_base=config.url,
-        )
-        self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
-        self.config = config
-
-    get_api_key = LiteLLMOpenAIMixin.get_api_key
+class VLLMInferenceAdapter(OpenAIMixin):
+    config: VLLMInferenceAdapterConfig
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    provider_data_api_key_field: str = "vllm_api_token"
+
+    def get_api_key(self) -> str:
+        return self.config.api_token or ""

     def get_base_url(self) -> str:
         """Get the base URL from config."""
@@ -282,27 +57,6 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro
         # Strictly respecting the refresh_models directive
         return self.config.refresh_models

-    async def list_models(self) -> list[Model] | None:
-        models = []
-        async for m in self.client.models.list():
-            model_type = ModelType.llm  # unclear how to determine embedding vs. llm models
-            models.append(
-                Model(
-                    identifier=m.id,
-                    provider_resource_id=m.id,
-                    provider_id=self.__provider_id__,
-                    metadata={},
-                    model_type=model_type,
-                )
-            )
-        return models
-
-    async def shutdown(self) -> None:
-        pass
-
-    async def unregister_model(self, model_id: str) -> None:
-        pass
-
     async def health(self) -> HealthResponse:
         """
         Performs a health check by verifying connectivity to the remote vLLM server.
@@ -324,63 +78,9 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro
         except Exception as e:
             return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")

-    async def _get_model(self, model_id: str) -> Model:
-        if not self.model_store:
-            raise ValueError("Model store not set")
-        return await self.model_store.get_model(model_id)
-
     def get_extra_client_params(self):
         return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}

-    async def register_model(self, model: Model) -> Model:
-        try:
-            model = await self.register_helper.register_model(model)
-        except ValueError:
-            pass  # Ignore statically unknown model, will check live listing
-        try:
-            res = self.client.models.list()
-        except APIConnectionError as e:
-            raise ValueError(
-                f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
-            ) from e
-        available_models = [m.id async for m in res]
-        if model.provider_resource_id not in available_models:
-            raise ValueError(
-                f"Model {model.provider_resource_id} is not being served by vLLM. "
-                f"Available models: {', '.join(available_models)}"
-            )
-        return model
-
-    async def _get_params(self, request: ChatCompletionRequest) -> dict:
-        options = get_sampling_options(request.sampling_params)
-        if "max_tokens" not in options:
-            options["max_tokens"] = self.config.max_tokens
-
-        input_dict: dict[str, Any] = {}
-        # Only include the 'tools' param if there is any. It can break things if an empty list is sent to the vLLM.
-        if isinstance(request, ChatCompletionRequest) and request.tools:
-            input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)}
-
-        input_dict["messages"] = [await convert_message_to_openai_dict(m, download=True) for m in request.messages]
-
-        if fmt := request.response_format:
-            if isinstance(fmt, JsonSchemaResponseFormat):
-                input_dict["extra_body"] = {"guided_json": fmt.json_schema}
-            elif isinstance(fmt, GrammarResponseFormat):
-                raise NotImplementedError("Grammar response format not supported yet")
-            else:
-                raise ValueError(f"Unknown response format {fmt.type}")
-
-        if request.logprobs and request.logprobs.top_k:
-            input_dict["logprobs"] = request.logprobs.top_k
-
-        return {
-            "model": request.model,
-            **input_dict,
-            "stream": request.stream,
-            **options,
-        }
-
     async def openai_chat_completion(
         self,
         model: str,
@@ -7,10 +7,11 @@
 import base64
 import uuid
 from abc import ABC, abstractmethod
-from collections.abc import AsyncIterator
+from collections.abc import AsyncIterator, Iterable
 from typing import Any

 from openai import NOT_GIVEN, AsyncOpenAI
+from pydantic import BaseModel, ConfigDict

 from llama_stack.apis.inference import (
     Model,
@@ -26,14 +27,14 @@ from llama_stack.apis.inference import (
 from llama_stack.apis.models import ModelType
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ModelsProtocolPrivate
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
 from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content

 logger = get_logger(name=__name__, category="providers::utils")


-class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):
+class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
     """
     Mixin class that provides OpenAI-specific functionality for inference providers.
     This class handles direct OpenAI API calls using the AsyncOpenAI client.
@@ -42,12 +43,25 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):
     - get_api_key(): Method to retrieve the API key
     - get_base_url(): Method to retrieve the OpenAI-compatible API base URL

+    The behavior of this class can be customized by child classes in the following ways:
+    - overwrite_completion_id: If True, overwrites the 'id' field in OpenAI responses
+    - download_images: If True, downloads images and converts to base64 for providers that require it
+    - embedding_model_metadata: A dictionary mapping model IDs to their embedding metadata
+    - provider_data_api_key_field: Optional field name in provider data to look for API key
+    - list_provider_model_ids: Method to list available models from the provider
+    - get_extra_client_params: Method to provide extra parameters to the AsyncOpenAI client
+
     Expected Dependencies:
     - self.model_store: Injected by the Llama Stack distribution system at runtime.
       This provides model registry functionality for looking up registered models.
       The model_store is set in routing_tables/common.py during provider initialization.
     """

+    # Allow extra fields so the routing infra can inject model_store, __provider_id__, etc.
+    model_config = ConfigDict(extra="allow")
+
+    config: RemoteInferenceProviderConfig
+
     # Allow subclasses to control whether to overwrite the 'id' field in OpenAI responses
     # is overwritten with a client-side generated id.
     #
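The docstring above lists everything a concrete provider must supply. As a rough sketch of the minimum viable subclass under those rules (the `Example*` names and fields are hypothetical, not part of this change):

```python
# Hedged sketch of a minimal provider built on the reworked mixin; the config
# fields and class names are invented for illustration.
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin


class ExampleConfig(RemoteInferenceProviderConfig):
    url: str = "http://localhost:8000/v1"  # hypothetical field
    api_token: str | None = None  # hypothetical field


class ExampleAdapter(OpenAIMixin):
    config: ExampleConfig

    def get_api_key(self) -> str:
        return self.config.api_token or ""

    def get_base_url(self) -> str:
        return self.config.url
```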
@@ -73,9 +87,6 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):
     # Optional field name in provider data to look for API key, which takes precedence
     provider_data_api_key_field: str | None = None

-    # automatically set by the resolver when instantiating the provider
-    __provider_id__: str
-
     @abstractmethod
     def get_api_key(self) -> str:
         """
@@ -111,6 +122,38 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):
         """
         return {}

+    async def list_provider_model_ids(self) -> Iterable[str]:
+        """
+        List available models from the provider.
+
+        Child classes can override this method to provide a custom implementation
+        for listing models. The default implementation uses the AsyncOpenAI client
+        to list models from the OpenAI-compatible endpoint.
+
+        :return: An iterable of model IDs or None if not implemented
+        """
+        return [m.id async for m in self.client.models.list()]
+
+    async def initialize(self) -> None:
+        """
+        Initialize the OpenAI mixin.
+
+        This method provides a default implementation that does nothing.
+        Subclasses can override this method to perform initialization tasks
+        such as setting up clients, validating configurations, etc.
+        """
+        pass
+
+    async def shutdown(self) -> None:
+        """
+        Shutdown the OpenAI mixin.
+
+        This method provides a default implementation that does nothing.
+        Subclasses can override this method to perform cleanup tasks
+        such as closing connections, releasing resources, etc.
+        """
+        pass
+
     @property
     def client(self) -> AsyncOpenAI:
         """
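`list_provider_model_ids()` is the new extension point for providers whose model listing is not OpenAI-shaped. A hedged sketch of such an override; the endpoint path and JSON shape are invented for illustration:

```python
# Hedged sketch: override list_provider_model_ids() when the provider exposes a
# non-OpenAI listing endpoint. Builds on the hypothetical ExampleAdapter above.
from collections.abc import Iterable

import httpx


class ExampleAdapterWithCustomListing(ExampleAdapter):
    async def list_provider_model_ids(self) -> Iterable[str]:
        async with httpx.AsyncClient() as client:
            resp = await client.get(f"{self.get_base_url()}/models")
            resp.raise_for_status()
        # Return plain string IDs; list_models() turns them into Model objects.
        return [item["id"] for item in resp.json().get("data", [])]
```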
@@ -371,7 +414,7 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):

     async def register_model(self, model: Model) -> Model:
         if not await self.check_model_availability(model.provider_model_id):
-            raise ValueError(f"Model {model.provider_model_id} is not available from provider {self.__provider_id__}")
+            raise ValueError(f"Model {model.provider_model_id} is not available from provider {self.__provider_id__}")  # type: ignore[attr-defined]
         return model

     async def unregister_model(self, model_id: str) -> None:
@@ -387,28 +430,42 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):
         """
         self._model_cache = {}

-        async for m in self.client.models.list():
-            if self.allowed_models and m.id not in self.allowed_models:
-                logger.info(f"Skipping model {m.id} as it is not in the allowed models list")
+        try:
+            iterable = await self.list_provider_model_ids()
+        except Exception as e:
+            logger.error(f"{self.__class__.__name__}.list_provider_model_ids() failed with: {e}")
+            raise
+        if not hasattr(iterable, "__iter__"):
+            raise TypeError(
+                f"Failed to list models: {self.__class__.__name__}.list_provider_model_ids() must return an iterable of "
+                f"strings, but returned {type(iterable).__name__}"
+            )
+
+        provider_models_ids = list(iterable)
+        logger.info(f"{self.__class__.__name__}.list_provider_model_ids() returned {len(provider_models_ids)} models")
+
+        for provider_model_id in provider_models_ids:
+            if not isinstance(provider_model_id, str):
+                raise ValueError(f"Model ID {provider_model_id} from list_provider_model_ids() is not a string")
+            if self.allowed_models and provider_model_id not in self.allowed_models:
+                logger.info(f"Skipping model {provider_model_id} as it is not in the allowed models list")
                 continue
-            if metadata := self.embedding_model_metadata.get(m.id):
-                # This is an embedding model - augment with metadata
+            if metadata := self.embedding_model_metadata.get(provider_model_id):
                 model = Model(
                     provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    provider_resource_id=m.id,
-                    identifier=m.id,
+                    provider_resource_id=provider_model_id,
+                    identifier=provider_model_id,
                     model_type=ModelType.embedding,
                     metadata=metadata,
                 )
             else:
-                # This is an LLM
                 model = Model(
                     provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    provider_resource_id=m.id,
-                    identifier=m.id,
+                    provider_resource_id=provider_model_id,
+                    identifier=provider_model_id,
                     model_type=ModelType.llm,
                 )
-            self._model_cache[m.id] = model
+            self._model_cache[provider_model_id] = model

         return list(self._model_cache.values())
@@ -425,3 +482,29 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):

     async def should_refresh_models(self) -> bool:
         return False
+
+    #
+    # The model_dump implementations are to avoid serializing the extra fields,
+    # e.g. model_store, which are not pydantic.
+    #
+
+    def _filter_fields(self, **kwargs):
+        """Helper to exclude extra fields from serialization."""
+        # Exclude any extra fields stored in __pydantic_extra__
+        if hasattr(self, "__pydantic_extra__") and self.__pydantic_extra__:
+            exclude = kwargs.get("exclude", set())
+            if not isinstance(exclude, set):
+                exclude = set(exclude) if exclude else set()
+            exclude.update(self.__pydantic_extra__.keys())
+            kwargs["exclude"] = exclude
+        return kwargs
+
+    def model_dump(self, **kwargs):
+        """Override to exclude extra fields from serialization."""
+        kwargs = self._filter_fields(**kwargs)
+        return super().model_dump(**kwargs)
+
+    def model_dump_json(self, **kwargs):
+        """Override to exclude extra fields from JSON serialization."""
+        kwargs = self._filter_fields(**kwargs)
+        return super().model_dump_json(**kwargs)
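A hedged illustration of what the serialization overrides buy: attributes injected by the routing infrastructure end up in `__pydantic_extra__` and stay out of dumps. `ExampleAdapter`/`ExampleConfig` refer to the hypothetical sketch earlier in these notes, not to code in this patch:

```python
# Hedged sketch; ExampleAdapter/ExampleConfig are the hypothetical classes above.
adapter = ExampleAdapter(config=ExampleConfig())

# With extra="allow", unknown attributes are stored as pydantic "extra" fields,
# which is how the routing infra injects model_store and similar state.
adapter.model_store = object()  # stand-in for the injected store

dumped = adapter.model_dump()
assert "model_store" not in dumped  # excluded by the model_dump override
assert "config" in dumped  # declared fields are still serialized
```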
@@ -11,6 +11,43 @@ from typing import Any, TypeVar
 from .strong_typing.schema import json_schema_type, register_schema  # noqa: F401


+class ExtraBodyField[T]:
+    """
+    Marker annotation for parameters that arrive via extra_body in the client SDK.
+
+    These parameters:
+    - Will NOT appear in the generated client SDK method signature
+    - WILL be documented in OpenAPI spec under x-llama-stack-extra-body-params
+    - MUST be passed via the extra_body parameter in client SDK calls
+    - WILL be available in server-side method signature with proper typing
+
+    Example:
+    ```python
+    async def create_openai_response(
+        self,
+        input: str,
+        model: str,
+        shields: Annotated[
+            list[str] | None, ExtraBodyField("List of shields to apply")
+        ] = None,
+    ) -> ResponseObject:
+        # shields is available here with proper typing
+        if shields:
+            print(f"Using shields: {shields}")
+    ```
+
+    Client usage:
+    ```python
+    client.responses.create(
+        input="hello", model="llama-3", extra_body={"shields": ["shield-1"]}
+    )
+    ```
+    """
+
+    def __init__(self, description: str | None = None):
+        self.description = description
+
+
 @dataclass
 class WebMethod:
     level: str | None = None
@@ -26,7 +63,7 @@ class WebMethod:
     deprecated: bool | None = False


-T = TypeVar("T", bound=Callable[..., Any])
+CallableT = TypeVar("CallableT", bound=Callable[..., Any])


 def webmethod(
@@ -40,7 +77,7 @@ def webmethod(
     descriptive_name: str | None = None,
     required_scope: str | None = None,
     deprecated: bool | None = False,
-) -> Callable[[T], T]:
+) -> Callable[[CallableT], CallableT]:
     """
     Decorator that supplies additional metadata to an endpoint operation function.

@@ -51,7 +88,7 @@ def webmethod(
     :param required_scope: Required scope for this endpoint (e.g., 'monitoring.viewer').
     """

-    def wrap(func: T) -> T:
+    def wrap(func: CallableT) -> CallableT:
         webmethod_obj = WebMethod(
             route=route,
             method=method,
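Renaming the TypeVar to `CallableT` and threading it through `wrap` keeps the decorator signature-preserving for type checkers. A hedged usage sketch; the route and handler are invented and the import path is assumed from the repo layout:

```python
# Hedged sketch: the decorated function keeps its own signature for type checkers.
from llama_stack.schema_utils import webmethod


@webmethod(route="/v1/example/ping", method="GET")  # hypothetical route
async def ping(message: str) -> str:
    return message


# Because webmethod() returns Callable[[CallableT], CallableT], tools like mypy
# still see `ping` with its original async signature rather than an untyped callable.
```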
@@ -22,10 +22,18 @@ from llama_stack.log import get_logger
 logger = get_logger(__name__, category="testing")

 # Global state for the recording system
+# Note: Using module globals instead of ContextVars because the session-scoped
+# client initialization happens in one async context, but tests run in different
+# contexts, and we need the mode/storage to persist across all contexts.
 _current_mode: str | None = None
 _current_storage: ResponseStorage | None = None
 _original_methods: dict[str, Any] = {}

+# Test context uses ContextVar since it changes per-test and needs async isolation
+from contextvars import ContextVar
+
+_test_context: ContextVar[str | None] = ContextVar("_test_context", default=None)
+
 from openai.types.completion_choice import CompletionChoice

 # update the "finish_reason" field, since its type definition is wrong (no None is accepted)
@@ -33,22 +41,38 @@ CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "len
 CompletionChoice.model_rebuild()

 REPO_ROOT = Path(__file__).parent.parent.parent
-DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/recordings"
+DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/common"


 class InferenceMode(StrEnum):
     LIVE = "live"
     RECORD = "record"
     REPLAY = "replay"
+    RECORD_IF_MISSING = "record-if-missing"


 def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict[str, Any]) -> str:
-    """Create a normalized hash of the request for consistent matching."""
+    """Create a normalized hash of the request for consistent matching.
+
+    Includes test_id from context to ensure test isolation - identical requests
+    from different tests will have different hashes.
+
+    Exception: Model list endpoints (/v1/models, /api/tags) exclude test_id since
+    they are infrastructure/shared and need to work across session setup and tests.
+    """
     # Extract just the endpoint path
     from urllib.parse import urlparse

     parsed = urlparse(url)
-    normalized = {"method": method.upper(), "endpoint": parsed.path, "body": body}
+    normalized: dict[str, Any] = {
+        "method": method.upper(),
+        "endpoint": parsed.path,
+        "body": body,
+    }
+
+    # Include test_id for isolation, except for shared infrastructure endpoints
+    if parsed.path not in ("/api/tags", "/v1/models"):
+        normalized["test_id"] = _test_context.get()

     # Create hash - sort_keys=True ensures deterministic ordering
     normalized_json = json.dumps(normalized, sort_keys=True)
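A self-contained sketch of what the normalization above amounts to. The final SHA-256 step is an assumption based on the surrounding code, which this hunk does not show:

```python
import hashlib
import json


def sketch_hash(method: str, path: str, body: dict, test_id: str | None) -> str:
    normalized: dict = {"method": method.upper(), "endpoint": path, "body": body}
    if path not in ("/api/tags", "/v1/models"):
        normalized["test_id"] = test_id
    return hashlib.sha256(json.dumps(normalized, sort_keys=True).encode()).hexdigest()


# Identical requests made from two different tests now hash differently...
a = sketch_hash("POST", "/v1/chat/completions", {"model": "m"}, "tests/a.py::test_x")
b = sketch_hash("POST", "/v1/chat/completions", {"model": "m"}, "tests/b.py::test_y")
assert a != b

# ...while model-list requests stay shared across tests and session setup.
assert sketch_hash("GET", "/v1/models", {}, "tests/a.py::test_x") == sketch_hash("GET", "/v1/models", {}, None)
```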
@@ -67,7 +91,11 @@ def setup_inference_recording():
     Currently, this is only supported for OpenAI and Ollama clients. These should cover the vast majority of use cases.

     Two environment variables are supported:
-    - LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', or 'replay'. Default is 'replay'.
+    - LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', 'replay', or 'record-if-missing'. Default is 'replay'.
+      - 'live': Make all requests live without recording
+      - 'record': Record all requests (overwrites existing recordings)
+      - 'replay': Use only recorded responses (fails if recording not found)
+      - 'record-if-missing': Use recorded responses when available, record new ones when not found
     - LLAMA_STACK_TEST_RECORDING_DIR: The directory to store the recordings in. Default is 'tests/integration/recordings'.

     The recordings are stored as JSON files.
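A hedged sketch of driving the recorder programmatically in the new mode. `inference_recording()` and its parameters appear later in this diff; the module path and the body of the `with` block are assumed:

```python
# Hedged sketch; module path assumed from the repo layout.
from llama_stack.testing.inference_recorder import inference_recording

with inference_recording(mode="record-if-missing", storage_dir="tests/integration/common"):
    # Requests that already have a recording are replayed; anything new is
    # recorded in place, so one invocation serves both old and new tests.
    ...
```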
@@ -80,9 +108,43 @@ def setup_inference_recording():
     return inference_recording(mode=mode, storage_dir=storage_dir)


-def _serialize_response(response: Any) -> Any:
+def _normalize_response_data(data: dict[str, Any], request_hash: str) -> dict[str, Any]:
+    """Normalize fields that change between recordings but don't affect functionality.
+
+    This reduces noise in git diffs by making IDs deterministic and timestamps constant.
+    """
+    # Only normalize ID for completion/chat responses, not for model objects
+    # Model objects have "object": "model" and the ID is the actual model identifier
+    if "id" in data and data.get("object") != "model":
+        data["id"] = f"rec-{request_hash[:12]}"
+
+    # Normalize timestamp to epoch (0) (for OpenAI-style responses)
+    # But not for model objects where created timestamp might be meaningful
+    if "created" in data and data.get("object") != "model":
+        data["created"] = 0
+
+    # Normalize Ollama-specific timestamp fields
+    if "created_at" in data:
+        data["created_at"] = "1970-01-01T00:00:00.000000Z"
+
+    # Normalize Ollama-specific duration fields (these vary based on system load)
+    if "total_duration" in data and data["total_duration"] is not None:
+        data["total_duration"] = 0
+    if "load_duration" in data and data["load_duration"] is not None:
+        data["load_duration"] = 0
+    if "prompt_eval_duration" in data and data["prompt_eval_duration"] is not None:
+        data["prompt_eval_duration"] = 0
+    if "eval_duration" in data and data["eval_duration"] is not None:
+        data["eval_duration"] = 0
+
+    return data
+
+
+def _serialize_response(response: Any, request_hash: str = "") -> Any:
     if hasattr(response, "model_dump"):
         data = response.model_dump(mode="json")
+        # Normalize fields to reduce noise
+        data = _normalize_response_data(data, request_hash)
         return {
             "__type__": f"{response.__class__.__module__}.{response.__class__.__qualname__}",
             "__data__": data,
@@ -120,61 +182,121 @@ def _deserialize_response(data: dict[str, Any]) -> Any:
 class ResponseStorage:
     """Handles SQLite index + JSON file storage/retrieval for inference recordings."""

-    def __init__(self, test_dir: Path):
-        self.test_dir = test_dir
-        self.responses_dir = self.test_dir / "responses"
+    def __init__(self, base_dir: Path):
+        self.base_dir = base_dir
+        # Don't create responses_dir here - determine it per-test at runtime

-        self._ensure_directories()
+    def _get_test_dir(self) -> Path:
+        """Get the recordings directory in the test file's parent directory.
+
+        For test at "tests/integration/inference/test_foo.py::test_bar",
+        returns "tests/integration/inference/recordings/".
+        """
+        test_id = _test_context.get()
+        if test_id:
+            # Extract the directory path from the test nodeid
+            # e.g., "tests/integration/inference/test_basic.py::test_foo[params]"
+            # -> get "tests/integration/inference"
+            test_file = test_id.split("::")[0]  # Remove test function part
+            test_dir = Path(test_file).parent  # Get parent directory
+
+            # Put recordings in a "recordings" subdirectory of the test's parent dir
+            # e.g., "tests/integration/inference" -> "tests/integration/inference/recordings"
+            return test_dir / "recordings"
+        else:
+            # Fallback for non-test contexts
+            return self.base_dir / "recordings"

     def _ensure_directories(self):
-        self.test_dir.mkdir(parents=True, exist_ok=True)
-        self.responses_dir.mkdir(exist_ok=True)
+        """Ensure test-specific directories exist."""
+        test_dir = self._get_test_dir()
+        test_dir.mkdir(parents=True, exist_ok=True)
+        return test_dir

     def store_recording(self, request_hash: str, request: dict[str, Any], response: dict[str, Any]):
         """Store a request/response pair."""
-        # Generate unique response filename
-        short_hash = request_hash[:12]
-        response_file = f"{short_hash}.json"
+        responses_dir = self._ensure_directories()
+
+        # Use FULL hash (not truncated)
+        response_file = f"{request_hash}.json"

         # Serialize response body if needed
         serialized_response = dict(response)
         if "body" in serialized_response:
             if isinstance(serialized_response["body"], list):
                 # Handle streaming responses (list of chunks)
-                serialized_response["body"] = [_serialize_response(chunk) for chunk in serialized_response["body"]]
+                serialized_response["body"] = [
+                    _serialize_response(chunk, request_hash) for chunk in serialized_response["body"]
+                ]
             else:
                 # Handle single response
-                serialized_response["body"] = _serialize_response(serialized_response["body"])
+                serialized_response["body"] = _serialize_response(serialized_response["body"], request_hash)

-        # If this is an Ollama /api/tags recording, include models digest in filename to distinguish variants
+        # For model-list endpoints, include digest in filename to distinguish different model sets
         endpoint = request.get("endpoint")
         if endpoint in ("/api/tags", "/v1/models"):
             digest = _model_identifiers_digest(endpoint, response)
-            response_file = f"models-{short_hash}-{digest}.json"
+            response_file = f"models-{request_hash}-{digest}.json"

-        response_path = self.responses_dir / response_file
+        response_path = responses_dir / response_file

-        # Save response to JSON file
+        # Save response to JSON file with metadata
         with open(response_path, "w") as f:
-            json.dump({"request": request, "response": serialized_response}, f, indent=2)
+            json.dump(
+                {
+                    "test_id": _test_context.get(),  # Include for debugging
+                    "request": request,
+                    "response": serialized_response,
+                },
+                f,
+                indent=2,
+            )
             f.write("\n")
             f.flush()

     def find_recording(self, request_hash: str) -> dict[str, Any] | None:
-        """Find a recorded response by request hash."""
-        response_file = f"{request_hash[:12]}.json"
-        response_path = self.responses_dir / response_file
-
-        if not response_path.exists():
-            return None
-
-        return _recording_from_file(response_path)
-
-    def _model_list_responses(self, short_hash: str) -> list[dict[str, Any]]:
+        """Find a recorded response by request hash.
+
+        Uses fallback: first checks test-specific dir, then falls back to base recordings dir.
+        This handles cases where recordings happen during session setup (no test context) but
+        are requested during tests (with test context).
+        """
+        response_file = f"{request_hash}.json"
+
+        # Try test-specific directory first
+        test_dir = self._get_test_dir()
+        response_path = test_dir / response_file
+
+        if response_path.exists():
+            return _recording_from_file(response_path)
+
+        # Fallback to base recordings directory (for session-level recordings)
+        fallback_dir = self.base_dir / "recordings"
+        fallback_path = fallback_dir / response_file
+
+        if fallback_path.exists():
+            return _recording_from_file(fallback_path)
+
+        return None
+
+    def _model_list_responses(self, request_hash: str) -> list[dict[str, Any]]:
+        """Find all model-list recordings with the given hash (different digests)."""
         results: list[dict[str, Any]] = []
-        for path in self.responses_dir.glob(f"models-{short_hash}-*.json"):
-            data = _recording_from_file(path)
-            results.append(data)
+
+        # Check test-specific directory first
+        test_dir = self._get_test_dir()
+        if test_dir.exists():
+            for path in test_dir.glob(f"models-{request_hash}-*.json"):
+                data = _recording_from_file(path)
+                results.append(data)
+
+        # Also check fallback directory
+        fallback_dir = self.base_dir / "recordings"
+        if fallback_dir.exists():
+            for path in fallback_dir.glob(f"models-{request_hash}-*.json"):
+                data = _recording_from_file(path)
+                results.append(data)
+
         return results
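The per-test directory mapping implemented by `_get_test_dir()` above, shown as a tiny standalone sketch (the test id is invented):

```python
from pathlib import Path

test_id = "tests/integration/inference/test_basic.py::test_completion[llama]"
test_file = test_id.split("::")[0]  # drop the test function part
recordings_dir = Path(test_file).parent / "recordings"
print(recordings_dir)  # tests/integration/inference/recordings
```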
@@ -195,6 +317,8 @@ def _recording_from_file(response_path) -> dict[str, Any]:


 def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
+    """Generate a digest from model identifiers for distinguishing different model sets."""
+
     def _extract_model_identifiers():
         """Extract a stable set of identifiers for model-list endpoints.

@@ -217,7 +341,14 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:


 def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) -> dict[str, Any] | None:
-    """Return a single, unioned recording for supported model-list endpoints."""
+    """Return a single, unioned recording for supported model-list endpoints.
+
+    Merges multiple recordings with different model sets (from different servers) into
+    a single response containing all models.
+    """
+    if not records:
+        return None
+
     seen: dict[str, dict[str, Any]] = {}
     for rec in records:
         body = rec["response"]["body"]
@@ -246,7 +377,10 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
 async def _patched_inference_method(original_method, self, client_type, endpoint, *args, **kwargs):
     global _current_mode, _current_storage

-    if _current_mode == InferenceMode.LIVE or _current_storage is None:
+    mode = _current_mode
+    storage = _current_storage
+
+    if mode == InferenceMode.LIVE or storage is None:
         if endpoint == "/v1/models":
             return original_method(self, *args, **kwargs)
         else:
@@ -277,13 +411,16 @@ async def _patched_inference_method(original_method, self, client_type, endpoint

     request_hash = normalize_request(method, url, headers, body)

-    if _current_mode == InferenceMode.REPLAY:
-        # Special handling for model-list endpoints: return union of all responses
+    # Try to find existing recording for REPLAY or RECORD_IF_MISSING modes
+    recording = None
+    if mode == InferenceMode.REPLAY or mode == InferenceMode.RECORD_IF_MISSING:
+        # Special handling for model-list endpoints: merge all recordings with this hash
         if endpoint in ("/api/tags", "/v1/models"):
-            records = _current_storage._model_list_responses(request_hash[:12])
+            records = storage._model_list_responses(request_hash)
             recording = _combine_model_list_responses(endpoint, records)
         else:
-            recording = _current_storage.find_recording(request_hash)
+            recording = storage.find_recording(request_hash)

         if recording:
             response_body = recording["response"]["body"]
@@ -296,7 +433,8 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
                 return replay_stream()
             else:
                 return response_body
-        else:
+        elif mode == InferenceMode.REPLAY:
+            # REPLAY mode requires recording to exist
             raise RuntimeError(
                 f"No recorded response found for request hash: {request_hash}\n"
                 f"Request: {method} {url} {body}\n"
@@ -304,7 +442,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
                 f"To record this response, run with LLAMA_STACK_TEST_INFERENCE_MODE=record"
             )

-    elif _current_mode == InferenceMode.RECORD:
+    if mode == InferenceMode.RECORD or (mode == InferenceMode.RECORD_IF_MISSING and not recording):
         if endpoint == "/v1/models":
             response = original_method(self, *args, **kwargs)
         else:
@@ -335,7 +473,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint

             # Store the recording immediately
             response_data = {"body": chunks, "is_streaming": True}
-            _current_storage.store_recording(request_hash, request_data, response_data)
+            storage.store_recording(request_hash, request_data, response_data)

             # Return a generator that replays the stored chunks
             async def replay_recorded_stream():
@@ -345,11 +483,11 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
             return replay_recorded_stream()
         else:
             response_data = {"body": response, "is_streaming": False}
-            _current_storage.store_recording(request_hash, request_data, response_data)
+            storage.store_recording(request_hash, request_data, response_data)
             return response

     else:
-        raise AssertionError(f"Invalid mode: {_current_mode}")
+        raise AssertionError(f"Invalid mode: {mode}")


 def patch_inference_clients():
@@ -490,9 +628,9 @@ def inference_recording(mode: str, storage_dir: str | Path | None = None) -> Gen
     try:
         _current_mode = mode

-        if mode in ["record", "replay"]:
+        if mode in ["record", "replay", "record-if-missing"]:
             if storage_dir is None:
-                raise ValueError("storage_dir is required for record and replay modes")
+                raise ValueError("storage_dir is required for record, replay, and record-if-missing modes")
             _current_storage = ResponseStorage(Path(storage_dir))
             patch_inference_clients()

@@ -500,7 +638,7 @@ def inference_recording(mode: str, storage_dir: str | Path | None = None) -> Gen

     finally:
         # Restore previous state
-        if mode in ["record", "replay"]:
+        if mode in ["record", "replay", "record-if-missing"]:
             unpatch_inference_clients()

        _current_mode = prev_mode
118  llama_stack/ui/package-lock.json  generated
@@ -20,11 +20,11 @@
         "framer-motion": "^12.23.12",
         "llama-stack-client": "^0.2.23",
         "lucide-react": "^0.542.0",
-        "next": "15.5.3",
+        "next": "15.5.4",
         "next-auth": "^4.24.11",
         "next-themes": "^0.4.6",
         "react": "^19.0.0",
-        "react-dom": "^19.1.1",
+        "react-dom": "^19.2.0",
         "react-markdown": "^10.1.0",
         "remark-gfm": "^4.0.1",
         "remeda": "^2.32.0",
@ -2279,9 +2279,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@next/env": {
|
"node_modules/@next/env": {
|
||||||
"version": "15.5.3",
|
"version": "15.5.4",
|
||||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-15.5.3.tgz",
|
"resolved": "https://registry.npmjs.org/@next/env/-/env-15.5.4.tgz",
|
||||||
"integrity": "sha512-RSEDTRqyihYXygx/OJXwvVupfr9m04+0vH8vyy0HfZ7keRto6VX9BbEk0J2PUk0VGy6YhklJUSrgForov5F9pw==",
|
"integrity": "sha512-27SQhYp5QryzIT5uO8hq99C69eLQ7qkzkDPsk3N+GuS2XgOgoYEeOav7Pf8Tn4drECOVDsDg8oj+/DVy8qQL2A==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/@next/eslint-plugin-next": {
|
"node_modules/@next/eslint-plugin-next": {
|
||||||
|
|
@ -2295,9 +2295,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@next/swc-darwin-arm64": {
|
"node_modules/@next/swc-darwin-arm64": {
|
||||||
"version": "15.5.3",
|
"version": "15.5.4",
|
||||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.5.3.tgz",
|
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.5.4.tgz",
|
||||||
"integrity": "sha512-nzbHQo69+au9wJkGKTU9lP7PXv0d1J5ljFpvb+LnEomLtSbJkbZyEs6sbF3plQmiOB2l9OBtN2tNSvCH1nQ9Jg==",
|
"integrity": "sha512-nopqz+Ov6uvorej8ndRX6HlxCYWCO3AHLfKK2TYvxoSB2scETOcfm/HSS3piPqc3A+MUgyHoqE6je4wnkjfrOA==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
|
|
@ -2311,9 +2311,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@next/swc-darwin-x64": {
|
"node_modules/@next/swc-darwin-x64": {
|
||||||
"version": "15.5.3",
|
"version": "15.5.4",
|
||||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.5.3.tgz",
|
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.5.4.tgz",
|
||||||
"integrity": "sha512-w83w4SkOOhekJOcA5HBvHyGzgV1W/XvOfpkrxIse4uPWhYTTRwtGEM4v/jiXwNSJvfRvah0H8/uTLBKRXlef8g==",
|
"integrity": "sha512-QOTCFq8b09ghfjRJKfb68kU9k2K+2wsC4A67psOiMn849K9ZXgCSRQr0oVHfmKnoqCbEmQWG1f2h1T2vtJJ9mA==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
|
|
@ -2327,9 +2327,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||||
"version": "15.5.3",
|
"version": "15.5.4",
|
||||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.5.3.tgz",
|
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.5.4.tgz",
|
||||||
"integrity": "sha512-+m7pfIs0/yvgVu26ieaKrifV8C8yiLe7jVp9SpcIzg7XmyyNE7toC1fy5IOQozmr6kWl/JONC51osih2RyoXRw==",
|
"integrity": "sha512-eRD5zkts6jS3VfE/J0Kt1VxdFqTnMc3QgO5lFE5GKN3KDI/uUpSyK3CjQHmfEkYR4wCOl0R0XrsjpxfWEA++XA==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
|
|
@ -2343,9 +2343,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@next/swc-linux-arm64-musl": {
|
"node_modules/@next/swc-linux-arm64-musl": {
|
||||||
"version": "15.5.3",
|
"version": "15.5.4",
|
||||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.5.3.tgz",
|
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.5.4.tgz",
|
||||||
"integrity": "sha512-u3PEIzuguSenoZviZJahNLgCexGFhso5mxWCrrIMdvpZn6lkME5vc/ADZG8UUk5K1uWRy4hqSFECrON6UKQBbQ==",
|
"integrity": "sha512-TOK7iTxmXFc45UrtKqWdZ1shfxuL4tnVAOuuJK4S88rX3oyVV4ZkLjtMT85wQkfBrOOvU55aLty+MV8xmcJR8A==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
|
|
@ -2359,9 +2359,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@next/swc-linux-x64-gnu": {
|
"node_modules/@next/swc-linux-x64-gnu": {
|
||||||
"version": "15.5.3",
|
"version": "15.5.4",
|
||||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.5.3.tgz",
|
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.5.4.tgz",
|
||||||
"integrity": "sha512-lDtOOScYDZxI2BENN9m0pfVPJDSuUkAD1YXSvlJF0DKwZt0WlA7T7o3wrcEr4Q+iHYGzEaVuZcsIbCps4K27sA==",
|
"integrity": "sha512-7HKolaj+481FSW/5lL0BcTkA4Ueam9SPYWyN/ib/WGAFZf0DGAN8frNpNZYFHtM4ZstrHZS3LY3vrwlIQfsiMA==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
|
|
@ -2375,9 +2375,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@next/swc-linux-x64-musl": {
|
"node_modules/@next/swc-linux-x64-musl": {
|
||||||
"version": "15.5.3",
|
"version": "15.5.4",
|
||||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.5.3.tgz",
|
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.5.4.tgz",
|
||||||
"integrity": "sha512-9vWVUnsx9PrY2NwdVRJ4dUURAQ8Su0sLRPqcCCxtX5zIQUBES12eRVHq6b70bbfaVaxIDGJN2afHui0eDm+cLg==",
|
"integrity": "sha512-nlQQ6nfgN0nCO/KuyEUwwOdwQIGjOs4WNMjEUtpIQJPR2NUfmGpW2wkJln1d4nJ7oUzd1g4GivH5GoEPBgfsdw==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
|
|
@ -2391,9 +2391,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||||
"version": "15.5.3",
|
"version": "15.5.4",
|
||||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.5.3.tgz",
|
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.5.4.tgz",
|
||||||
"integrity": "sha512-1CU20FZzY9LFQigRi6jM45oJMU3KziA5/sSG+dXeVaTm661snQP6xu3ykGxxwU5sLG3sh14teO/IOEPVsQMRfA==",
|
"integrity": "sha512-PcR2bN7FlM32XM6eumklmyWLLbu2vs+D7nJX8OAIoWy69Kef8mfiN4e8TUv2KohprwifdpFKPzIP1njuCjD0YA==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
|
|
@ -2407,9 +2407,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@next/swc-win32-x64-msvc": {
|
"node_modules/@next/swc-win32-x64-msvc": {
|
||||||
"version": "15.5.3",
|
"version": "15.5.4",
|
||||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.5.3.tgz",
|
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.5.4.tgz",
|
||||||
"integrity": "sha512-JMoLAq3n3y5tKXPQwCK5c+6tmwkuFDa2XAxz8Wm4+IVthdBZdZGh+lmiLUHg9f9IDwIQpUjp+ysd6OkYTyZRZw==",
|
"integrity": "sha512-1ur2tSHZj8Px/KMAthmuI9FMp/YFusMMGoRNJaRZMOlSkgvLjzosSdQI0cJAKogdHl3qXUQKL9MGaYvKwA7DXg==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
|
|
@ -3995,22 +3995,22 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/react": {
|
"node_modules/@types/react": {
|
||||||
"version": "19.1.4",
|
"version": "19.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/@types/react/-/react-19.1.4.tgz",
|
"resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.0.tgz",
|
||||||
"integrity": "sha512-EB1yiiYdvySuIITtD5lhW4yPyJ31RkJkkDw794LaQYrxCSaQV/47y5o1FMC4zF9ZyjUjzJMZwbovEnT5yHTW6g==",
|
"integrity": "sha512-1LOH8xovvsKsCBq1wnT4ntDUdCJKmnEakhsuoUSy6ExlHCkGP2hqnatagYTgFk6oeL0VU31u7SNjunPN+GchtA==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"csstype": "^3.0.2"
|
"csstype": "^3.0.2"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/react-dom": {
|
"node_modules/@types/react-dom": {
|
||||||
"version": "19.1.9",
|
"version": "19.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.1.9.tgz",
|
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.2.0.tgz",
|
||||||
"integrity": "sha512-qXRuZaOsAdXKFyOhRBg6Lqqc0yay13vN7KrIg4L7N4aaHN68ma9OK3NE1BoDFgFOTfM7zg+3/8+2n8rLUH3OKQ==",
|
"integrity": "sha512-brtBs0MnE9SMx7px208g39lRmC5uHZs96caOJfTjFcYSLHNamvaSMfJNagChVNkup2SdtOxKX1FDBkRSJe1ZAg==",
|
||||||
"devOptional": true,
|
"devOptional": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"@types/react": "^19.0.0"
|
"@types/react": "^19.2.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/stack-utils": {
|
"node_modules/@types/stack-utils": {
|
||||||
|
|
@ -11414,12 +11414,12 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/next": {
|
"node_modules/next": {
|
||||||
"version": "15.5.3",
|
"version": "15.5.4",
|
||||||
"resolved": "https://registry.npmjs.org/next/-/next-15.5.3.tgz",
|
"resolved": "https://registry.npmjs.org/next/-/next-15.5.4.tgz",
|
||||||
"integrity": "sha512-r/liNAx16SQj4D+XH/oI1dlpv9tdKJ6cONYPwwcCC46f2NjpaRWY+EKCzULfgQYV6YKXjHBchff2IZBSlZmJNw==",
|
"integrity": "sha512-xH4Yjhb82sFYQfY3vbkJfgSDgXvBB6a8xPs9i35k6oZJRoQRihZH+4s9Yo2qsWpzBmZ3lPXaJ2KPXLfkvW4LnA==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@next/env": "15.5.3",
|
"@next/env": "15.5.4",
|
||||||
"@swc/helpers": "0.5.15",
|
"@swc/helpers": "0.5.15",
|
||||||
"caniuse-lite": "^1.0.30001579",
|
"caniuse-lite": "^1.0.30001579",
|
||||||
"postcss": "8.4.31",
|
"postcss": "8.4.31",
|
||||||
|
|
@ -11432,14 +11432,14 @@
|
||||||
"node": "^18.18.0 || ^19.8.0 || >= 20.0.0"
|
"node": "^18.18.0 || ^19.8.0 || >= 20.0.0"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@next/swc-darwin-arm64": "15.5.3",
|
"@next/swc-darwin-arm64": "15.5.4",
|
||||||
"@next/swc-darwin-x64": "15.5.3",
|
"@next/swc-darwin-x64": "15.5.4",
|
||||||
"@next/swc-linux-arm64-gnu": "15.5.3",
|
"@next/swc-linux-arm64-gnu": "15.5.4",
|
||||||
"@next/swc-linux-arm64-musl": "15.5.3",
|
"@next/swc-linux-arm64-musl": "15.5.4",
|
||||||
"@next/swc-linux-x64-gnu": "15.5.3",
|
"@next/swc-linux-x64-gnu": "15.5.4",
|
||||||
"@next/swc-linux-x64-musl": "15.5.3",
|
"@next/swc-linux-x64-musl": "15.5.4",
|
||||||
"@next/swc-win32-arm64-msvc": "15.5.3",
|
"@next/swc-win32-arm64-msvc": "15.5.4",
|
||||||
"@next/swc-win32-x64-msvc": "15.5.3",
|
"@next/swc-win32-x64-msvc": "15.5.4",
|
||||||
"sharp": "^0.34.3"
|
"sharp": "^0.34.3"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
|
|
@ -12450,24 +12450,24 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/react": {
|
"node_modules/react": {
|
||||||
"version": "19.1.1",
|
"version": "19.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/react/-/react-19.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/react/-/react-19.2.0.tgz",
|
||||||
"integrity": "sha512-w8nqGImo45dmMIfljjMwOGtbmC/mk4CMYhWIicdSflH91J9TyCyczcPFXJzrZ/ZXcgGRFeP6BU0BEJTw6tZdfQ==",
|
"integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/react-dom": {
|
"node_modules/react-dom": {
|
||||||
"version": "19.1.1",
|
"version": "19.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.0.tgz",
|
||||||
"integrity": "sha512-Dlq/5LAZgF0Gaz6yiqZCf6VCcZs1ghAJyrsu84Q/GT0gV+mCxbfmKNoGRKBYMJ8IEdGPqu49YWXD02GCknEDkw==",
|
"integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"scheduler": "^0.26.0"
|
"scheduler": "^0.27.0"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"react": "^19.1.1"
|
"react": "^19.2.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/react-is": {
|
"node_modules/react-is": {
|
||||||
|
|
@ -12982,9 +12982,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/scheduler": {
|
"node_modules/scheduler": {
|
||||||
"version": "0.26.0",
|
"version": "0.27.0",
|
||||||
"resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.26.0.tgz",
|
"resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz",
|
||||||
"integrity": "sha512-NlHwttCI/l5gCPR3D1nNXtWABUmBwvZpEQiD4IXSbIDq8BzLIK/7Ir5gTFSGZDUu37K5cMNp0hFtzO38sC7gWA==",
|
"integrity": "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/semver": {
|
"node_modules/semver": {
|
||||||
|
|
|
@@ -25,11 +25,11 @@
     "framer-motion": "^12.23.12",
     "llama-stack-client": "^0.2.23",
     "lucide-react": "^0.542.0",
-    "next": "15.5.3",
+    "next": "15.5.4",
     "next-auth": "^4.24.11",
     "next-themes": "^0.4.6",
     "react": "^19.0.0",
-    "react-dom": "^19.1.1",
+    "react-dom": "^19.2.0",
     "react-markdown": "^10.1.0",
     "remark-gfm": "^4.0.1",
     "remeda": "^2.32.0",

|
|
@ -99,6 +99,7 @@ unit = [
|
||||||
"coverage",
|
"coverage",
|
||||||
"chromadb>=1.0.15",
|
"chromadb>=1.0.15",
|
||||||
"moto[s3]>=5.1.10",
|
"moto[s3]>=5.1.10",
|
||||||
|
"weaviate-client>=4.16.4",
|
||||||
]
|
]
|
||||||
# These are the core dependencies required for running integration tests. They are shared across all
|
# These are the core dependencies required for running integration tests. They are shared across all
|
||||||
# providers. If a provider requires additional dependencies, please add them to your environment
|
# providers. If a provider requires additional dependencies, please add them to your environment
|
||||||
|
|
@ -277,14 +278,10 @@ exclude = [
|
||||||
"^llama_stack/providers/remote/datasetio/huggingface/",
|
"^llama_stack/providers/remote/datasetio/huggingface/",
|
||||||
"^llama_stack/providers/remote/datasetio/nvidia/",
|
"^llama_stack/providers/remote/datasetio/nvidia/",
|
||||||
"^llama_stack/providers/remote/inference/bedrock/",
|
"^llama_stack/providers/remote/inference/bedrock/",
|
||||||
"^llama_stack/providers/remote/inference/cerebras/",
|
|
||||||
"^llama_stack/providers/remote/inference/databricks/",
|
|
||||||
"^llama_stack/providers/remote/inference/fireworks/",
|
|
||||||
"^llama_stack/providers/remote/inference/nvidia/",
|
"^llama_stack/providers/remote/inference/nvidia/",
|
||||||
"^llama_stack/providers/remote/inference/passthrough/",
|
"^llama_stack/providers/remote/inference/passthrough/",
|
||||||
"^llama_stack/providers/remote/inference/runpod/",
|
"^llama_stack/providers/remote/inference/runpod/",
|
||||||
"^llama_stack/providers/remote/inference/tgi/",
|
"^llama_stack/providers/remote/inference/tgi/",
|
||||||
"^llama_stack/providers/remote/inference/together/",
|
|
||||||
"^llama_stack/providers/remote/inference/watsonx/",
|
"^llama_stack/providers/remote/inference/watsonx/",
|
||||||
"^llama_stack/providers/remote/safety/bedrock/",
|
"^llama_stack/providers/remote/safety/bedrock/",
|
||||||
"^llama_stack/providers/remote/safety/nvidia/",
|
"^llama_stack/providers/remote/safety/nvidia/",
|
||||||
|
|
|
||||||
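A brief note on the new test dependency (not part of the diff): the unit-test group now pulls in the Weaviate client, so environments built outside the project's dependency-group mechanism need it installed explicitly. A minimal sketch, assuming a plain pip-managed virtualenv and the standard tests/unit location:

```bash
# Install the new unit-test dependency added above
pip install "weaviate-client>=4.16.4"

# Then run the unit suite as usual
pytest tests/unit
```
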
scripts/normalize_recordings.py (new executable file, 120 lines)
@@ -0,0 +1,120 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""
Utility script to re-normalize existing recording files.

This script reads all recording JSON files and applies the normalization
to make IDs deterministic and timestamps constant. This reduces noise in
git diffs when recordings are re-recorded.

Usage:
    python scripts/normalize_recordings.py [--dry-run]
"""

import argparse
import json
from pathlib import Path


def normalize_response_data(data: dict, request_hash: str) -> dict:
    """Normalize fields that change between recordings but don't affect functionality."""
    # Only normalize ID for completion/chat responses, not for model objects
    # Model objects have "object": "model" and the ID is the actual model identifier
    if "id" in data and data.get("object") != "model":
        data["id"] = f"rec-{request_hash[:12]}"

    # Normalize timestamp to epoch (0) (for OpenAI-style responses)
    # But not for model objects where created timestamp might be meaningful
    if "created" in data and data.get("object") != "model":
        data["created"] = 0

    # Normalize Ollama-specific timestamp fields
    if "created_at" in data:
        data["created_at"] = "1970-01-01T00:00:00.000000Z"

    # Normalize Ollama-specific duration fields (these vary based on system load)
    if "total_duration" in data and data["total_duration"] is not None:
        data["total_duration"] = 0
    if "load_duration" in data and data["load_duration"] is not None:
        data["load_duration"] = 0
    if "prompt_eval_duration" in data and data["prompt_eval_duration"] is not None:
        data["prompt_eval_duration"] = 0
    if "eval_duration" in data and data["eval_duration"] is not None:
        data["eval_duration"] = 0

    return data


def normalize_recording_file(file_path: Path, dry_run: bool = False) -> bool:
    """Normalize a single recording file. Returns True if file was modified."""
    with open(file_path) as f:
        recording = json.load(f)

    # Extract request hash from filename (first 12 chars)
    request_hash = file_path.stem.split("-")[-1] if "-" in file_path.stem else file_path.stem

    modified = False
    old_recording = json.dumps(recording, sort_keys=True)

    # NOTE: We do NOT normalize request body here because that would change the request hash
    # and break recording lookups. The recorder will normalize tool_call_ids in future recordings.

    # Normalize response body
    if "response" in recording and "body" in recording["response"]:
        body = recording["response"]["body"]

        if isinstance(body, list):
            # Handle streaming responses (list of chunks)
            for chunk in body:
                if isinstance(chunk, dict) and "__data__" in chunk:
                    normalize_response_data(chunk["__data__"], request_hash)
        elif isinstance(body, dict) and "__data__" in body:
            # Handle single response
            normalize_response_data(body["__data__"], request_hash)

    # Check if anything changed
    new_recording = json.dumps(recording, sort_keys=True)
    modified = old_recording != new_recording

    if modified and not dry_run:
        with open(file_path, "w") as f:
            json.dump(recording, f, indent=2)
            f.write("\n")

    return modified


def main():
    parser = argparse.ArgumentParser(description="Normalize recording files to reduce git diff noise")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be changed without modifying files")
    args = parser.parse_args()

    recordings_dir = Path(__file__).parent.parent / "tests/integration/recordings/responses"

    if not recordings_dir.exists():
        print(f"Recordings directory not found: {recordings_dir}")
        return 1

    modified_count = 0
    total_count = 0

    for file_path in sorted(recordings_dir.glob("*.json")):
        total_count += 1
        was_modified = normalize_recording_file(file_path, dry_run=args.dry_run)

        if was_modified:
            modified_count += 1
            status = "[DRY RUN] Would normalize" if args.dry_run else "Normalized"
            print(f"{status}: {file_path.name}")

    print(f"\n{'[DRY RUN] ' if args.dry_run else ''}Summary: {modified_count}/{total_count} files modified")
    return 0


if __name__ == "__main__":
    exit(main())

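A quick usage note (not part of the diff): the script supports a preview pass before it rewrites anything, per its own docstring and --dry-run flag. A minimal sketch, assuming it is run from the repository root:

```bash
# Preview which recording files would change, without writing anything
python scripts/normalize_recordings.py --dry-run

# Apply the normalization in place
python scripts/normalize_recordings.py
```
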
scripts/telemetry/grafana-datasources.yaml (new file, 15 lines)
@@ -0,0 +1,15 @@
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: true

  - name: Jaeger
    type: jaeger
    access: proxy
    url: http://jaeger:16686
    editable: true

scripts/telemetry/otel-collector-config.yaml (new file, 40 lines)
@@ -0,0 +1,40 @@
receivers:
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318

processors:
  batch:
    timeout: 1s
    send_batch_size: 1024

exporters:
  # Export traces to Jaeger
  otlp/jaeger:
    endpoint: jaeger:4317
    tls:
      insecure: true

  # Export metrics to Prometheus
  prometheus:
    endpoint: 0.0.0.0:9464
    namespace: llama_stack

  # Debug exporter for troubleshooting
  debug:
    verbosity: detailed

service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [batch]
      exporters: [otlp/jaeger, debug]

    metrics:
      receivers: [otlp]
      processors: [batch]
      exporters: [prometheus, debug]

scripts/telemetry/prometheus.yml (new file, 12 lines)
@@ -0,0 +1,12 @@
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'otel-collector'
    static_configs:
      - targets: ['otel-collector:9464']

@@ -17,6 +17,7 @@
 set -Eeuo pipefail

 CONTAINER_RUNTIME=${CONTAINER_RUNTIME:-docker}
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

 echo "🚀 Setting up telemetry stack for Llama Stack using Podman..."

@@ -53,7 +54,7 @@ $CONTAINER_RUNTIME run -d --name otel-collector \
   -p 4317:4317 \
   -p 9464:9464 \
   -p 13133:13133 \
-  -v $(pwd)/otel-collector-config.yaml:/etc/otel-collector-config.yaml:Z \
+  -v "$SCRIPT_DIR/otel-collector-config.yaml:/etc/otel-collector-config.yaml:Z" \
   docker.io/otel/opentelemetry-collector-contrib:latest \
   --config /etc/otel-collector-config.yaml

@@ -62,7 +63,7 @@ echo "📈 Starting Prometheus..."
 $CONTAINER_RUNTIME run -d --name prometheus \
   --network llama-telemetry \
   -p 9090:9090 \
-  -v $(pwd)/prometheus.yml:/etc/prometheus/prometheus.yml:Z \
+  -v "$SCRIPT_DIR/prometheus.yml:/etc/prometheus/prometheus.yml:Z" \
   docker.io/prom/prometheus:latest \
   --config.file=/etc/prometheus/prometheus.yml \
   --storage.tsdb.path=/prometheus \

@@ -72,13 +73,15 @@ $CONTAINER_RUNTIME run -d --name prometheus \
   --web.enable-lifecycle

 # Start Grafana
+# Note: Using 11.0.0 because grafana:latest arm64 image has a broken /run.sh (0 bytes)
 echo "📊 Starting Grafana..."
 $CONTAINER_RUNTIME run -d --name grafana \
   --network llama-telemetry \
   -p 3000:3000 \
   -e GF_SECURITY_ADMIN_PASSWORD=admin \
   -e GF_USERS_ALLOW_SIGN_UP=false \
-  docker.io/grafana/grafana:latest
+  -v "$SCRIPT_DIR/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
+  docker.io/grafana/grafana:11.0.0

 # Wait for services to start
 echo "⏳ Waiting for services to start..."

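For context (not part of the diff): once the collector is up, an application exports to it over OTLP. A minimal sketch, assuming the Llama Stack server (or any OpenTelemetry-instrumented process) honors the standard OpenTelemetry environment variables; the ports match the collector config above:

```bash
# OTLP over HTTP goes to the collector's 4318 port, gRPC to 4317 (see otel-collector-config.yaml)
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
export OTEL_SERVICE_NAME=llama-stack   # service name shown in Jaeger; adjust as needed

# Traces then land in Jaeger (http://localhost:16686); metrics in Prometheus/Grafana (http://localhost:3000)
```
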
@@ -125,21 +125,28 @@ pytest -s -v tests/integration/vector_io/ \

 ## Recording Modes

-The testing system supports three modes controlled by environment variables:
+The testing system supports four modes controlled by environment variables:

 ### REPLAY Mode (Default)
 Uses cached responses instead of making API calls:
 ```bash
 pytest tests/integration/
 ```

+### RECORD-IF-MISSING Mode (Recommended for adding new tests)
+Records only when no recording exists, otherwise replays. This is the preferred mode for iterative development:
+```bash
+pytest tests/integration/inference/test_new_feature.py --inference-mode=record-if-missing
+```
+
 ### RECORD Mode
-Captures API interactions for later replay:
+**Force-records all API interactions**, overwriting existing recordings. Use with caution as this will re-record everything:
 ```bash
 pytest tests/integration/inference/test_new_feature.py --inference-mode=record
 ```

 ### LIVE Mode
-Tests make real API calls (but not recorded):
+Tests make real API calls (not recorded):
 ```bash
 pytest tests/integration/ --inference-mode=live
 ```

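A small workflow note (not part of the diff): the modes above compose naturally when adding a test, using only the flags documented in the section:

```bash
# First run: create recordings only for tests that don't have one yet
pytest tests/integration/inference/test_new_feature.py --inference-mode=record-if-missing

# Subsequent runs (default replay mode): verify the test passes against the cached responses
pytest tests/integration/inference/test_new_feature.py
```
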
@ -0,0 +1,58 @@
|
||||||
|
{
|
||||||
|
"test_id": "tests/integration/agents/test_agents.py::test_custom_tool_infinite_loop[ollama/llama3.2:3b-instruct-fp16]",
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||||
|
"headers": {},
|
||||||
|
"body": {
|
||||||
|
"model": "llama-guard3:1b",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Task: Check if there is unsafe content in 'Tool' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: Get the boiling point of polyjuice with a tool call.\n\nAssistant: \n\nTool: Error when running tool: get_boiling_point() missing 1 required positional argument: 'liquid_name'\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST Tool message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"stream": false,
|
||||||
|
"temperature": 0.0
|
||||||
|
},
|
||||||
|
"endpoint": "/v1/chat/completions",
|
||||||
|
"model": "llama-guard3:1b"
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": {
|
||||||
|
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-000506671ad4",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"message": {
|
||||||
|
"content": "safe",
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"annotations": null,
|
||||||
|
"audio": null,
|
||||||
|
"function_call": null,
|
||||||
|
"tool_calls": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama-guard3:1b",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 2,
|
||||||
|
"prompt_tokens": 422,
|
||||||
|
"total_tokens": 424,
|
||||||
|
"completion_tokens_details": null,
|
||||||
|
"prompt_tokens_details": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"is_streaming": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -28,7 +28,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -43,7 +43,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -54,7 +54,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -69,7 +69,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -80,7 +80,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -95,7 +95,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -106,7 +106,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -121,7 +121,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -132,7 +132,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -147,7 +147,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -158,7 +158,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -173,7 +173,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -184,7 +184,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -199,7 +199,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -210,7 +210,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -225,7 +225,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -236,7 +236,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -251,7 +251,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -262,7 +262,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -277,7 +277,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -288,7 +288,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -303,7 +303,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -314,7 +314,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -329,7 +329,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -340,7 +340,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -355,7 +355,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -366,7 +366,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -381,7 +381,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -392,7 +392,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -407,7 +407,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -418,7 +418,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -433,7 +433,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -444,7 +444,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -459,7 +459,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -470,7 +470,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -485,7 +485,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437810,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -496,7 +496,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -511,7 +511,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437811,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -522,7 +522,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-130",
|
"id": "rec-044dcd8fdeb1",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -537,7 +537,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437811,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -0,0 +1,58 @@
|
||||||
|
{
|
||||||
|
"test_id": "tests/integration/agents/test_agents.py::test_custom_tool[ollama/llama3.2:3b-instruct-fp16]",
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||||
|
"headers": {},
|
||||||
|
"body": {
|
||||||
|
"model": "llama-guard3:1b",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Task: Check if there is unsafe content in 'Tool' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: Error when running tool: get_boiling_point() got an unexpected keyword argument 'liquid'\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST Tool message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"stream": false,
|
||||||
|
"temperature": 0.0
|
||||||
|
},
|
||||||
|
"endpoint": "/v1/chat/completions",
|
||||||
|
"model": "llama-guard3:1b"
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": {
|
||||||
|
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-06fbbb88ed5e",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"message": {
|
||||||
|
"content": "safe",
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"annotations": null,
|
||||||
|
"audio": null,
|
||||||
|
"function_call": null,
|
||||||
|
"tool_calls": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama-guard3:1b",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 2,
|
||||||
|
"prompt_tokens": 421,
|
||||||
|
"total_tokens": 423,
|
||||||
|
"completion_tokens_details": null,
|
||||||
|
"prompt_tokens_details": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"is_streaming": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -73,7 +73,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -88,7 +88,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441160,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -99,7 +99,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -114,7 +114,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441160,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -125,7 +125,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -140,7 +140,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441160,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -151,7 +151,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -166,7 +166,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441160,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -177,7 +177,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -192,7 +192,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441160,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -203,7 +203,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -218,7 +218,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441160,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -229,7 +229,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -244,7 +244,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441160,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -255,7 +255,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -270,7 +270,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441160,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -281,7 +281,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -296,7 +296,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441160,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -307,7 +307,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -322,7 +322,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441160,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -333,7 +333,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -348,7 +348,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441160,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -359,7 +359,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -374,7 +374,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441160,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -385,7 +385,7 @@
|
||||||
{
|
{
|
||||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-67",
|
"id": "rec-4a32ce3da3ce",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"delta": {
|
"delta": {
|
||||||
|
|
@ -400,7 +400,7 @@
|
||||||
"logprobs": null
|
"logprobs": null
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759441161,
|
"created": 0,
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"object": "chat.completion.chunk",
|
"object": "chat.completion.chunk",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
File diff suppressed because it is too large
|
|
@ -21,7 +21,7 @@
|
||||||
"body": {
|
"body": {
|
||||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"id": "chatcmpl-912",
|
"id": "rec-b58e35a624b0",
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
"finish_reason": "stop",
|
"finish_reason": "stop",
|
||||||
|
|
@ -38,7 +38,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"created": 1759437811,
|
"created": 0,
|
||||||
"model": "llama-guard3:1b",
|
"model": "llama-guard3:1b",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
"service_tier": null,
|
"service_tier": null,
|
||||||
|
|
@ -0,0 +1,104 @@
|
||||||
|
{
|
||||||
|
"test_id": "tests/integration/agents/test_agents.py::test_create_turn_response[ollama/llama3.2:3b-instruct-fp16-client_tools1]",
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||||
|
"headers": {},
|
||||||
|
"body": {
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "You are a helpful assistant"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Call get_boiling_point_with_metadata tool and answer What is the boiling point of polyjuice?"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 512,
|
||||||
|
"stream": true,
|
||||||
|
"temperature": 0.0001,
|
||||||
|
"tool_choice": "auto",
|
||||||
|
"tools": [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "get_boiling_point_with_metadata",
|
||||||
|
"description": "Returns the boiling point of a liquid in Celcius or Fahrenheit"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"top_p": 0.9
|
||||||
|
},
|
||||||
|
"endpoint": "/v1/chat/completions",
|
||||||
|
"model": "llama3.2:3b-instruct-fp16"
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": [
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-176bcef706a9",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"index": 0,
|
||||||
|
"id": "call_wxinam9c",
|
||||||
|
"function": {
|
||||||
|
"arguments": "{}",
|
||||||
|
"name": "get_boiling_point_with_metadata"
|
||||||
|
},
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-176bcef706a9",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": "tool_calls",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"is_streaming": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@@ -0,0 +1,58 @@
{
  "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_none[ollama/llama3.2:3b-instruct-fp16]",
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama-guard3:1b",
      "messages": [
        {
          "role": "user",
          "content": "Task: Check if there is unsafe content in 'User' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST User message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
        }
      ],
      "stream": false,
      "temperature": 0.0
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama-guard3:1b"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "rec-1a0d3109cf92",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "safe",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 0,
        "model": "llama-guard3:1b",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 2,
          "prompt_tokens": 398,
          "total_tokens": 400,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
388 tests/integration/agents/recordings/1d82e9439ae3.json Normal file
@@ -0,0 +1,388 @@
{
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {
          "role": "system",
          "content": "You are a helpful assistant"
        },
        {
          "role": "user",
          "content": "Call get_boiling_point tool and answer What is the boiling point of polyjuice?"
        },
        {
          "role": "assistant",
          "content": "",
          "tool_calls": [
            {
              "id": "toolcall-1d82e943-0",
              "type": "function",
              "function": {
                "name": "get_boiling_point",
                "arguments": "{\"celcius\":null,\"liquid_name\":\"polyjuice\"}"
              }
            }
          ]
        },
        {
          "role": "tool",
          "tool_call_id": "toolcall-1d82e943-0",
          "content": "-212"
        }
      ],
      "max_tokens": 512,
      "stream": true,
      "temperature": 0.0001,
      "tool_choice": "auto",
      "tools": [
        {
          "type": "function",
          "function": {
            "name": "get_boiling_point",
            "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.",
            "parameters": {
              "type": "object",
              "properties": {
                "liquid_name": {
                  "type": "string",
                  "description": "The name of the liquid"
                },
                "celcius": {
                  "type": "boolean",
                  "description": "Whether to return the boiling point in Celcius"
                }
              },
              "required": [
                "liquid_name"
              ]
            }
          }
        }
      ],
      "top_p": 0.9
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1d82e9439ae3",
          "choices": [
            {
              "delta": {
                "content": "The",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 0,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1d82e9439ae3",
          "choices": [
            {
              "delta": {
                "content": " boiling",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 0,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1d82e9439ae3",
          "choices": [
            {
              "delta": {
                "content": " point",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 0,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1d82e9439ae3",
          "choices": [
            {
              "delta": {
                "content": " of",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 0,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1d82e9439ae3",
          "choices": [
            {
              "delta": {
                "content": " poly",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 0,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1d82e9439ae3",
          "choices": [
            {
              "delta": {
                "content": "ju",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 0,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1d82e9439ae3",
          "choices": [
            {
              "delta": {
                "content": "ice",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 0,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1d82e9439ae3",
          "choices": [
            {
              "delta": {
                "content": " is",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 0,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1d82e9439ae3",
          "choices": [
            {
              "delta": {
                "content": " -",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 0,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1d82e9439ae3",
          "choices": [
            {
              "delta": {
                "content": "212",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 0,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1d82e9439ae3",
          "choices": [
            {
              "delta": {
                "content": ".",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 0,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1d82e9439ae3",
          "choices": [
            {
              "delta": {
                "content": "",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": "stop",
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 0,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      }
    ],
    "is_streaming": true
  }
}
Some files were not shown because too many files have changed in this diff.