mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-12 12:06:04 +00:00
Merge
Signed-off-by: Bill Murdock <bmurdock@redhat.com>
This commit is contained in:
commit
e77b7a127c
854 changed files with 165238 additions and 99099 deletions
2
.github/CODEOWNERS
vendored
2
.github/CODEOWNERS
vendored
|
|
@ -2,4 +2,4 @@
|
|||
|
||||
# These owners will be the default owners for everything in
|
||||
# the repo. Unless a later match takes precedence,
|
||||
* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1
|
||||
* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
|
||||
|
|
|
|||
1
.github/TRIAGERS.md
vendored
1
.github/TRIAGERS.md
vendored
|
|
@ -1,2 +1 @@
|
|||
# This file documents Triage members in the Llama Stack community
|
||||
@franciscojavierarceo
|
||||
|
|
|
|||
1
.github/workflows/README.md
vendored
1
.github/workflows/README.md
vendored
|
|
@ -12,6 +12,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
|
|||
| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suites from tests/integration in replay mode |
|
||||
| Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
|
||||
| Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
|
||||
| Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR |
|
||||
| Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
|
||||
| Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
|
||||
| Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
|
||||
|
|
|
|||
25
.github/workflows/integration-tests.yml
vendored
25
.github/workflows/integration-tests.yml
vendored
|
|
@ -42,18 +42,27 @@ jobs:
|
|||
|
||||
run-replay-mode-tests:
|
||||
runs-on: ubuntu-latest
|
||||
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.setup, matrix.python-version, matrix.client-version, matrix.suite) }}
|
||||
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
client-type: [library, server]
|
||||
# Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
|
||||
setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
|
||||
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
|
||||
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
|
||||
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
|
||||
suite: [base, vision]
|
||||
# Define (setup, suite) pairs - they are always matched and cannot be independent
|
||||
# Weekly schedule (Sun 1 AM): vllm+base
|
||||
# Input test-setup=ollama-vision: ollama-vision+vision
|
||||
# Default (including test-setup=ollama): both ollama+base and ollama-vision+vision
|
||||
config: >-
|
||||
${{
|
||||
github.event.schedule == '1 0 * * 0'
|
||||
&& fromJSON('[{"setup": "vllm", "suite": "base"}]')
|
||||
|| github.event.inputs.test-setup == 'ollama-vision'
|
||||
&& fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
|
||||
|| fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
|
||||
}}
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
|
|
@ -64,14 +73,14 @@ jobs:
|
|||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
client-version: ${{ matrix.client-version }}
|
||||
setup: ${{ matrix.setup }}
|
||||
suite: ${{ matrix.suite }}
|
||||
setup: ${{ matrix.config.setup }}
|
||||
suite: ${{ matrix.config.suite }}
|
||||
inference-mode: 'replay'
|
||||
|
||||
- name: Run tests
|
||||
uses: ./.github/actions/run-and-record-tests
|
||||
with:
|
||||
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
|
||||
setup: ${{ matrix.setup }}
|
||||
setup: ${{ matrix.config.setup }}
|
||||
inference-mode: 'replay'
|
||||
suite: ${{ matrix.suite }}
|
||||
suite: ${{ matrix.config.suite }}
|
||||
|
|
|
|||
227
.github/workflows/precommit-trigger.yml
vendored
Normal file
227
.github/workflows/precommit-trigger.yml
vendored
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
name: Pre-commit Bot
|
||||
|
||||
run-name: Pre-commit bot for PR #${{ github.event.issue.number }}
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
pre-commit:
|
||||
# Only run on pull request comments
|
||||
if: github.event.issue.pull_request && contains(github.event.comment.body, '@github-actions run precommit')
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
steps:
|
||||
- name: Check comment author and get PR details
|
||||
id: check_author
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
// Get PR details
|
||||
const pr = await github.rest.pulls.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
pull_number: context.issue.number
|
||||
});
|
||||
|
||||
// Check if commenter has write access or is the PR author
|
||||
const commenter = context.payload.comment.user.login;
|
||||
const prAuthor = pr.data.user.login;
|
||||
|
||||
let hasPermission = false;
|
||||
|
||||
// Check if commenter is PR author
|
||||
if (commenter === prAuthor) {
|
||||
hasPermission = true;
|
||||
console.log(`Comment author ${commenter} is the PR author`);
|
||||
} else {
|
||||
// Check if commenter has write/admin access
|
||||
try {
|
||||
const permission = await github.rest.repos.getCollaboratorPermissionLevel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
username: commenter
|
||||
});
|
||||
|
||||
const level = permission.data.permission;
|
||||
hasPermission = ['write', 'admin', 'maintain'].includes(level);
|
||||
console.log(`Comment author ${commenter} has permission: ${level}`);
|
||||
} catch (error) {
|
||||
console.log(`Could not check permissions for ${commenter}: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasPermission) {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: `❌ @${commenter} You don't have permission to trigger pre-commit. Only PR authors or repository collaborators can run this command.`
|
||||
});
|
||||
core.setFailed(`User ${commenter} does not have permission`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Save PR info for later steps
|
||||
core.setOutput('pr_number', context.issue.number);
|
||||
core.setOutput('pr_head_ref', pr.data.head.ref);
|
||||
core.setOutput('pr_head_sha', pr.data.head.sha);
|
||||
core.setOutput('pr_head_repo', pr.data.head.repo.full_name);
|
||||
core.setOutput('pr_base_ref', pr.data.base.ref);
|
||||
core.setOutput('is_fork', pr.data.head.repo.full_name !== context.payload.repository.full_name);
|
||||
core.setOutput('authorized', 'true');
|
||||
|
||||
- name: React to comment
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
await github.rest.reactions.createForIssueComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
comment_id: context.payload.comment.id,
|
||||
content: 'rocket'
|
||||
});
|
||||
|
||||
- name: Comment starting
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: ${{ steps.check_author.outputs.pr_number }},
|
||||
body: `⏳ Running pre-commit hooks on PR #${{ steps.check_author.outputs.pr_number }}...`
|
||||
});
|
||||
|
||||
- name: Checkout PR branch (same-repo)
|
||||
if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'false'
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
ref: ${{ steps.check_author.outputs.pr_head_ref }}
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Checkout PR branch (fork)
|
||||
if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'true'
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
repository: ${{ steps.check_author.outputs.pr_head_repo }}
|
||||
ref: ${{ steps.check_author.outputs.pr_head_ref }}
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Verify checkout
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
run: |
|
||||
echo "Current SHA: $(git rev-parse HEAD)"
|
||||
echo "Expected SHA: ${{ steps.check_author.outputs.pr_head_sha }}"
|
||||
if [[ "$(git rev-parse HEAD)" != "${{ steps.check_author.outputs.pr_head_sha }}" ]]; then
|
||||
echo "::error::Checked out SHA does not match expected SHA"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Set up Python
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
||||
with:
|
||||
python-version: '3.12'
|
||||
cache: pip
|
||||
cache-dependency-path: |
|
||||
**/requirements*.txt
|
||||
.pre-commit-config.yaml
|
||||
|
||||
- name: Set up Node.js
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
|
||||
with:
|
||||
node-version: '20'
|
||||
cache: 'npm'
|
||||
cache-dependency-path: 'llama_stack/ui/'
|
||||
|
||||
- name: Install npm dependencies
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
run: npm ci
|
||||
working-directory: llama_stack/ui
|
||||
|
||||
- name: Run pre-commit
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
id: precommit
|
||||
uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
|
||||
continue-on-error: true
|
||||
env:
|
||||
SKIP: no-commit-to-branch
|
||||
RUFF_OUTPUT_FORMAT: github
|
||||
|
||||
- name: Check for changes
|
||||
if: steps.check_author.outputs.authorized == 'true'
|
||||
id: changes
|
||||
run: |
|
||||
if ! git diff --exit-code || [ -n "$(git ls-files --others --exclude-standard)" ]; then
|
||||
echo "has_changes=true" >> $GITHUB_OUTPUT
|
||||
echo "Changes detected after pre-commit"
|
||||
else
|
||||
echo "has_changes=false" >> $GITHUB_OUTPUT
|
||||
echo "No changes after pre-commit"
|
||||
fi
|
||||
|
||||
- name: Commit and push changes
|
||||
if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
|
||||
run: |
|
||||
git config --local user.email "github-actions[bot]@users.noreply.github.com"
|
||||
git config --local user.name "github-actions[bot]"
|
||||
|
||||
git add -A
|
||||
git commit -m "style: apply pre-commit fixes
|
||||
|
||||
🤖 Applied by @github-actions bot via pre-commit workflow"
|
||||
|
||||
# Push changes
|
||||
git push origin HEAD:${{ steps.check_author.outputs.pr_head_ref }}
|
||||
|
||||
- name: Comment success with changes
|
||||
if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: ${{ steps.check_author.outputs.pr_number }},
|
||||
body: `✅ Pre-commit hooks completed successfully!\n\n🔧 Changes have been committed and pushed to the PR branch.`
|
||||
});
|
||||
|
||||
- name: Comment success without changes
|
||||
if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'false' && steps.precommit.outcome == 'success'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: ${{ steps.check_author.outputs.pr_number }},
|
||||
body: `✅ Pre-commit hooks passed!\n\n✨ No changes needed - your code is already formatted correctly.`
|
||||
});
|
||||
|
||||
- name: Comment failure
|
||||
if: failure()
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: ${{ steps.check_author.outputs.pr_number }},
|
||||
body: `❌ Pre-commit workflow failed!\n\nPlease check the [workflow logs](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) for details.`
|
||||
});
|
||||
4
.github/workflows/providers-build.yml
vendored
4
.github/workflows/providers-build.yml
vendored
|
|
@ -112,7 +112,7 @@ jobs:
|
|||
fi
|
||||
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
|
||||
echo "Entrypoint: $entrypoint"
|
||||
if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
|
||||
if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
|
||||
echo "Entrypoint is not correct"
|
||||
exit 1
|
||||
fi
|
||||
|
|
@ -150,7 +150,7 @@ jobs:
|
|||
fi
|
||||
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
|
||||
echo "Entrypoint: $entrypoint"
|
||||
if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
|
||||
if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
|
||||
echo "Entrypoint is not correct"
|
||||
exit 1
|
||||
fi
|
||||
|
|
|
|||
2
.github/workflows/python-build-test.yml
vendored
2
.github/workflows/python-build-test.yml
vendored
|
|
@ -24,7 +24,7 @@ jobs:
|
|||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@b75a909f75acd358c2196fb9a5f1299a9a8868a4 # v6.7.0
|
||||
uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6.8.0
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
activate-environment: true
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
[](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
|
||||
[](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
|
||||
|
||||
[**Quick Start**](https://llamastack.github.io/latest/getting_started/index.html) | [**Documentation**](https://llamastack.github.io/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
|
||||
[**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
|
||||
|
||||
|
||||
### ✨🎉 Llama 4 Support 🎉✨
|
||||
|
|
|
|||
|
|
@ -187,21 +187,21 @@ Configure telemetry behavior using environment variables:
|
|||
- **`OTEL_SERVICE_NAME`**: Service name for telemetry (default: empty string)
|
||||
- **`TELEMETRY_SINKS`**: Comma-separated list of sinks (default: `console,sqlite`)
|
||||
|
||||
## Visualization with Jaeger
|
||||
### Quick Setup: Complete Telemetry Stack
|
||||
|
||||
The `otel_trace` sink works with any service compatible with the OpenTelemetry collector. Traces and metrics use separate endpoints but can share the same collector.
|
||||
|
||||
### Starting Jaeger
|
||||
|
||||
Start a Jaeger instance with OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686:
|
||||
Use the automated setup script to launch the complete telemetry stack (Jaeger, OpenTelemetry Collector, Prometheus, and Grafana):
|
||||
|
||||
```bash
|
||||
docker run --pull always --rm --name jaeger \
|
||||
-p 16686:16686 -p 4318:4318 \
|
||||
jaegertracing/jaeger:2.1.0
|
||||
./scripts/telemetry/setup_telemetry.sh
|
||||
```
|
||||
|
||||
Once running, you can visualize traces by navigating to [http://localhost:16686/](http://localhost:16686/).
|
||||
This sets up:
|
||||
- **Jaeger UI**: http://localhost:16686 (traces visualization)
|
||||
- **Prometheus**: http://localhost:9090 (metrics)
|
||||
- **Grafana**: http://localhost:3000 (dashboards with auto-configured data sources)
|
||||
- **OTEL Collector**: http://localhost:4318 (OTLP endpoint)
|
||||
|
||||
Once running, you can visualize traces by navigating to [Grafana](http://localhost:3000/) and login with login `admin` and password `admin`.
|
||||
|
||||
## Querying Metrics
|
||||
|
||||
|
|
|
|||
|
|
@ -357,7 +357,7 @@ server:
|
|||
8. Run the server:
|
||||
|
||||
```bash
|
||||
python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
|
||||
llama stack run ~/.llama/run-byoa.yaml
|
||||
```
|
||||
|
||||
9. Test the API:
|
||||
|
|
|
|||
|
|
@ -170,7 +170,7 @@ spec:
|
|||
- name: llama-stack
|
||||
image: localhost/llama-stack-run-k8s:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
|
||||
command: ["llama", "stack", "run", "/app/config.yaml"]
|
||||
ports:
|
||||
- containerPort: 5000
|
||||
volumeMounts:
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ spec:
|
|||
value: "${SAFETY_MODEL}"
|
||||
- name: TAVILY_SEARCH_API_KEY
|
||||
value: "${TAVILY_SEARCH_API_KEY}"
|
||||
command: ["python", "-m", "llama_stack.core.server.server", "/etc/config/stack_run_config.yaml", "--port", "8321"]
|
||||
command: ["llama", "stack", "run", "/etc/config/stack_run_config.yaml", "--port", "8321"]
|
||||
ports:
|
||||
- containerPort: 8321
|
||||
volumeMounts:
|
||||
|
|
|
|||
|
|
@ -1,4 +1,7 @@
|
|||
---
|
||||
description: "Files
|
||||
|
||||
This API is used to upload documents that can be used with other Llama Stack APIs."
|
||||
sidebar_label: Files
|
||||
title: Files
|
||||
---
|
||||
|
|
@ -7,4 +10,8 @@ title: Files
|
|||
|
||||
## Overview
|
||||
|
||||
Files
|
||||
|
||||
This API is used to upload documents that can be used with other Llama Stack APIs.
|
||||
|
||||
This section contains documentation for all available providers for the **files** API.
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
---
|
||||
description: "Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
||||
description: "Inference
|
||||
|
||||
Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models are supported:
|
||||
- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
|
||||
|
|
@ -12,7 +14,9 @@ title: Inference
|
|||
|
||||
## Overview
|
||||
|
||||
Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
||||
Inference
|
||||
|
||||
Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models are supported:
|
||||
- LLM models: these models generate "raw" and "chat" (conversational) completions.
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ Databricks inference provider for running models on Databricks' unified analytic
|
|||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||
| `url` | `<class 'str'>` | No | | The URL for the Databricks model serving endpoint |
|
||||
| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
|
||||
| `api_token` | `<class 'pydantic.types.SecretStr'>` | No | | The Databricks API token |
|
||||
|
||||
## Sample Configuration
|
||||
|
|
|
|||
|
|
@ -1,4 +1,7 @@
|
|||
---
|
||||
description: "Safety
|
||||
|
||||
OpenAI-compatible Moderations API."
|
||||
sidebar_label: Safety
|
||||
title: Safety
|
||||
---
|
||||
|
|
@ -7,4 +10,8 @@ title: Safety
|
|||
|
||||
## Overview
|
||||
|
||||
Safety
|
||||
|
||||
OpenAI-compatible Moderations API.
|
||||
|
||||
This section contains documentation for all available providers for the **safety** API.
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ from .specification import (
|
|||
Document,
|
||||
Example,
|
||||
ExampleRef,
|
||||
ExtraBodyParameter,
|
||||
MediaType,
|
||||
Operation,
|
||||
Parameter,
|
||||
|
|
@ -677,6 +678,27 @@ class Generator:
|
|||
# parameters passed anywhere
|
||||
parameters = path_parameters + query_parameters
|
||||
|
||||
# Build extra body parameters documentation
|
||||
extra_body_parameters = []
|
||||
for param_name, param_type, description in op.extra_body_params:
|
||||
if is_type_optional(param_type):
|
||||
inner_type: type = unwrap_optional_type(param_type)
|
||||
required = False
|
||||
else:
|
||||
inner_type = param_type
|
||||
required = True
|
||||
|
||||
# Use description from ExtraBodyField if available, otherwise from docstring
|
||||
param_description = description or doc_params.get(param_name)
|
||||
|
||||
extra_body_param = ExtraBodyParameter(
|
||||
name=param_name,
|
||||
schema=self.schema_builder.classdef_to_ref(inner_type),
|
||||
description=param_description,
|
||||
required=required,
|
||||
)
|
||||
extra_body_parameters.append(extra_body_param)
|
||||
|
||||
webmethod = getattr(op.func_ref, "__webmethod__", None)
|
||||
raw_bytes_request_body = False
|
||||
if webmethod:
|
||||
|
|
@ -898,6 +920,7 @@ class Generator:
|
|||
deprecated=getattr(op.webmethod, "deprecated", False)
|
||||
or "DEPRECATED" in op.func_name,
|
||||
security=[] if op.public else None,
|
||||
extraBodyParameters=extra_body_parameters if extra_body_parameters else None,
|
||||
)
|
||||
|
||||
def _get_api_stability_priority(self, api_level: str) -> int:
|
||||
|
|
|
|||
|
|
@ -19,10 +19,12 @@ from llama_stack.strong_typing.inspection import get_signature
|
|||
|
||||
from typing import get_origin, get_args
|
||||
|
||||
from fastapi import UploadFile
|
||||
from fastapi import UploadFile
|
||||
from fastapi.params import File, Form
|
||||
from typing import Annotated
|
||||
|
||||
from llama_stack.schema_utils import ExtraBodyField
|
||||
|
||||
|
||||
def split_prefix(
|
||||
s: str, sep: str, prefix: Union[str, Iterable[str]]
|
||||
|
|
@ -89,6 +91,7 @@ class EndpointOperation:
|
|||
:param query_params: Parameters of the operation signature that are passed in the query string as `key=value` pairs.
|
||||
:param request_params: The parameter that corresponds to the data transmitted in the request body.
|
||||
:param multipart_params: Parameters that indicate multipart/form-data request body.
|
||||
:param extra_body_params: Parameters that arrive via extra_body and are documented but not in SDK.
|
||||
:param event_type: The Python type of the data that is transmitted out-of-band (e.g. via websockets) while the operation is in progress.
|
||||
:param response_type: The Python type of the data that is transmitted in the response body.
|
||||
:param http_method: The HTTP method used to invoke the endpoint such as POST, GET or PUT.
|
||||
|
|
@ -106,6 +109,7 @@ class EndpointOperation:
|
|||
query_params: List[OperationParameter]
|
||||
request_params: Optional[OperationParameter]
|
||||
multipart_params: List[OperationParameter]
|
||||
extra_body_params: List[tuple[str, type, str | None]]
|
||||
event_type: Optional[type]
|
||||
response_type: type
|
||||
http_method: HTTPMethod
|
||||
|
|
@ -265,6 +269,7 @@ def get_endpoint_operations(
|
|||
query_params = []
|
||||
request_params = []
|
||||
multipart_params = []
|
||||
extra_body_params = []
|
||||
|
||||
for param_name, parameter in signature.parameters.items():
|
||||
param_type = _get_annotation_type(parameter.annotation, func_ref)
|
||||
|
|
@ -279,6 +284,13 @@ def get_endpoint_operations(
|
|||
f"parameter '{param_name}' in function '{func_name}' has no type annotation"
|
||||
)
|
||||
|
||||
# Check if this is an extra_body parameter
|
||||
is_extra_body, extra_body_desc = _is_extra_body_param(param_type)
|
||||
if is_extra_body:
|
||||
# Store in a separate list for documentation
|
||||
extra_body_params.append((param_name, param_type, extra_body_desc))
|
||||
continue # Skip adding to request_params
|
||||
|
||||
is_multipart = _is_multipart_param(param_type)
|
||||
|
||||
if prefix in ["get", "delete"]:
|
||||
|
|
@ -351,6 +363,7 @@ def get_endpoint_operations(
|
|||
query_params=query_params,
|
||||
request_params=request_params,
|
||||
multipart_params=multipart_params,
|
||||
extra_body_params=extra_body_params,
|
||||
event_type=event_type,
|
||||
response_type=response_type,
|
||||
http_method=http_method,
|
||||
|
|
@ -403,7 +416,7 @@ def get_endpoint_events(endpoint: type) -> Dict[str, type]:
|
|||
def _is_multipart_param(param_type: type) -> bool:
|
||||
"""
|
||||
Check if a parameter type indicates multipart form data.
|
||||
|
||||
|
||||
Returns True if the type is:
|
||||
- UploadFile
|
||||
- Annotated[UploadFile, File()]
|
||||
|
|
@ -413,19 +426,38 @@ def _is_multipart_param(param_type: type) -> bool:
|
|||
"""
|
||||
if param_type is UploadFile:
|
||||
return True
|
||||
|
||||
|
||||
# Check for Annotated types
|
||||
origin = get_origin(param_type)
|
||||
if origin is None:
|
||||
return False
|
||||
|
||||
|
||||
if origin is Annotated:
|
||||
args = get_args(param_type)
|
||||
if len(args) < 2:
|
||||
return False
|
||||
|
||||
|
||||
# Check the annotations for File() or Form()
|
||||
for annotation in args[1:]:
|
||||
if isinstance(annotation, (File, Form)):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _is_extra_body_param(param_type: type) -> tuple[bool, str | None]:
|
||||
"""
|
||||
Check if parameter is marked as coming from extra_body.
|
||||
|
||||
Returns:
|
||||
(is_extra_body, description): Tuple of boolean and optional description
|
||||
"""
|
||||
origin = get_origin(param_type)
|
||||
if origin is Annotated:
|
||||
args = get_args(param_type)
|
||||
for annotation in args[1:]:
|
||||
if isinstance(annotation, ExtraBodyField):
|
||||
return True, annotation.description
|
||||
# Also check by type name for cases where import matters
|
||||
if type(annotation).__name__ == 'ExtraBodyField':
|
||||
return True, getattr(annotation, 'description', None)
|
||||
return False, None
|
||||
|
|
|
|||
|
|
@ -106,6 +106,15 @@ class Parameter:
|
|||
example: Optional[Any] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ExtraBodyParameter:
|
||||
"""Represents a parameter that arrives via extra_body in the request."""
|
||||
name: str
|
||||
schema: SchemaOrRef
|
||||
description: Optional[str] = None
|
||||
required: Optional[bool] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Operation:
|
||||
responses: Dict[str, Union[Response, ResponseRef]]
|
||||
|
|
@ -118,6 +127,7 @@ class Operation:
|
|||
callbacks: Optional[Dict[str, "Callback"]] = None
|
||||
security: Optional[List["SecurityRequirement"]] = None
|
||||
deprecated: Optional[bool] = None
|
||||
extraBodyParameters: Optional[List[ExtraBodyParameter]] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
|
|||
|
|
@ -52,6 +52,17 @@ class Specification:
|
|||
if display_name:
|
||||
tag["x-displayName"] = display_name
|
||||
|
||||
# Handle operations to rename extraBodyParameters -> x-llama-stack-extra-body-params
|
||||
paths = json_doc.get("paths", {})
|
||||
for path_item in paths.values():
|
||||
if isinstance(path_item, dict):
|
||||
for method in ["get", "post", "put", "delete", "patch"]:
|
||||
operation = path_item.get(method)
|
||||
if operation and isinstance(operation, dict):
|
||||
extra_body_params = operation.pop("extraBodyParameters", None)
|
||||
if extra_body_params:
|
||||
operation["x-llama-stack-extra-body-params"] = extra_body_params
|
||||
|
||||
return json_doc
|
||||
|
||||
def get_json_string(self, pretty_print: bool = False) -> str:
|
||||
|
|
|
|||
111
docs/static/deprecated-llama-stack-spec.html
vendored
111
docs/static/deprecated-llama-stack-spec.html
vendored
|
|
@ -1443,8 +1443,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "List all chat completions.",
|
||||
"description": "List all chat completions.",
|
||||
"summary": "List chat completions.",
|
||||
"description": "List chat completions.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -1520,8 +1520,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"summary": "Create chat completions.",
|
||||
"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1565,8 +1565,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Describe a chat completion by its ID.",
|
||||
"description": "Describe a chat completion by its ID.",
|
||||
"summary": "Get chat completion.",
|
||||
"description": "Get chat completion.\nDescribe a chat completion by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "completion_id",
|
||||
|
|
@ -1610,8 +1610,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"summary": "Create completion.",
|
||||
"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1655,8 +1655,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"summary": "Create embeddings.",
|
||||
"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1700,8 +1700,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns a list of files that belong to the user's organization.",
|
||||
"description": "Returns a list of files that belong to the user's organization.",
|
||||
"summary": "List files.",
|
||||
"description": "List files.\nReturns a list of files that belong to the user's organization.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -1770,8 +1770,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Upload a file that can be used across various endpoints.",
|
||||
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||
"summary": "Upload file.",
|
||||
"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1831,8 +1831,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns information about a specific file.",
|
||||
"description": "Returns information about a specific file.",
|
||||
"summary": "Retrieve file.",
|
||||
"description": "Retrieve file.\nReturns information about a specific file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1874,8 +1874,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Delete a file.",
|
||||
"description": "Delete a file.",
|
||||
"summary": "Delete file.",
|
||||
"description": "Delete file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1919,8 +1919,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns the contents of the specified file.",
|
||||
"description": "Returns the contents of the specified file.",
|
||||
"summary": "Retrieve file content.",
|
||||
"description": "Retrieve file content.\nReturns the contents of the specified file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1999,8 +1999,8 @@
|
|||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"summary": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"description": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"summary": "Create moderation.",
|
||||
"description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -2044,8 +2044,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "List all OpenAI responses.",
|
||||
"description": "List all OpenAI responses.",
|
||||
"summary": "List all responses.",
|
||||
"description": "List all responses.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -2119,8 +2119,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Create a new OpenAI response.",
|
||||
"description": "Create a new OpenAI response.",
|
||||
"summary": "Create a model response.",
|
||||
"description": "Create a model response.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -2132,7 +2132,27 @@
|
|||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": true
|
||||
"deprecated": true,
|
||||
"x-llama-stack-extra-body-params": [
|
||||
{
|
||||
"name": "shields",
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/ResponseShieldSpec"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "List of shields to apply during response generation. Shields provide safety and content moderation.",
|
||||
"required": false
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/responses/{response_id}": {
|
||||
|
|
@ -2164,8 +2184,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Retrieve an OpenAI response by its ID.",
|
||||
"description": "Retrieve an OpenAI response by its ID.",
|
||||
"summary": "Get a model response.",
|
||||
"description": "Get a model response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -2207,8 +2227,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Delete an OpenAI response by its ID.",
|
||||
"description": "Delete an OpenAI response by its ID.",
|
||||
"summary": "Delete a response.",
|
||||
"description": "Delete a response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -2252,8 +2272,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "List input items for a given OpenAI response.",
|
||||
"description": "List input items for a given OpenAI response.",
|
||||
"summary": "List input items.",
|
||||
"description": "List input items.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -9521,6 +9541,21 @@
|
|||
"title": "OpenAIResponseText",
|
||||
"description": "Text response configuration for OpenAI responses."
|
||||
},
|
||||
"ResponseShieldSpec": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "The type/identifier of the shield."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "ResponseShieldSpec",
|
||||
"description": "Specification for a shield to apply during response generation."
|
||||
},
|
||||
"OpenAIResponseInputTool": {
|
||||
"oneOf": [
|
||||
{
|
||||
|
|
@ -13331,12 +13366,13 @@
|
|||
},
|
||||
{
|
||||
"name": "Files",
|
||||
"description": ""
|
||||
"description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
|
||||
"x-displayName": "Files"
|
||||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
|
||||
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"x-displayName": "Inference"
|
||||
},
|
||||
{
|
||||
"name": "Models",
|
||||
|
|
@ -13348,7 +13384,8 @@
|
|||
},
|
||||
{
|
||||
"name": "Safety",
|
||||
"description": ""
|
||||
"description": "OpenAI-compatible Moderations API.",
|
||||
"x-displayName": "Safety"
|
||||
},
|
||||
{
|
||||
"name": "Telemetry",
|
||||
|
|
|
|||
121
docs/static/deprecated-llama-stack-spec.yaml
vendored
121
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
|
@ -1033,8 +1033,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: List all chat completions.
|
||||
description: List all chat completions.
|
||||
summary: List chat completions.
|
||||
description: List chat completions.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
|
|
@ -1087,10 +1087,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
summary: Create chat completions.
|
||||
description: >-
|
||||
Create chat completions.
|
||||
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
parameters: []
|
||||
|
|
@ -1122,8 +1122,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: Describe a chat completion by its ID.
|
||||
description: Describe a chat completion by its ID.
|
||||
summary: Get chat completion.
|
||||
description: >-
|
||||
Get chat completion.
|
||||
|
||||
Describe a chat completion by its ID.
|
||||
parameters:
|
||||
- name: completion_id
|
||||
in: path
|
||||
|
|
@ -1153,10 +1156,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
summary: Create completion.
|
||||
description: >-
|
||||
Create completion.
|
||||
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
parameters: []
|
||||
|
|
@ -1189,10 +1192,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||
model.
|
||||
summary: Create embeddings.
|
||||
description: >-
|
||||
Create embeddings.
|
||||
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||
model.
|
||||
parameters: []
|
||||
|
|
@ -1225,9 +1228,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns a list of files that belong to the user's organization.
|
||||
summary: List files.
|
||||
description: >-
|
||||
List files.
|
||||
|
||||
Returns a list of files that belong to the user's organization.
|
||||
parameters:
|
||||
- name: after
|
||||
|
|
@ -1285,11 +1289,13 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Upload a file that can be used across various endpoints.
|
||||
summary: Upload file.
|
||||
description: >-
|
||||
Upload file.
|
||||
|
||||
Upload a file that can be used across various endpoints.
|
||||
|
||||
|
||||
The file upload should be a multipart form request with:
|
||||
|
||||
- file: The File object (not file name) to be uploaded.
|
||||
|
|
@ -1338,9 +1344,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns information about a specific file.
|
||||
summary: Retrieve file.
|
||||
description: >-
|
||||
Retrieve file.
|
||||
|
||||
Returns information about a specific file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
|
|
@ -1372,8 +1379,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: Delete a file.
|
||||
description: Delete a file.
|
||||
summary: Delete file.
|
||||
description: Delete file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
in: path
|
||||
|
|
@ -1405,9 +1412,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns the contents of the specified file.
|
||||
summary: Retrieve file content.
|
||||
description: >-
|
||||
Retrieve file content.
|
||||
|
||||
Returns the contents of the specified file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
|
|
@ -1464,9 +1472,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
summary: >-
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
summary: Create moderation.
|
||||
description: >-
|
||||
Create moderation.
|
||||
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
parameters: []
|
||||
requestBody:
|
||||
|
|
@ -1497,8 +1506,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: List all OpenAI responses.
|
||||
description: List all OpenAI responses.
|
||||
summary: List all responses.
|
||||
description: List all responses.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
|
|
@ -1549,8 +1558,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Create a new OpenAI response.
|
||||
description: Create a new OpenAI response.
|
||||
summary: Create a model response.
|
||||
description: Create a model response.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -1559,6 +1568,18 @@ paths:
|
|||
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
x-llama-stack-extra-body-params:
|
||||
- name: shields
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/ResponseShieldSpec'
|
||||
description: >-
|
||||
List of shields to apply during response generation. Shields provide safety
|
||||
and content moderation.
|
||||
required: false
|
||||
/v1/openai/v1/responses/{response_id}:
|
||||
get:
|
||||
responses:
|
||||
|
|
@ -1580,8 +1601,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Retrieve an OpenAI response by its ID.
|
||||
description: Retrieve an OpenAI response by its ID.
|
||||
summary: Get a model response.
|
||||
description: Get a model response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1611,8 +1632,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Delete an OpenAI response by its ID.
|
||||
description: Delete an OpenAI response by its ID.
|
||||
summary: Delete a response.
|
||||
description: Delete a response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1642,10 +1663,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: >-
|
||||
List input items for a given OpenAI response.
|
||||
description: >-
|
||||
List input items for a given OpenAI response.
|
||||
summary: List input items.
|
||||
description: List input items.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -7076,6 +7095,18 @@ components:
|
|||
title: OpenAIResponseText
|
||||
description: >-
|
||||
Text response configuration for OpenAI responses.
|
||||
ResponseShieldSpec:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
description: The type/identifier of the shield.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: ResponseShieldSpec
|
||||
description: >-
|
||||
Specification for a shield to apply during response generation.
|
||||
OpenAIResponseInputTool:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
|
||||
|
|
@ -9987,9 +10018,16 @@ tags:
|
|||
x-displayName: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
- name: Files
|
||||
description: ''
|
||||
description: >-
|
||||
This API is used to upload documents that can be used with other Llama Stack
|
||||
APIs.
|
||||
x-displayName: Files
|
||||
- name: Inference
|
||||
description: >-
|
||||
Llama Stack Inference API for generating completions, chat completions, and
|
||||
embeddings.
|
||||
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
are supported:
|
||||
|
||||
|
|
@ -9997,15 +10035,14 @@ tags:
|
|||
|
||||
- Embedding models: these models generate embeddings to be used for semantic
|
||||
search.
|
||||
x-displayName: >-
|
||||
Llama Stack Inference API for generating completions, chat completions, and
|
||||
embeddings.
|
||||
x-displayName: Inference
|
||||
- name: Models
|
||||
description: ''
|
||||
- name: PostTraining (Coming Soon)
|
||||
description: ''
|
||||
- name: Safety
|
||||
description: ''
|
||||
description: OpenAI-compatible Moderations API.
|
||||
x-displayName: Safety
|
||||
- name: Telemetry
|
||||
description: ''
|
||||
- name: VectorIO
|
||||
|
|
|
|||
182
docs/static/llama-stack-spec.html
vendored
182
docs/static/llama-stack-spec.html
vendored
|
|
@ -69,8 +69,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "List all chat completions.",
|
||||
"description": "List all chat completions.",
|
||||
"summary": "List chat completions.",
|
||||
"description": "List chat completions.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -146,8 +146,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"summary": "Create chat completions.",
|
||||
"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -191,8 +191,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Describe a chat completion by its ID.",
|
||||
"description": "Describe a chat completion by its ID.",
|
||||
"summary": "Get chat completion.",
|
||||
"description": "Get chat completion.\nDescribe a chat completion by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "completion_id",
|
||||
|
|
@ -236,8 +236,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"summary": "Create completion.",
|
||||
"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -758,8 +758,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"summary": "Create embeddings.",
|
||||
"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -803,8 +803,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns a list of files that belong to the user's organization.",
|
||||
"description": "Returns a list of files that belong to the user's organization.",
|
||||
"summary": "List files.",
|
||||
"description": "List files.\nReturns a list of files that belong to the user's organization.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -873,8 +873,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Upload a file that can be used across various endpoints.",
|
||||
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||
"summary": "Upload file.",
|
||||
"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -934,8 +934,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns information about a specific file.",
|
||||
"description": "Returns information about a specific file.",
|
||||
"summary": "Retrieve file.",
|
||||
"description": "Retrieve file.\nReturns information about a specific file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -977,8 +977,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Delete a file.",
|
||||
"description": "Delete a file.",
|
||||
"summary": "Delete file.",
|
||||
"description": "Delete file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1022,8 +1022,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns the contents of the specified file.",
|
||||
"description": "Returns the contents of the specified file.",
|
||||
"summary": "Retrieve file content.",
|
||||
"description": "Retrieve file content.\nReturns the contents of the specified file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1067,8 +1067,8 @@
|
|||
"tags": [
|
||||
"Inspect"
|
||||
],
|
||||
"summary": "Get the current health status of the service.",
|
||||
"description": "Get the current health status of the service.",
|
||||
"summary": "Get health status.",
|
||||
"description": "Get health status.\nGet the current health status of the service.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -1102,8 +1102,8 @@
|
|||
"tags": [
|
||||
"Inspect"
|
||||
],
|
||||
"summary": "List all available API routes with their methods and implementing providers.",
|
||||
"description": "List all available API routes with their methods and implementing providers.",
|
||||
"summary": "List routes.",
|
||||
"description": "List routes.\nList all available API routes with their methods and implementing providers.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -1170,8 +1170,8 @@
|
|||
"tags": [
|
||||
"Models"
|
||||
],
|
||||
"summary": "Register a model.",
|
||||
"description": "Register a model.",
|
||||
"summary": "Register model.",
|
||||
"description": "Register model.\nRegister a model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1215,8 +1215,8 @@
|
|||
"tags": [
|
||||
"Models"
|
||||
],
|
||||
"summary": "Get a model by its identifier.",
|
||||
"description": "Get a model by its identifier.",
|
||||
"summary": "Get model.",
|
||||
"description": "Get model.\nGet a model by its identifier.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "model_id",
|
||||
|
|
@ -1251,8 +1251,8 @@
|
|||
"tags": [
|
||||
"Models"
|
||||
],
|
||||
"summary": "Unregister a model.",
|
||||
"description": "Unregister a model.",
|
||||
"summary": "Unregister model.",
|
||||
"description": "Unregister model.\nUnregister a model.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "model_id",
|
||||
|
|
@ -1296,8 +1296,8 @@
|
|||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"summary": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"description": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"summary": "Create moderation.",
|
||||
"description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1374,8 +1374,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Create a new prompt.",
|
||||
"description": "Create a new prompt.",
|
||||
"summary": "Create prompt.",
|
||||
"description": "Create prompt.\nCreate a new prompt.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1419,8 +1419,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Get a prompt by its identifier and optional version.",
|
||||
"description": "Get a prompt by its identifier and optional version.",
|
||||
"summary": "Get prompt.",
|
||||
"description": "Get prompt.\nGet a prompt by its identifier and optional version.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1471,8 +1471,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Update an existing prompt (increments version).",
|
||||
"description": "Update an existing prompt (increments version).",
|
||||
"summary": "Update prompt.",
|
||||
"description": "Update prompt.\nUpdate an existing prompt (increments version).",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1517,8 +1517,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Delete a prompt.",
|
||||
"description": "Delete a prompt.",
|
||||
"summary": "Delete prompt.",
|
||||
"description": "Delete prompt.\nDelete a prompt.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1562,8 +1562,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Set which version of a prompt should be the default in get_prompt (latest).",
|
||||
"description": "Set which version of a prompt should be the default in get_prompt (latest).",
|
||||
"summary": "Set prompt version.",
|
||||
"description": "Set prompt version.\nSet which version of a prompt should be the default in get_prompt (latest).",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1617,8 +1617,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "List all versions of a specific prompt.",
|
||||
"description": "List all versions of a specific prompt.",
|
||||
"summary": "List prompt versions.",
|
||||
"description": "List prompt versions.\nList all versions of a specific prompt.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1662,8 +1662,8 @@
|
|||
"tags": [
|
||||
"Providers"
|
||||
],
|
||||
"summary": "List all available providers.",
|
||||
"description": "List all available providers.",
|
||||
"summary": "List providers.",
|
||||
"description": "List providers.\nList all available providers.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -1697,8 +1697,8 @@
|
|||
"tags": [
|
||||
"Providers"
|
||||
],
|
||||
"summary": "Get detailed information about a specific provider.",
|
||||
"description": "Get detailed information about a specific provider.",
|
||||
"summary": "Get provider.",
|
||||
"description": "Get provider.\nGet detailed information about a specific provider.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "provider_id",
|
||||
|
|
@ -1742,8 +1742,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "List all OpenAI responses.",
|
||||
"description": "List all OpenAI responses.",
|
||||
"summary": "List all responses.",
|
||||
"description": "List all responses.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -1817,8 +1817,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Create a new OpenAI response.",
|
||||
"description": "Create a new OpenAI response.",
|
||||
"summary": "Create a model response.",
|
||||
"description": "Create a model response.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1830,7 +1830,27 @@
|
|||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
"deprecated": false,
|
||||
"x-llama-stack-extra-body-params": [
|
||||
{
|
||||
"name": "shields",
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/ResponseShieldSpec"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "List of shields to apply during response generation. Shields provide safety and content moderation.",
|
||||
"required": false
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/responses/{response_id}": {
|
||||
|
|
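The hunk above introduces `x-llama-stack-extra-body-params` on `POST /v1/responses`: a `shields` parameter is accepted in the request body even though it is not part of the OpenAI request schema. A minimal client-side sketch of how that could be exercised, assuming an OpenAI-compatible client pointed at a locally running Llama Stack server (the base URL, model name, and shield identifiers below are illustrative assumptions, not values from this diff):

```python
# Hypothetical sketch: passing the new `shields` extra-body parameter through an
# OpenAI-compatible client. Only the parameter name and its accepted shapes
# (string or ResponseShieldSpec-style object) come from the spec change above.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    input="Tell me a story about a friendly robot.",
    # `shields` is not in the OpenAI schema, so it travels in the extra body,
    # matching the x-llama-stack-extra-body-params declaration above.
    extra_body={
        "shields": [
            "llama-guard",                   # plain shield identifier (string form)
            {"type": "llama-guard-vision"},  # ResponseShieldSpec form
        ]
    },
)
print(response.id)
```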
@ -1862,8 +1882,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Retrieve an OpenAI response by its ID.",
|
||||
"description": "Retrieve an OpenAI response by its ID.",
|
||||
"summary": "Get a model response.",
|
||||
"description": "Get a model response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -1905,8 +1925,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Delete an OpenAI response by its ID.",
|
||||
"description": "Delete an OpenAI response by its ID.",
|
||||
"summary": "Delete a response.",
|
||||
"description": "Delete a response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -1950,8 +1970,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "List input items for a given OpenAI response.",
|
||||
"description": "List input items for a given OpenAI response.",
|
||||
"summary": "List input items.",
|
||||
"description": "List input items.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -2043,8 +2063,8 @@
|
|||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"summary": "Run a shield.",
|
||||
"description": "Run a shield.",
|
||||
"summary": "Run shield.",
|
||||
"description": "Run shield.\nRun a shield.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -4176,8 +4196,8 @@
|
|||
"tags": [
|
||||
"Inspect"
|
||||
],
|
||||
"summary": "Get the version of the service.",
|
||||
"description": "Get the version of the service.",
|
||||
"summary": "Get version.",
|
||||
"description": "Get version.\nGet the version of the service.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -7616,6 +7636,21 @@
|
|||
"title": "OpenAIResponseText",
|
||||
"description": "Text response configuration for OpenAI responses."
|
||||
},
|
||||
"ResponseShieldSpec": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "The type/identifier of the shield."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "ResponseShieldSpec",
|
||||
"description": "Specification for a shield to apply during response generation."
|
||||
},
|
||||
"OpenAIResponseInputTool": {
|
||||
"oneOf": [
|
||||
{
|
||||
|
|
@ -12879,16 +12914,18 @@
|
|||
},
|
||||
{
|
||||
"name": "Files",
|
||||
"description": ""
|
||||
"description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
|
||||
"x-displayName": "Files"
|
||||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
|
||||
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"x-displayName": "Inference"
|
||||
},
|
||||
{
|
||||
"name": "Inspect",
|
||||
"description": ""
|
||||
"description": "APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.",
|
||||
"x-displayName": "Inspect"
|
||||
},
|
||||
{
|
||||
"name": "Models",
|
||||
|
|
@ -12896,17 +12933,18 @@
|
|||
},
|
||||
{
|
||||
"name": "Prompts",
|
||||
"description": "",
|
||||
"x-displayName": "Protocol for prompt management operations."
|
||||
"description": "Protocol for prompt management operations.",
|
||||
"x-displayName": "Prompts"
|
||||
},
|
||||
{
|
||||
"name": "Providers",
|
||||
"description": "",
|
||||
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
|
||||
"description": "Providers API for inspecting, listing, and modifying providers and their configurations.",
|
||||
"x-displayName": "Providers"
|
||||
},
|
||||
{
|
||||
"name": "Safety",
|
||||
"description": ""
|
||||
"description": "OpenAI-compatible Moderations API.",
|
||||
"x-displayName": "Safety"
|
||||
},
|
||||
{
|
||||
"name": "Scoring",
|
||||
|
|
|
|||
227
docs/static/llama-stack-spec.yaml
vendored
|
|
@ -33,8 +33,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: List all chat completions.
|
||||
description: List all chat completions.
|
||||
summary: List chat completions.
|
||||
description: List chat completions.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
|
|
@ -87,10 +87,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
summary: Create chat completions.
|
||||
description: >-
|
||||
Create chat completions.
|
||||
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
parameters: []
|
||||
|
|
@ -122,8 +122,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: Describe a chat completion by its ID.
|
||||
description: Describe a chat completion by its ID.
|
||||
summary: Get chat completion.
|
||||
description: >-
|
||||
Get chat completion.
|
||||
|
||||
Describe a chat completion by its ID.
|
||||
parameters:
|
||||
- name: completion_id
|
||||
in: path
|
||||
|
|
@ -153,10 +156,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
summary: Create completion.
|
||||
description: >-
|
||||
Create completion.
|
||||
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
parameters: []
|
||||
|
|
@ -603,10 +606,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||
model.
|
||||
summary: Create embeddings.
|
||||
description: >-
|
||||
Create embeddings.
|
||||
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||
model.
|
||||
parameters: []
|
||||
|
|
@ -639,9 +642,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns a list of files that belong to the user's organization.
|
||||
summary: List files.
|
||||
description: >-
|
||||
List files.
|
||||
|
||||
Returns a list of files that belong to the user's organization.
|
||||
parameters:
|
||||
- name: after
|
||||
|
|
@ -699,11 +703,13 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Upload a file that can be used across various endpoints.
|
||||
summary: Upload file.
|
||||
description: >-
|
||||
Upload file.
|
||||
|
||||
Upload a file that can be used across various endpoints.
|
||||
|
||||
|
||||
The file upload should be a multipart form request with:
|
||||
|
||||
- file: The File object (not file name) to be uploaded.
|
||||
|
|
@ -752,9 +758,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns information about a specific file.
|
||||
summary: Retrieve file.
|
||||
description: >-
|
||||
Retrieve file.
|
||||
|
||||
Returns information about a specific file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
|
|
@ -786,8 +793,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: Delete a file.
|
||||
description: Delete a file.
|
||||
summary: Delete file.
|
||||
description: Delete file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
in: path
|
||||
|
|
@ -819,9 +826,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns the contents of the specified file.
|
||||
summary: Retrieve file content.
|
||||
description: >-
|
||||
Retrieve file content.
|
||||
|
||||
Returns the contents of the specified file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
|
|
@ -854,9 +862,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
summary: >-
|
||||
Get the current health status of the service.
|
||||
summary: Get health status.
|
||||
description: >-
|
||||
Get health status.
|
||||
|
||||
Get the current health status of the service.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
|
|
@ -882,9 +891,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
summary: >-
|
||||
List all available API routes with their methods and implementing providers.
|
||||
summary: List routes.
|
||||
description: >-
|
||||
List routes.
|
||||
|
||||
List all available API routes with their methods and implementing providers.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
|
|
@ -933,8 +943,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: Register a model.
|
||||
description: Register a model.
|
||||
summary: Register model.
|
||||
description: >-
|
||||
Register model.
|
||||
|
||||
Register a model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -964,8 +977,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: Get a model by its identifier.
|
||||
description: Get a model by its identifier.
|
||||
summary: Get model.
|
||||
description: >-
|
||||
Get model.
|
||||
|
||||
Get a model by its identifier.
|
||||
parameters:
|
||||
- name: model_id
|
||||
in: path
|
||||
|
|
@ -990,8 +1006,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: Unregister a model.
|
||||
description: Unregister a model.
|
||||
summary: Unregister model.
|
||||
description: >-
|
||||
Unregister model.
|
||||
|
||||
Unregister a model.
|
||||
parameters:
|
||||
- name: model_id
|
||||
in: path
|
||||
|
|
@ -1022,9 +1041,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
summary: >-
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
summary: Create moderation.
|
||||
description: >-
|
||||
Create moderation.
|
||||
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
parameters: []
|
||||
requestBody:
|
||||
|
|
@ -1080,8 +1100,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: Create a new prompt.
|
||||
description: Create a new prompt.
|
||||
summary: Create prompt.
|
||||
description: >-
|
||||
Create prompt.
|
||||
|
||||
Create a new prompt.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -1111,9 +1134,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: >-
|
||||
Get a prompt by its identifier and optional version.
|
||||
summary: Get prompt.
|
||||
description: >-
|
||||
Get prompt.
|
||||
|
||||
Get a prompt by its identifier and optional version.
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
|
|
@ -1151,9 +1175,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: >-
|
||||
Update an existing prompt (increments version).
|
||||
summary: Update prompt.
|
||||
description: >-
|
||||
Update prompt.
|
||||
|
||||
Update an existing prompt (increments version).
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
|
|
@ -1185,8 +1210,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: Delete a prompt.
|
||||
description: Delete a prompt.
|
||||
summary: Delete prompt.
|
||||
description: >-
|
||||
Delete prompt.
|
||||
|
||||
Delete a prompt.
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
in: path
|
||||
|
|
@ -1217,9 +1245,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: >-
|
||||
Set which version of a prompt should be the default in get_prompt (latest).
|
||||
summary: Set prompt version.
|
||||
description: >-
|
||||
Set prompt version.
|
||||
|
||||
Set which version of a prompt should be the default in get_prompt (latest).
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
|
|
@ -1257,8 +1286,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: List all versions of a specific prompt.
|
||||
description: List all versions of a specific prompt.
|
||||
summary: List prompt versions.
|
||||
description: >-
|
||||
List prompt versions.
|
||||
|
||||
List all versions of a specific prompt.
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
in: path
|
||||
|
|
@ -1290,8 +1322,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Providers
|
||||
summary: List all available providers.
|
||||
description: List all available providers.
|
||||
summary: List providers.
|
||||
description: >-
|
||||
List providers.
|
||||
|
||||
List all available providers.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
/v1/providers/{provider_id}:
|
||||
|
|
@ -1316,9 +1351,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Providers
|
||||
summary: >-
|
||||
Get detailed information about a specific provider.
|
||||
summary: Get provider.
|
||||
description: >-
|
||||
Get provider.
|
||||
|
||||
Get detailed information about a specific provider.
|
||||
parameters:
|
||||
- name: provider_id
|
||||
|
|
@ -1349,8 +1385,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: List all OpenAI responses.
|
||||
description: List all OpenAI responses.
|
||||
summary: List all responses.
|
||||
description: List all responses.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
|
|
@ -1401,8 +1437,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Create a new OpenAI response.
|
||||
description: Create a new OpenAI response.
|
||||
summary: Create a model response.
|
||||
description: Create a model response.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -1411,6 +1447,18 @@ paths:
|
|||
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
x-llama-stack-extra-body-params:
|
||||
- name: shields
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/ResponseShieldSpec'
|
||||
description: >-
|
||||
List of shields to apply during response generation. Shields provide safety
|
||||
and content moderation.
|
||||
required: false
|
||||
/v1/responses/{response_id}:
|
||||
get:
|
||||
responses:
|
||||
|
|
@ -1432,8 +1480,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Retrieve an OpenAI response by its ID.
|
||||
description: Retrieve an OpenAI response by its ID.
|
||||
summary: Get a model response.
|
||||
description: Get a model response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1463,8 +1511,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Delete an OpenAI response by its ID.
|
||||
description: Delete an OpenAI response by its ID.
|
||||
summary: Delete a response.
|
||||
description: Delete a response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1494,10 +1542,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: >-
|
||||
List input items for a given OpenAI response.
|
||||
description: >-
|
||||
List input items for a given OpenAI response.
|
||||
summary: List input items.
|
||||
description: List input items.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1566,8 +1612,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
summary: Run a shield.
|
||||
description: Run a shield.
|
||||
summary: Run shield.
|
||||
description: >-
|
||||
Run shield.
|
||||
|
||||
Run a shield.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -3123,8 +3172,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
summary: Get the version of the service.
|
||||
description: Get the version of the service.
|
||||
summary: Get version.
|
||||
description: >-
|
||||
Get version.
|
||||
|
||||
Get the version of the service.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
jsonSchemaDialect: >-
|
||||
|
|
@ -5739,6 +5791,18 @@ components:
|
|||
title: OpenAIResponseText
|
||||
description: >-
|
||||
Text response configuration for OpenAI responses.
|
||||
ResponseShieldSpec:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
description: The type/identifier of the shield.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: ResponseShieldSpec
|
||||
description: >-
|
||||
Specification for a shield to apply during response generation.
|
||||
OpenAIResponseInputTool:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
|
||||
|
|
@ -9725,9 +9789,16 @@ tags:
|
|||
x-displayName: >-
|
||||
Protocol for conversation management operations.
|
||||
- name: Files
|
||||
description: ''
|
||||
description: >-
|
||||
This API is used to upload documents that can be used with other Llama Stack
|
||||
APIs.
|
||||
x-displayName: Files
|
||||
- name: Inference
|
||||
description: >-
|
||||
Llama Stack Inference API for generating completions, chat completions, and
|
||||
embeddings.
|
||||
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
are supported:
|
||||
|
||||
|
|
@ -9735,23 +9806,25 @@ tags:
|
|||
|
||||
- Embedding models: these models generate embeddings to be used for semantic
|
||||
search.
|
||||
x-displayName: >-
|
||||
Llama Stack Inference API for generating completions, chat completions, and
|
||||
embeddings.
|
||||
x-displayName: Inference
|
||||
- name: Inspect
|
||||
description: ''
|
||||
description: >-
|
||||
APIs for inspecting the Llama Stack service, including health status, available
|
||||
API routes with methods and implementing providers.
|
||||
x-displayName: Inspect
|
||||
- name: Models
|
||||
description: ''
|
||||
- name: Prompts
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Protocol for prompt management operations.
|
||||
x-displayName: Prompts
|
||||
- name: Providers
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||
x-displayName: Providers
|
||||
- name: Safety
|
||||
description: ''
|
||||
description: OpenAI-compatible Moderations API.
|
||||
x-displayName: Safety
|
||||
- name: Scoring
|
||||
description: ''
|
||||
- name: ScoringFunctions
|
||||
|
|
|
|||
182
docs/static/stainless-llama-stack-spec.html
vendored
|
|
@ -69,8 +69,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "List all chat completions.",
|
||||
"description": "List all chat completions.",
|
||||
"summary": "List chat completions.",
|
||||
"description": "List chat completions.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -146,8 +146,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"summary": "Create chat completions.",
|
||||
"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -191,8 +191,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Describe a chat completion by its ID.",
|
||||
"description": "Describe a chat completion by its ID.",
|
||||
"summary": "Get chat completion.",
|
||||
"description": "Get chat completion.\nDescribe a chat completion by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "completion_id",
|
||||
|
|
@ -236,8 +236,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"summary": "Create completion.",
|
||||
"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -758,8 +758,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"summary": "Create embeddings.",
|
||||
"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -803,8 +803,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns a list of files that belong to the user's organization.",
|
||||
"description": "Returns a list of files that belong to the user's organization.",
|
||||
"summary": "List files.",
|
||||
"description": "List files.\nReturns a list of files that belong to the user's organization.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -873,8 +873,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Upload a file that can be used across various endpoints.",
|
||||
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||
"summary": "Upload file.",
|
||||
"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -934,8 +934,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns information about a specific file.",
|
||||
"description": "Returns information about a specific file.",
|
||||
"summary": "Retrieve file.",
|
||||
"description": "Retrieve file.\nReturns information about a specific file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -977,8 +977,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Delete a file.",
|
||||
"description": "Delete a file.",
|
||||
"summary": "Delete file.",
|
||||
"description": "Delete file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1022,8 +1022,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns the contents of the specified file.",
|
||||
"description": "Returns the contents of the specified file.",
|
||||
"summary": "Retrieve file content.",
|
||||
"description": "Retrieve file content.\nReturns the contents of the specified file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1067,8 +1067,8 @@
|
|||
"tags": [
|
||||
"Inspect"
|
||||
],
|
||||
"summary": "Get the current health status of the service.",
|
||||
"description": "Get the current health status of the service.",
|
||||
"summary": "Get health status.",
|
||||
"description": "Get health status.\nGet the current health status of the service.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -1102,8 +1102,8 @@
|
|||
"tags": [
|
||||
"Inspect"
|
||||
],
|
||||
"summary": "List all available API routes with their methods and implementing providers.",
|
||||
"description": "List all available API routes with their methods and implementing providers.",
|
||||
"summary": "List routes.",
|
||||
"description": "List routes.\nList all available API routes with their methods and implementing providers.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -1170,8 +1170,8 @@
|
|||
"tags": [
|
||||
"Models"
|
||||
],
|
||||
"summary": "Register a model.",
|
||||
"description": "Register a model.",
|
||||
"summary": "Register model.",
|
||||
"description": "Register model.\nRegister a model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1215,8 +1215,8 @@
|
|||
"tags": [
|
||||
"Models"
|
||||
],
|
||||
"summary": "Get a model by its identifier.",
|
||||
"description": "Get a model by its identifier.",
|
||||
"summary": "Get model.",
|
||||
"description": "Get model.\nGet a model by its identifier.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "model_id",
|
||||
|
|
@ -1251,8 +1251,8 @@
|
|||
"tags": [
|
||||
"Models"
|
||||
],
|
||||
"summary": "Unregister a model.",
|
||||
"description": "Unregister a model.",
|
||||
"summary": "Unregister model.",
|
||||
"description": "Unregister model.\nUnregister a model.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "model_id",
|
||||
|
|
@ -1296,8 +1296,8 @@
|
|||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"summary": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"description": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"summary": "Create moderation.",
|
||||
"description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1374,8 +1374,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Create a new prompt.",
|
||||
"description": "Create a new prompt.",
|
||||
"summary": "Create prompt.",
|
||||
"description": "Create prompt.\nCreate a new prompt.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1419,8 +1419,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Get a prompt by its identifier and optional version.",
|
||||
"description": "Get a prompt by its identifier and optional version.",
|
||||
"summary": "Get prompt.",
|
||||
"description": "Get prompt.\nGet a prompt by its identifier and optional version.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1471,8 +1471,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Update an existing prompt (increments version).",
|
||||
"description": "Update an existing prompt (increments version).",
|
||||
"summary": "Update prompt.",
|
||||
"description": "Update prompt.\nUpdate an existing prompt (increments version).",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1517,8 +1517,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Delete a prompt.",
|
||||
"description": "Delete a prompt.",
|
||||
"summary": "Delete prompt.",
|
||||
"description": "Delete prompt.\nDelete a prompt.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1562,8 +1562,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Set which version of a prompt should be the default in get_prompt (latest).",
|
||||
"description": "Set which version of a prompt should be the default in get_prompt (latest).",
|
||||
"summary": "Set prompt version.",
|
||||
"description": "Set prompt version.\nSet which version of a prompt should be the default in get_prompt (latest).",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1617,8 +1617,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "List all versions of a specific prompt.",
|
||||
"description": "List all versions of a specific prompt.",
|
||||
"summary": "List prompt versions.",
|
||||
"description": "List prompt versions.\nList all versions of a specific prompt.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1662,8 +1662,8 @@
|
|||
"tags": [
|
||||
"Providers"
|
||||
],
|
||||
"summary": "List all available providers.",
|
||||
"description": "List all available providers.",
|
||||
"summary": "List providers.",
|
||||
"description": "List providers.\nList all available providers.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -1697,8 +1697,8 @@
|
|||
"tags": [
|
||||
"Providers"
|
||||
],
|
||||
"summary": "Get detailed information about a specific provider.",
|
||||
"description": "Get detailed information about a specific provider.",
|
||||
"summary": "Get provider.",
|
||||
"description": "Get provider.\nGet detailed information about a specific provider.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "provider_id",
|
||||
|
|
@ -1742,8 +1742,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "List all OpenAI responses.",
|
||||
"description": "List all OpenAI responses.",
|
||||
"summary": "List all responses.",
|
||||
"description": "List all responses.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -1817,8 +1817,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Create a new OpenAI response.",
|
||||
"description": "Create a new OpenAI response.",
|
||||
"summary": "Create a model response.",
|
||||
"description": "Create a model response.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1830,7 +1830,27 @@
|
|||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
"deprecated": false,
|
||||
"x-llama-stack-extra-body-params": [
|
||||
{
|
||||
"name": "shields",
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/ResponseShieldSpec"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "List of shields to apply during response generation. Shields provide safety and content moderation.",
|
||||
"required": false
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/responses/{response_id}": {
|
||||
|
|
@ -1862,8 +1882,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Retrieve an OpenAI response by its ID.",
|
||||
"description": "Retrieve an OpenAI response by its ID.",
|
||||
"summary": "Get a model response.",
|
||||
"description": "Get a model response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -1905,8 +1925,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Delete an OpenAI response by its ID.",
|
||||
"description": "Delete an OpenAI response by its ID.",
|
||||
"summary": "Delete a response.",
|
||||
"description": "Delete a response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -1950,8 +1970,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "List input items for a given OpenAI response.",
|
||||
"description": "List input items for a given OpenAI response.",
|
||||
"summary": "List input items.",
|
||||
"description": "List input items.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -2043,8 +2063,8 @@
|
|||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"summary": "Run a shield.",
|
||||
"description": "Run a shield.",
|
||||
"summary": "Run shield.",
|
||||
"description": "Run shield.\nRun a shield.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -4176,8 +4196,8 @@
|
|||
"tags": [
|
||||
"Inspect"
|
||||
],
|
||||
"summary": "Get the version of the service.",
|
||||
"description": "Get the version of the service.",
|
||||
"summary": "Get version.",
|
||||
"description": "Get version.\nGet the version of the service.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -9625,6 +9645,21 @@
|
|||
"title": "OpenAIResponseText",
|
||||
"description": "Text response configuration for OpenAI responses."
|
||||
},
|
||||
"ResponseShieldSpec": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "The type/identifier of the shield."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "ResponseShieldSpec",
|
||||
"description": "Specification for a shield to apply during response generation."
|
||||
},
|
||||
"OpenAIResponseInputTool": {
|
||||
"oneOf": [
|
||||
{
|
||||
|
|
@ -18452,16 +18487,18 @@
|
|||
},
|
||||
{
|
||||
"name": "Files",
|
||||
"description": ""
|
||||
"description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
|
||||
"x-displayName": "Files"
|
||||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
|
||||
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"x-displayName": "Inference"
|
||||
},
|
||||
{
|
||||
"name": "Inspect",
|
||||
"description": ""
|
||||
"description": "APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.",
|
||||
"x-displayName": "Inspect"
|
||||
},
|
||||
{
|
||||
"name": "Models",
|
||||
|
|
@ -18473,17 +18510,18 @@
|
|||
},
|
||||
{
|
||||
"name": "Prompts",
|
||||
"description": "",
|
||||
"x-displayName": "Protocol for prompt management operations."
|
||||
"description": "Protocol for prompt management operations.",
|
||||
"x-displayName": "Prompts"
|
||||
},
|
||||
{
|
||||
"name": "Providers",
|
||||
"description": "",
|
||||
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
|
||||
"description": "Providers API for inspecting, listing, and modifying providers and their configurations.",
|
||||
"x-displayName": "Providers"
|
||||
},
|
||||
{
|
||||
"name": "Safety",
|
||||
"description": ""
|
||||
"description": "OpenAI-compatible Moderations API.",
|
||||
"x-displayName": "Safety"
|
||||
},
|
||||
{
|
||||
"name": "Scoring",
|
||||
|
|
|
|||
227
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -36,8 +36,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: List all chat completions.
|
||||
description: List all chat completions.
|
||||
summary: List chat completions.
|
||||
description: List chat completions.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
|
|
@ -90,10 +90,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
summary: Create chat completions.
|
||||
description: >-
|
||||
Create chat completions.
|
||||
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
parameters: []
|
||||
|
|
@ -125,8 +125,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: Describe a chat completion by its ID.
|
||||
description: Describe a chat completion by its ID.
|
||||
summary: Get chat completion.
|
||||
description: >-
|
||||
Get chat completion.
|
||||
|
||||
Describe a chat completion by its ID.
|
||||
parameters:
|
||||
- name: completion_id
|
||||
in: path
|
||||
|
|
@ -156,10 +159,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
summary: Create completion.
|
||||
description: >-
|
||||
Create completion.
|
||||
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
parameters: []
|
||||
|
|
@ -606,10 +609,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||
model.
|
||||
summary: Create embeddings.
|
||||
description: >-
|
||||
Create embeddings.
|
||||
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||
model.
|
||||
parameters: []
|
||||
|
|
@ -642,9 +645,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns a list of files that belong to the user's organization.
|
||||
summary: List files.
|
||||
description: >-
|
||||
List files.
|
||||
|
||||
Returns a list of files that belong to the user's organization.
|
||||
parameters:
|
||||
- name: after
|
||||
|
|
@ -702,11 +706,13 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Upload a file that can be used across various endpoints.
|
||||
summary: Upload file.
|
||||
description: >-
|
||||
Upload file.
|
||||
|
||||
Upload a file that can be used across various endpoints.
|
||||
|
||||
|
||||
The file upload should be a multipart form request with:
|
||||
|
||||
- file: The File object (not file name) to be uploaded.
|
||||
|
|
@ -755,9 +761,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns information about a specific file.
|
||||
summary: Retrieve file.
|
||||
description: >-
|
||||
Retrieve file.
|
||||
|
||||
Returns information about a specific file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
|
|
@ -789,8 +796,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: Delete a file.
|
||||
description: Delete a file.
|
||||
summary: Delete file.
|
||||
description: Delete file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
in: path
|
||||
|
|
@ -822,9 +829,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns the contents of the specified file.
|
||||
summary: Retrieve file content.
|
||||
description: >-
|
||||
Retrieve file content.
|
||||
|
||||
Returns the contents of the specified file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
|
|
@ -857,9 +865,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
summary: >-
|
||||
Get the current health status of the service.
|
||||
summary: Get health status.
|
||||
description: >-
|
||||
Get health status.
|
||||
|
||||
Get the current health status of the service.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
|
|
@ -885,9 +894,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
summary: >-
|
||||
List all available API routes with their methods and implementing providers.
|
||||
summary: List routes.
|
||||
description: >-
|
||||
List routes.
|
||||
|
||||
List all available API routes with their methods and implementing providers.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
|
|
@ -936,8 +946,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: Register a model.
|
||||
description: Register a model.
|
||||
summary: Register model.
|
||||
description: >-
|
||||
Register model.
|
||||
|
||||
Register a model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -967,8 +980,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: Get a model by its identifier.
|
||||
description: Get a model by its identifier.
|
||||
summary: Get model.
|
||||
description: >-
|
||||
Get model.
|
||||
|
||||
Get a model by its identifier.
|
||||
parameters:
|
||||
- name: model_id
|
||||
in: path
|
||||
|
|
@ -993,8 +1009,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: Unregister a model.
|
||||
description: Unregister a model.
|
||||
summary: Unregister model.
|
||||
description: >-
|
||||
Unregister model.
|
||||
|
||||
Unregister a model.
|
||||
parameters:
|
||||
- name: model_id
|
||||
in: path
|
||||
|
|
@ -1025,9 +1044,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
summary: >-
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
summary: Create moderation.
|
||||
description: >-
|
||||
Create moderation.
|
||||
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
parameters: []
|
||||
requestBody:
|
||||
|
|
@ -1083,8 +1103,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: Create a new prompt.
|
||||
description: Create a new prompt.
|
||||
summary: Create prompt.
|
||||
description: >-
|
||||
Create prompt.
|
||||
|
||||
Create a new prompt.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -1114,9 +1137,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: >-
|
||||
Get a prompt by its identifier and optional version.
|
||||
summary: Get prompt.
|
||||
description: >-
|
||||
Get prompt.
|
||||
|
||||
Get a prompt by its identifier and optional version.
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
|
|
@ -1154,9 +1178,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: >-
|
||||
Update an existing prompt (increments version).
|
||||
summary: Update prompt.
|
||||
description: >-
|
||||
Update prompt.
|
||||
|
||||
Update an existing prompt (increments version).
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
|
|
@ -1188,8 +1213,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: Delete a prompt.
|
||||
description: Delete a prompt.
|
||||
summary: Delete prompt.
|
||||
description: >-
|
||||
Delete prompt.
|
||||
|
||||
Delete a prompt.
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
in: path
|
||||
|
|
@ -1220,9 +1248,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: >-
|
||||
Set which version of a prompt should be the default in get_prompt (latest).
|
||||
summary: Set prompt version.
|
||||
description: >-
|
||||
Set prompt version.
|
||||
|
||||
Set which version of a prompt should be the default in get_prompt (latest).
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
|
|
@ -1260,8 +1289,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: List all versions of a specific prompt.
|
||||
description: List all versions of a specific prompt.
|
||||
summary: List prompt versions.
|
||||
description: >-
|
||||
List prompt versions.
|
||||
|
||||
List all versions of a specific prompt.
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
in: path
|
||||
|
|
@ -1293,8 +1325,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Providers
|
||||
summary: List all available providers.
|
||||
description: List all available providers.
|
||||
summary: List providers.
|
||||
description: >-
|
||||
List providers.
|
||||
|
||||
List all available providers.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
/v1/providers/{provider_id}:
|
||||
|
|
@ -1319,9 +1354,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Providers
|
||||
summary: >-
|
||||
Get detailed information about a specific provider.
|
||||
summary: Get provider.
|
||||
description: >-
|
||||
Get provider.
|
||||
|
||||
Get detailed information about a specific provider.
|
||||
parameters:
|
||||
- name: provider_id
|
||||
|
|
@ -1352,8 +1388,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: List all OpenAI responses.
|
||||
description: List all OpenAI responses.
|
||||
summary: List all responses.
|
||||
description: List all responses.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
|
|
@ -1404,8 +1440,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Create a new OpenAI response.
|
||||
description: Create a new OpenAI response.
|
||||
summary: Create a model response.
|
||||
description: Create a model response.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -1414,6 +1450,18 @@ paths:
|
|||
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
x-llama-stack-extra-body-params:
|
||||
- name: shields
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/ResponseShieldSpec'
|
||||
description: >-
|
||||
List of shields to apply during response generation. Shields provide safety
|
||||
and content moderation.
|
||||
required: false
|
||||
/v1/responses/{response_id}:
|
||||
get:
|
||||
responses:
|
||||
|
|
@ -1435,8 +1483,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Retrieve an OpenAI response by its ID.
|
||||
description: Retrieve an OpenAI response by its ID.
|
||||
summary: Get a model response.
|
||||
description: Get a model response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1466,8 +1514,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Delete an OpenAI response by its ID.
|
||||
description: Delete an OpenAI response by its ID.
|
||||
summary: Delete a response.
|
||||
description: Delete a response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1497,10 +1545,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: >-
|
||||
List input items for a given OpenAI response.
|
||||
description: >-
|
||||
List input items for a given OpenAI response.
|
||||
summary: List input items.
|
||||
description: List input items.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1569,8 +1615,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
summary: Run a shield.
|
||||
description: Run a shield.
|
||||
summary: Run shield.
|
||||
description: >-
|
||||
Run shield.
|
||||
|
||||
Run a shield.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -3126,8 +3175,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
summary: Get the version of the service.
|
||||
description: Get the version of the service.
|
||||
summary: Get version.
|
||||
description: >-
|
||||
Get version.
|
||||
|
||||
Get the version of the service.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
/v1beta/datasetio/append-rows/{dataset_id}:
|
||||
|
|
@ -7184,6 +7236,18 @@ components:
|
|||
title: OpenAIResponseText
|
||||
description: >-
|
||||
Text response configuration for OpenAI responses.
|
||||
ResponseShieldSpec:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
description: The type/identifier of the shield.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: ResponseShieldSpec
|
||||
description: >-
|
||||
Specification for a shield to apply during response generation.
|
||||
OpenAIResponseInputTool:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
|
||||
|
|
@ -13771,9 +13835,16 @@ tags:
|
|||
x-displayName: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
- name: Files
|
||||
description: ''
|
||||
description: >-
|
||||
This API is used to upload documents that can be used with other Llama Stack
|
||||
APIs.
|
||||
x-displayName: Files
|
||||
- name: Inference
|
||||
description: >-
|
||||
Llama Stack Inference API for generating completions, chat completions, and
|
||||
embeddings.
|
||||
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
are supported:
|
||||
|
||||
|
|
@ -13781,25 +13852,27 @@ tags:
|
|||
|
||||
- Embedding models: these models generate embeddings to be used for semantic
|
||||
search.
|
||||
x-displayName: >-
|
||||
Llama Stack Inference API for generating completions, chat completions, and
|
||||
embeddings.
|
||||
x-displayName: Inference
|
||||
- name: Inspect
|
||||
description: ''
|
||||
description: >-
|
||||
APIs for inspecting the Llama Stack service, including health status, available
|
||||
API routes with methods and implementing providers.
|
||||
x-displayName: Inspect
|
||||
- name: Models
|
||||
description: ''
|
||||
- name: PostTraining (Coming Soon)
|
||||
description: ''
|
||||
- name: Prompts
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Protocol for prompt management operations.
|
||||
x-displayName: Prompts
|
||||
- name: Providers
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||
x-displayName: Providers
|
||||
- name: Safety
|
||||
description: ''
|
||||
description: OpenAI-compatible Moderations API.
|
||||
x-displayName: Safety
|
||||
- name: Scoring
|
||||
description: ''
|
||||
- name: ScoringFunctions
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ from llama_stack.apis.inference import (
|
|||
from llama_stack.apis.safety import SafetyViolation
|
||||
from llama_stack.apis.tools import ToolDef
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
from llama_stack.schema_utils import ExtraBodyField, json_schema_type, register_schema, webmethod
|
||||
|
||||
from .openai_responses import (
|
||||
ListOpenAIResponseInputItem,
|
||||
|
|
@ -42,6 +42,20 @@ from .openai_responses import (
|
|||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ResponseShieldSpec(BaseModel):
|
||||
"""Specification for a shield to apply during response generation.
|
||||
|
||||
:param type: The type/identifier of the shield.
|
||||
"""
|
||||
|
||||
type: str
|
||||
# TODO: more fields to be added for shield configuration
|
||||
|
||||
|
||||
ResponseShield = str | ResponseShieldSpec
|
||||
|
||||
|
||||
class Attachment(BaseModel):
|
||||
"""An attachment to an agent turn.
|
||||
|
||||
|
|
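For reference, the new shield types added in the hunk above compose as a simple union. A minimal standalone sketch (the `shield_id` helper is hypothetical, added only to illustrate how either accepted form normalizes to a shield identifier):

```python
# The model mirrors the ResponseShieldSpec definition in the diff above;
# the helper and the example values are illustrative assumptions.
from pydantic import BaseModel


class ResponseShieldSpec(BaseModel):
    """Specification for a shield to apply during response generation."""

    type: str  # the type/identifier of the shield


ResponseShield = str | ResponseShieldSpec


def shield_id(shield: ResponseShield) -> str:
    """Normalize either accepted form to the shield identifier."""
    return shield if isinstance(shield, str) else shield.type


assert shield_id("llama-guard") == shield_id(ResponseShieldSpec(type="llama-guard"))
```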
@ -783,7 +797,7 @@ class Agents(Protocol):
|
|||
self,
|
||||
response_id: str,
|
||||
) -> OpenAIResponseObject:
|
||||
"""Retrieve an OpenAI response by its ID.
|
||||
"""Get a model response.
|
||||
|
||||
:param response_id: The ID of the OpenAI response to retrieve.
|
||||
:returns: An OpenAIResponseObject.
|
||||
|
|
@ -805,13 +819,20 @@ class Agents(Protocol):
        tools: list[OpenAIResponseInputTool] | None = None,
        include: list[str] | None = None,
        max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
        shields: Annotated[
            list[ResponseShield] | None,
            ExtraBodyField(
                "List of shields to apply during response generation. Shields provide safety and content moderation."
            ),
        ] = None,
    ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
        """Create a new OpenAI response.
        """Create a model response.

        :param input: Input message(s) to create the response.
        :param model: The underlying LLM used for completions.
        :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
        :param include: (Optional) Additional fields to include in the response.
        :param shields: (Optional) List of shields to apply during response generation. Can be shield IDs (strings) or shield specifications.
        :returns: An OpenAIResponseObject.
        """
        ...
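Because shields is declared through ExtraBodyField rather than as a first-class request field, an OpenAI-compatible client passes it via extra_body. A minimal usage sketch, assuming the stock OpenAI Python SDK pointed at a locally running stack; the base URL, model id, and shield id below are illustrative assumptions, not values taken from this diff:

    # Hedged sketch: forwarding the extra-body "shields" field from a client.
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed local stack endpoint

    response = client.responses.create(
        model="llama3.2:3b",                      # assumed registered model id
        input="Summarize the deployment notes.",
        extra_body={"shields": ["llama-guard"]},  # shield IDs, or ResponseShieldSpec-style dicts
    )
    print(response.id)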
|
||||
|
|
@ -825,7 +846,7 @@ class Agents(Protocol):
|
|||
model: str | None = None,
|
||||
order: Order | None = Order.desc,
|
||||
) -> ListOpenAIResponseObject:
|
||||
"""List all OpenAI responses.
|
||||
"""List all responses.
|
||||
|
||||
:param after: The ID of the last response to return.
|
||||
:param limit: The number of responses to return.
|
||||
|
|
@ -848,7 +869,7 @@ class Agents(Protocol):
|
|||
limit: int | None = 20,
|
||||
order: Order | None = Order.desc,
|
||||
) -> ListOpenAIResponseInputItem:
|
||||
"""List input items for a given OpenAI response.
|
||||
"""List input items.
|
||||
|
||||
:param response_id: The ID of the response to retrieve input items for.
|
||||
:param after: An item ID to list items after, used for pagination.
|
||||
|
|
@ -863,7 +884,7 @@ class Agents(Protocol):
|
|||
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
|
||||
"""Delete an OpenAI response by its ID.
|
||||
"""Delete a response.
|
||||
|
||||
:param response_id: The ID of the OpenAI response to delete.
|
||||
:returns: An OpenAIDeleteResponseObject
|
||||
|
|
|
|||
|
|
@ -104,6 +104,11 @@ class OpenAIFileDeleteResponse(BaseModel):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Files(Protocol):
|
||||
"""Files
|
||||
|
||||
This API is used to upload documents that can be used with other Llama Stack APIs.
|
||||
"""
|
||||
|
||||
# OpenAI Files API Endpoints
|
||||
@webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
|
||||
|
|
@ -113,7 +118,8 @@ class Files(Protocol):
|
|||
purpose: Annotated[OpenAIFilePurpose, Form()],
|
||||
expires_after: Annotated[ExpiresAfter | None, Form()] = None,
|
||||
) -> OpenAIFileObject:
|
||||
"""
|
||||
"""Upload file.
|
||||
|
||||
Upload a file that can be used across various endpoints.
|
||||
|
||||
The file upload should be a multipart form request with:
|
||||
|
|
@ -137,7 +143,8 @@ class Files(Protocol):
|
|||
order: Order | None = Order.desc,
|
||||
purpose: OpenAIFilePurpose | None = None,
|
||||
) -> ListOpenAIFileResponse:
|
||||
"""
|
||||
"""List files.
|
||||
|
||||
Returns a list of files that belong to the user's organization.
|
||||
|
||||
:param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
|
||||
|
|
@ -154,7 +161,8 @@ class Files(Protocol):
|
|||
self,
|
||||
file_id: str,
|
||||
) -> OpenAIFileObject:
|
||||
"""
|
||||
"""Retrieve file.
|
||||
|
||||
Returns information about a specific file.
|
||||
|
||||
:param file_id: The ID of the file to use for this request.
|
||||
|
|
@ -168,8 +176,7 @@ class Files(Protocol):
|
|||
self,
|
||||
file_id: str,
|
||||
) -> OpenAIFileDeleteResponse:
|
||||
"""
|
||||
Delete a file.
|
||||
"""Delete file.
|
||||
|
||||
:param file_id: The ID of the file to use for this request.
|
||||
:returns: An OpenAIFileDeleteResponse indicating successful deletion.
|
||||
|
|
@ -182,7 +189,8 @@ class Files(Protocol):
|
|||
self,
|
||||
file_id: str,
|
||||
) -> Response:
|
||||
"""
|
||||
"""Retrieve file content.
|
||||
|
||||
Returns the contents of the specified file.
|
||||
|
||||
:param file_id: The ID of the file to use for this request.
|
||||
|
|
|
|||
|
|
@ -1053,7 +1053,9 @@ class InferenceProvider(Protocol):
|
|||
# for fill-in-the-middle type completion
|
||||
suffix: str | None = None,
|
||||
) -> OpenAICompletion:
|
||||
"""Generate an OpenAI-compatible completion for the given prompt using the specified model.
|
||||
"""Create completion.
|
||||
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified model.
|
||||
|
||||
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
|
||||
:param prompt: The prompt to generate a completion for.
|
||||
|
|
@ -1105,7 +1107,9 @@ class InferenceProvider(Protocol):
|
|||
top_p: float | None = None,
|
||||
user: str | None = None,
|
||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||
"""Generate an OpenAI-compatible chat completion for the given messages using the specified model.
|
||||
"""Create chat completions.
|
||||
|
||||
Generate an OpenAI-compatible chat completion for the given messages using the specified model.
|
||||
|
||||
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
|
||||
:param messages: List of messages in the conversation.
|
||||
|
|
@ -1144,7 +1148,9 @@ class InferenceProvider(Protocol):
|
|||
dimensions: int | None = None,
|
||||
user: str | None = None,
|
||||
) -> OpenAIEmbeddingsResponse:
|
||||
"""Generate OpenAI-compatible embeddings for the given input using the specified model.
|
||||
"""Create embeddings.
|
||||
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified model.
|
||||
|
||||
:param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
|
||||
:param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings.
|
||||
|
|
@ -1157,7 +1163,9 @@ class InferenceProvider(Protocol):
|
|||
|
||||
|
||||
class Inference(InferenceProvider):
|
||||
"""Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
||||
"""Inference
|
||||
|
||||
Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models are supported:
|
||||
- LLM models: these models generate "raw" and "chat" (conversational) completions.
|
||||
|
|
@ -1173,7 +1181,7 @@ class Inference(InferenceProvider):
|
|||
model: str | None = None,
|
||||
order: Order | None = Order.desc,
|
||||
) -> ListOpenAIChatCompletionResponse:
|
||||
"""List all chat completions.
|
||||
"""List chat completions.
|
||||
|
||||
:param after: The ID of the last chat completion to return.
|
||||
:param limit: The maximum number of chat completions to return.
|
||||
|
|
@ -1188,7 +1196,9 @@ class Inference(InferenceProvider):
|
|||
)
|
||||
@webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
|
||||
"""Describe a chat completion by its ID.
|
||||
"""Get chat completion.
|
||||
|
||||
Describe a chat completion by its ID.
|
||||
|
||||
:param completion_id: ID of the chat completion.
|
||||
:returns: A OpenAICompletionWithInputMessages.
|
||||
|
|
|
|||
|
|
@ -58,9 +58,16 @@ class ListRoutesResponse(BaseModel):
|
|||
|
||||
@runtime_checkable
|
||||
class Inspect(Protocol):
|
||||
"""Inspect
|
||||
|
||||
APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.
|
||||
"""
|
||||
|
||||
@webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_routes(self) -> ListRoutesResponse:
|
||||
"""List all available API routes with their methods and implementing providers.
|
||||
"""List routes.
|
||||
|
||||
List all available API routes with their methods and implementing providers.
|
||||
|
||||
:returns: Response containing information about all available routes.
|
||||
"""
|
||||
|
|
@ -68,7 +75,9 @@ class Inspect(Protocol):
|
|||
|
||||
@webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def health(self) -> HealthInfo:
|
||||
"""Get the current health status of the service.
|
||||
"""Get health status.
|
||||
|
||||
Get the current health status of the service.
|
||||
|
||||
:returns: Health information indicating if the service is operational.
|
||||
"""
|
||||
|
|
@ -76,7 +85,9 @@ class Inspect(Protocol):
|
|||
|
||||
@webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def version(self) -> VersionInfo:
|
||||
"""Get the version of the service.
|
||||
"""Get version.
|
||||
|
||||
Get the version of the service.
|
||||
|
||||
:returns: Version information containing the service version number.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -124,7 +124,9 @@ class Models(Protocol):
|
|||
self,
|
||||
model_id: str,
|
||||
) -> Model:
|
||||
"""Get a model by its identifier.
|
||||
"""Get model.
|
||||
|
||||
Get a model by its identifier.
|
||||
|
||||
:param model_id: The identifier of the model to get.
|
||||
:returns: A Model.
|
||||
|
|
@ -140,7 +142,9 @@ class Models(Protocol):
|
|||
metadata: dict[str, Any] | None = None,
|
||||
model_type: ModelType | None = None,
|
||||
) -> Model:
|
||||
"""Register a model.
|
||||
"""Register model.
|
||||
|
||||
Register a model.
|
||||
|
||||
:param model_id: The identifier of the model to register.
|
||||
:param provider_model_id: The identifier of the model in the provider.
|
||||
|
|
@ -156,7 +160,9 @@ class Models(Protocol):
|
|||
self,
|
||||
model_id: str,
|
||||
) -> None:
|
||||
"""Unregister a model.
|
||||
"""Unregister model.
|
||||
|
||||
Unregister a model.
|
||||
|
||||
:param model_id: The identifier of the model to unregister.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -94,7 +94,9 @@ class ListPromptsResponse(BaseModel):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Prompts(Protocol):
|
||||
"""Protocol for prompt management operations."""
|
||||
"""Prompts
|
||||
|
||||
Protocol for prompt management operations."""
|
||||
|
||||
@webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_prompts(self) -> ListPromptsResponse:
|
||||
|
|
@ -109,7 +111,9 @@ class Prompts(Protocol):
|
|||
self,
|
||||
prompt_id: str,
|
||||
) -> ListPromptsResponse:
|
||||
"""List all versions of a specific prompt.
|
||||
"""List prompt versions.
|
||||
|
||||
List all versions of a specific prompt.
|
||||
|
||||
:param prompt_id: The identifier of the prompt to list versions for.
|
||||
:returns: A ListPromptsResponse containing all versions of the prompt.
|
||||
|
|
@ -122,7 +126,9 @@ class Prompts(Protocol):
|
|||
prompt_id: str,
|
||||
version: int | None = None,
|
||||
) -> Prompt:
|
||||
"""Get a prompt by its identifier and optional version.
|
||||
"""Get prompt.
|
||||
|
||||
Get a prompt by its identifier and optional version.
|
||||
|
||||
:param prompt_id: The identifier of the prompt to get.
|
||||
:param version: The version of the prompt to get (defaults to latest).
|
||||
|
|
@ -136,7 +142,9 @@ class Prompts(Protocol):
|
|||
prompt: str,
|
||||
variables: list[str] | None = None,
|
||||
) -> Prompt:
|
||||
"""Create a new prompt.
|
||||
"""Create prompt.
|
||||
|
||||
Create a new prompt.
|
||||
|
||||
:param prompt: The prompt text content with variable placeholders.
|
||||
:param variables: List of variable names that can be used in the prompt template.
|
||||
|
|
@ -153,7 +161,9 @@ class Prompts(Protocol):
|
|||
variables: list[str] | None = None,
|
||||
set_as_default: bool = True,
|
||||
) -> Prompt:
|
||||
"""Update an existing prompt (increments version).
|
||||
"""Update prompt.
|
||||
|
||||
Update an existing prompt (increments version).
|
||||
|
||||
:param prompt_id: The identifier of the prompt to update.
|
||||
:param prompt: The updated prompt text content.
|
||||
|
|
@ -169,7 +179,9 @@ class Prompts(Protocol):
|
|||
self,
|
||||
prompt_id: str,
|
||||
) -> None:
|
||||
"""Delete a prompt.
|
||||
"""Delete prompt.
|
||||
|
||||
Delete a prompt.
|
||||
|
||||
:param prompt_id: The identifier of the prompt to delete.
|
||||
"""
|
||||
|
|
@ -181,7 +193,9 @@ class Prompts(Protocol):
|
|||
prompt_id: str,
|
||||
version: int,
|
||||
) -> Prompt:
|
||||
"""Set which version of a prompt should be the default in get_prompt (latest).
|
||||
"""Set prompt version.
|
||||
|
||||
Set which version of a prompt should be the default in get_prompt (latest).
|
||||
|
||||
:param prompt_id: The identifier of the prompt.
|
||||
:param version: The version to set as default.
|
||||
|
|
|
|||
|
|
@ -42,13 +42,16 @@ class ListProvidersResponse(BaseModel):
|
|||
|
||||
@runtime_checkable
|
||||
class Providers(Protocol):
|
||||
"""
|
||||
"""Providers
|
||||
|
||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||
"""
|
||||
|
||||
@webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_providers(self) -> ListProvidersResponse:
|
||||
"""List all available providers.
|
||||
"""List providers.
|
||||
|
||||
List all available providers.
|
||||
|
||||
:returns: A ListProvidersResponse containing information about all providers.
|
||||
"""
|
||||
|
|
@ -56,7 +59,9 @@ class Providers(Protocol):
|
|||
|
||||
@webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def inspect_provider(self, provider_id: str) -> ProviderInfo:
|
||||
"""Get detailed information about a specific provider.
|
||||
"""Get provider.
|
||||
|
||||
Get detailed information about a specific provider.
|
||||
|
||||
:param provider_id: The ID of the provider to inspect.
|
||||
:returns: A ProviderInfo object containing the provider's details.
|
||||
|
|
|
|||
|
|
@ -96,6 +96,11 @@ class ShieldStore(Protocol):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Safety(Protocol):
|
||||
"""Safety
|
||||
|
||||
OpenAI-compatible Moderations API.
|
||||
"""
|
||||
|
||||
shield_store: ShieldStore
|
||||
|
||||
@webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
|
||||
|
|
@ -105,7 +110,9 @@ class Safety(Protocol):
|
|||
messages: list[Message],
|
||||
params: dict[str, Any],
|
||||
) -> RunShieldResponse:
|
||||
"""Run a shield.
|
||||
"""Run shield.
|
||||
|
||||
Run a shield.
|
||||
|
||||
:param shield_id: The identifier of the shield to run.
|
||||
:param messages: The messages to run the shield on.
|
||||
|
|
@ -117,7 +124,9 @@ class Safety(Protocol):
|
|||
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
|
||||
"""Classifies if text and/or image inputs are potentially harmful.
|
||||
"""Create moderation.
|
||||
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
:param input: Input (or inputs) to classify.
|
||||
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
|
||||
:param model: The content moderation model you would like to use.
|
||||
|
|
|
|||
|
|
@ -6,11 +6,18 @@
|
|||
|
||||
import argparse
|
||||
import os
|
||||
import ssl
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import uvicorn
|
||||
import yaml
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
from llama_stack.core.datatypes import LoggingConfig, StackRunConfig
|
||||
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars, validate_env_pair
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
REPO_ROOT = Path(__file__).parent.parent.parent.parent
|
||||
|
|
@ -146,23 +153,7 @@ class StackRun(Subcommand):
|
|||
# using the current environment packages.
|
||||
if not image_type and not image_name:
|
||||
logger.info("No image type or image name provided. Assuming environment packages.")
|
||||
from llama_stack.core.server.server import main as server_main
|
||||
|
||||
# Build the server args from the current args passed to the CLI
|
||||
server_args = argparse.Namespace()
|
||||
for arg in vars(args):
|
||||
# If this is a function, avoid passing it
|
||||
# "args" contains:
|
||||
# func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
|
||||
if callable(getattr(args, arg)):
|
||||
continue
|
||||
if arg == "config":
|
||||
server_args.config = str(config_file)
|
||||
else:
|
||||
setattr(server_args, arg, getattr(args, arg))
|
||||
|
||||
# Run the server
|
||||
server_main(server_args)
|
||||
self._uvicorn_run(config_file, args)
|
||||
else:
|
||||
run_args = formulate_run_args(image_type, image_name)
|
||||
|
||||
|
|
@ -184,6 +175,76 @@ class StackRun(Subcommand):

            run_command(run_args)

    def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
        if not config_file:
            self.parser.error("Config file is required")

        # Set environment variables if provided
        if args.env:
            for env_pair in args.env:
                try:
                    key, value = validate_env_pair(env_pair)
                    logger.info(f"Setting environment variable {key} => {value}")
                    os.environ[key] = value
                except ValueError as e:
                    logger.error(f"Error: {str(e)}")
                    self.parser.error(f"Invalid environment variable format: {env_pair}")

        config_file = resolve_config_or_distro(str(config_file), Mode.RUN)
        with open(config_file) as fp:
            config_contents = yaml.safe_load(fp)
            if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
                logger_config = LoggingConfig(**cfg)
            else:
                logger_config = None
            config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))

        port = args.port or config.server.port
        host = config.server.host or ["::", "0.0.0.0"]

        # Set the config file in environment so create_app can find it
        os.environ["LLAMA_STACK_CONFIG"] = str(config_file)

        uvicorn_config = {
            "factory": True,
            "host": host,
            "port": port,
            "lifespan": "on",
            "log_level": logger.getEffectiveLevel(),
            "log_config": logger_config,
        }

        keyfile = config.server.tls_keyfile
        certfile = config.server.tls_certfile
        if keyfile and certfile:
            uvicorn_config["ssl_keyfile"] = config.server.tls_keyfile
            uvicorn_config["ssl_certfile"] = config.server.tls_certfile
            if config.server.tls_cafile:
                uvicorn_config["ssl_ca_certs"] = config.server.tls_cafile
                uvicorn_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED

                logger.info(
                    f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}"
                )
            else:
                logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")

        logger.info(f"Listening on {host}:{port}")

        # We need to catch KeyboardInterrupt because uvicorn's signal handling
        # re-raises SIGINT signals using signal.raise_signal(), which Python
        # converts to KeyboardInterrupt. Without this catch, we'd get a confusing
        # stack trace when using Ctrl+C or kill -2 (SIGINT).
        # SIGTERM (kill -15) works fine without this because Python doesn't
        # have a default handler for it.
        #
        # Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
        # signal handling but this is quite intrusive and not worth the effort.
        try:
            uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)
        except (KeyboardInterrupt, SystemExit):
            logger.info("Received interrupt signal, shutting down gracefully...")

    def _start_ui_development_server(self, stack_server_port: int):
        logger.info("Attempting to start UI development server...")
        # Check if npm is available
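The run path above leans on two things worth calling out: uvicorn's factory mode (the import string names a zero-argument callable that builds the app) and the LLAMA_STACK_CONFIG environment variable as the only channel for handing the config path to that factory. A self-contained sketch of the same pattern with an illustrative module name and env var, not the real server:

    # Hedged sketch of the uvicorn factory pattern; save as example_factory.py.
    import os

    import uvicorn
    from fastapi import FastAPI


    def create_app() -> FastAPI:
        # Configuration travels through an env var because the factory takes no arguments.
        config_path = os.environ.get("EXAMPLE_APP_CONFIG", "run.yaml")  # assumed env var name
        return FastAPI(title=f"configured from {config_path}")


    if __name__ == "__main__":
        os.environ["EXAMPLE_APP_CONFIG"] = "./run.yaml"
        # factory=True makes uvicorn call create_app() instead of treating it as an app object.
        uvicorn.run("example_factory:create_app", factory=True, host="127.0.0.1", port=8321)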
|
||||
|
|
|
|||
|
|
@ -324,14 +324,14 @@ fi
|
|||
RUN pip uninstall -y uv
|
||||
EOF
|
||||
|
||||
# If a run config is provided, we use the --config flag
|
||||
# If a run config is provided, we use the llama stack CLI
|
||||
if [[ -n "$run_config" ]]; then
|
||||
add_to_container << EOF
|
||||
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$RUN_CONFIG_PATH"]
|
||||
ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"]
|
||||
EOF
|
||||
elif [[ "$distro_or_config" != *.yaml ]]; then
|
||||
add_to_container << EOF
|
||||
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$distro_or_config"]
|
||||
ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"]
|
||||
EOF
|
||||
fi
|
||||
|
||||
|
|
|
|||
|
|
@ -243,6 +243,7 @@ def get_external_providers_from_module(
|
|||
spec = module.get_provider_spec()
|
||||
else:
|
||||
# pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon build and run
|
||||
# in the case we are building we CANNOT import this module of course because it has not been installed.
|
||||
spec = ProviderSpec(
|
||||
api=Api(provider_api),
|
||||
provider_type=provider.provider_type,
|
||||
|
|
@ -251,9 +252,20 @@ def get_external_providers_from_module(
|
|||
config_class="",
|
||||
)
|
||||
provider_type = provider.provider_type
|
||||
# in the case we are building we CANNOT import this module of course because it has not been installed.
|
||||
# return a partially filled out spec that the build script will populate.
|
||||
registry[Api(provider_api)][provider_type] = spec
|
||||
if isinstance(spec, list):
|
||||
# optionally allow people to pass inline and remote provider specs as a returned list.
|
||||
# with the old method, users could pass in directories of specs using overlapping code
|
||||
# we want to ensure we preserve that flexibility in this method.
|
||||
logger.info(
|
||||
f"Detected a list of external provider specs from {provider.module} adding all to the registry"
|
||||
)
|
||||
for provider_spec in spec:
|
||||
if provider_spec.provider_type != provider.provider_type:
|
||||
continue
|
||||
logger.info(f"Adding {provider.provider_type} to registry")
|
||||
registry[Api(provider_api)][provider.provider_type] = provider_spec
|
||||
else:
|
||||
registry[Api(provider_api)][provider_type] = spec
|
||||
except ModuleNotFoundError as exc:
|
||||
raise ValueError(
|
||||
"get_provider_spec not found. If specifying an external provider via `module` in the Provider spec, the Provider must have the `provider.get_provider_spec` module available"
|
||||
|
|
|
|||
|
|
@ -374,6 +374,10 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
        body = options.params or {}
        body |= options.json_data or {}

        # Merge extra_json parameters (extra_body from SDK is converted to extra_json)
        if hasattr(options, "extra_json") and options.extra_json:
            body |= options.extra_json

        matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
        body |= path_params
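The merge order matters: query/body params first, then the JSON payload, then extra_json, and finally path params, with later dictionaries winning on key clashes. A tiny illustrative sketch (all values made up) of how an SDK-side extra_body ends up in the request body:

    # Hedged sketch of the merge order shown above; every value here is illustrative.
    options_params = {"limit": 10}                      # query/body params
    options_json_data = {"model": "llama3.2:3b"}        # JSON payload
    options_extra_json = {"shields": ["llama-guard"]}   # extra_body converted by the SDK
    path_params = {"response_id": "resp_123"}

    body: dict = {}
    body |= options_params
    body |= options_json_data
    body |= options_extra_json
    body |= path_params
    print(body)
    # {'limit': 10, 'model': 'llama3.2:3b', 'shields': ['llama-guard'], 'response_id': 'resp_123'}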
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import functools
|
||||
|
|
@ -12,7 +11,6 @@ import inspect
|
|||
import json
|
||||
import logging # allow-direct-logging
|
||||
import os
|
||||
import ssl
|
||||
import sys
|
||||
import traceback
|
||||
import warnings
|
||||
|
|
@ -35,7 +33,6 @@ from pydantic import BaseModel, ValidationError
|
|||
|
||||
from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
|
||||
from llama_stack.apis.common.responses import PaginatedResponse
|
||||
from llama_stack.cli.utils import add_config_distro_args, get_config_from_args
|
||||
from llama_stack.core.access_control.access_control import AccessDeniedError
|
||||
from llama_stack.core.datatypes import (
|
||||
AuthenticationRequiredError,
|
||||
|
|
@ -55,7 +52,6 @@ from llama_stack.core.stack import (
|
|||
Stack,
|
||||
cast_image_name_to_string,
|
||||
replace_env_vars,
|
||||
validate_env_pair,
|
||||
)
|
||||
from llama_stack.core.utils.config import redact_sensitive_fields
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
|
|
@ -333,23 +329,18 @@ class ClientVersionMiddleware:
            return await self.app(scope, receive, send)


def create_app(
    config_file: str | None = None,
    env_vars: list[str] | None = None,
) -> StackApp:
def create_app() -> StackApp:
    """Create and configure the FastAPI application.

    Args:
        config_file: Path to config file. If None, uses LLAMA_STACK_CONFIG env var or default resolution.
        env_vars: List of environment variables in KEY=value format.
        disable_version_check: Whether to disable version checking. If None, uses LLAMA_STACK_DISABLE_VERSION_CHECK env var.
    This factory function reads configuration from environment variables:
    - LLAMA_STACK_CONFIG: Path to config file (required)

    Returns:
        Configured StackApp instance.
    """
    config_file = config_file or os.getenv("LLAMA_STACK_CONFIG")
    config_file = os.getenv("LLAMA_STACK_CONFIG")
    if config_file is None:
        raise ValueError("No config file provided and LLAMA_STACK_CONFIG env var is not set")
        raise ValueError("LLAMA_STACK_CONFIG environment variable is required")

    config_file = resolve_config_or_distro(config_file, Mode.RUN)
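With the signature reduced to a zero-argument factory, anything that needs an app instance (tests, ASGI servers, tooling) sets LLAMA_STACK_CONFIG first and then calls create_app(). A hedged sketch of that flow, assuming a valid run config exists at the illustrative path:

    # Hedged sketch: exercising the env-driven factory directly.
    import os

    from llama_stack.core.server.server import create_app

    os.environ["LLAMA_STACK_CONFIG"] = "./run.yaml"  # illustrative path to a run config
    app = create_app()  # raises ValueError if LLAMA_STACK_CONFIG is unset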
|
||||
|
||||
|
|
@ -361,16 +352,6 @@ def create_app(
|
|||
logger_config = LoggingConfig(**cfg)
|
||||
logger = get_logger(name=__name__, category="core::server", config=logger_config)
|
||||
|
||||
if env_vars:
|
||||
for env_pair in env_vars:
|
||||
try:
|
||||
key, value = validate_env_pair(env_pair)
|
||||
logger.info(f"Setting environment variable {key} => {value}")
|
||||
os.environ[key] = value
|
||||
except ValueError as e:
|
||||
logger.error(f"Error: {str(e)}")
|
||||
raise ValueError(f"Invalid environment variable format: {env_pair}") from e
|
||||
|
||||
config = replace_env_vars(config_contents)
|
||||
config = StackRunConfig(**cast_image_name_to_string(config))
|
||||
|
||||
|
|
@ -494,101 +475,6 @@ def create_app(
|
|||
return app
|
||||
|
||||
|
||||
def main(args: argparse.Namespace | None = None):
|
||||
"""Start the LlamaStack server."""
|
||||
parser = argparse.ArgumentParser(description="Start the LlamaStack server.")
|
||||
|
||||
add_config_distro_args(parser)
|
||||
parser.add_argument(
|
||||
"--port",
|
||||
type=int,
|
||||
default=int(os.getenv("LLAMA_STACK_PORT", 8321)),
|
||||
help="Port to listen on",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--env",
|
||||
action="append",
|
||||
help="Environment variables in KEY=value format. Can be specified multiple times.",
|
||||
)
|
||||
|
||||
# Determine whether the server args are being passed by the "run" command, if this is the case
|
||||
# the args will be passed as a Namespace object to the main function, otherwise they will be
|
||||
# parsed from the command line
|
||||
if args is None:
|
||||
args = parser.parse_args()
|
||||
|
||||
config_or_distro = get_config_from_args(args)
|
||||
|
||||
try:
|
||||
app = create_app(
|
||||
config_file=config_or_distro,
|
||||
env_vars=args.env,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating app: {str(e)}")
|
||||
sys.exit(1)
|
||||
|
||||
config_file = resolve_config_or_distro(config_or_distro, Mode.RUN)
|
||||
with open(config_file) as fp:
|
||||
config_contents = yaml.safe_load(fp)
|
||||
if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
|
||||
logger_config = LoggingConfig(**cfg)
|
||||
else:
|
||||
logger_config = None
|
||||
config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
|
||||
|
||||
import uvicorn
|
||||
|
||||
# Configure SSL if certificates are provided
|
||||
port = args.port or config.server.port
|
||||
|
||||
ssl_config = None
|
||||
keyfile = config.server.tls_keyfile
|
||||
certfile = config.server.tls_certfile
|
||||
|
||||
if keyfile and certfile:
|
||||
ssl_config = {
|
||||
"ssl_keyfile": keyfile,
|
||||
"ssl_certfile": certfile,
|
||||
}
|
||||
if config.server.tls_cafile:
|
||||
ssl_config["ssl_ca_certs"] = config.server.tls_cafile
|
||||
ssl_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
|
||||
logger.info(
|
||||
f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}"
|
||||
)
|
||||
else:
|
||||
logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
|
||||
|
||||
listen_host = config.server.host or ["::", "0.0.0.0"]
|
||||
logger.info(f"Listening on {listen_host}:{port}")
|
||||
|
||||
uvicorn_config = {
|
||||
"app": app,
|
||||
"host": listen_host,
|
||||
"port": port,
|
||||
"lifespan": "on",
|
||||
"log_level": logger.getEffectiveLevel(),
|
||||
"log_config": logger_config,
|
||||
}
|
||||
if ssl_config:
|
||||
uvicorn_config.update(ssl_config)
|
||||
|
||||
# We need to catch KeyboardInterrupt because uvicorn's signal handling
|
||||
# re-raises SIGINT signals using signal.raise_signal(), which Python
|
||||
# converts to KeyboardInterrupt. Without this catch, we'd get a confusing
|
||||
# stack trace when using Ctrl+C or kill -2 (SIGINT).
|
||||
# SIGTERM (kill -15) works fine without this because Python doesn't
|
||||
# have a default handler for it.
|
||||
#
|
||||
# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
|
||||
# signal handling but this is quite intrusive and not worth the effort.
|
||||
try:
|
||||
asyncio.run(uvicorn.Server(uvicorn.Config(**uvicorn_config)).serve())
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
logger.info("Received interrupt signal, shutting down gracefully...")
|
||||
|
||||
|
||||
def _log_run_config(run_config: StackRunConfig):
|
||||
"""Logs the run config with redacted fields and disabled providers removed."""
|
||||
logger.info("Run configuration:")
|
||||
|
|
@ -615,7 +501,3 @@ def remove_disabled_providers(obj):
|
|||
return [item for item in (remove_disabled_providers(i) for i in obj) if item is not None]
|
||||
else:
|
||||
return obj
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ if [[ "$env_type" == "venv" ]]; then
|
|||
yaml_config_arg=""
|
||||
fi
|
||||
|
||||
$PYTHON_BINARY -m llama_stack.core.server.server \
|
||||
llama stack run \
|
||||
$yaml_config_arg \
|
||||
--port "$port" \
|
||||
$env_vars \
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from pathlib import Path
|
|||
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(__name__, "tokenizer_utils")
|
||||
logger = get_logger(__name__, "models")
|
||||
|
||||
|
||||
def load_bpe_file(model_path: Path) -> dict[bytes, int]:
|
||||
|
|
|
|||
|
|
@ -329,6 +329,7 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
tools: list[OpenAIResponseInputTool] | None = None,
|
||||
include: list[str] | None = None,
|
||||
max_infer_iters: int | None = 10,
|
||||
shields: list | None = None,
|
||||
) -> OpenAIResponseObject:
|
||||
return await self.openai_responses_impl.create_openai_response(
|
||||
input,
|
||||
|
|
@ -342,6 +343,7 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
tools,
|
||||
include,
|
||||
max_infer_iters,
|
||||
shields,
|
||||
)
|
||||
|
||||
async def list_openai_responses(
|
||||
|
|
|
|||
|
|
@ -208,10 +208,15 @@ class OpenAIResponsesImpl:
        tools: list[OpenAIResponseInputTool] | None = None,
        include: list[str] | None = None,
        max_infer_iters: int | None = 10,
        shields: list | None = None,
    ):
        stream = bool(stream)
        text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text

        # Shields parameter received via extra_body - not yet implemented
        if shields is not None:
            raise NotImplementedError("Shields parameter is not yet implemented in the meta-reference provider")

        stream_gen = self._create_streaming_response(
            input=input,
            model=model,
|
||||
|
|
|
|||
|
|
@ -52,9 +52,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
api=Api.inference,
|
||||
adapter_type="cerebras",
|
||||
provider_type="remote::cerebras",
|
||||
pip_packages=[
|
||||
"cerebras_cloud_sdk",
|
||||
],
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.remote.inference.cerebras",
|
||||
config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
|
||||
description="Cerebras inference provider for running models on Cerebras Cloud platform.",
|
||||
|
|
@ -169,7 +167,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
api=Api.inference,
|
||||
adapter_type="openai",
|
||||
provider_type="remote::openai",
|
||||
pip_packages=["litellm"],
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.remote.inference.openai",
|
||||
config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
|
||||
|
|
@ -179,7 +177,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
api=Api.inference,
|
||||
adapter_type="anthropic",
|
||||
provider_type="remote::anthropic",
|
||||
pip_packages=["litellm"],
|
||||
pip_packages=["anthropic"],
|
||||
module="llama_stack.providers.remote.inference.anthropic",
|
||||
config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
|
||||
|
|
@ -189,9 +187,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
api=Api.inference,
|
||||
adapter_type="gemini",
|
||||
provider_type="remote::gemini",
|
||||
pip_packages=[
|
||||
"litellm",
|
||||
],
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.remote.inference.gemini",
|
||||
config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
|
||||
|
|
@ -202,7 +198,6 @@ def available_providers() -> list[ProviderSpec]:
|
|||
adapter_type="vertexai",
|
||||
provider_type="remote::vertexai",
|
||||
pip_packages=[
|
||||
"litellm",
|
||||
"google-cloud-aiplatform",
|
||||
],
|
||||
module="llama_stack.providers.remote.inference.vertexai",
|
||||
|
|
@ -233,9 +228,7 @@ Available Models:
|
|||
api=Api.inference,
|
||||
adapter_type="groq",
|
||||
provider_type="remote::groq",
|
||||
pip_packages=[
|
||||
"litellm",
|
||||
],
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.remote.inference.groq",
|
||||
config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
|
||||
|
|
@ -245,7 +238,7 @@ Available Models:
|
|||
api=Api.inference,
|
||||
adapter_type="llama-openai-compat",
|
||||
provider_type="remote::llama-openai-compat",
|
||||
pip_packages=["litellm"],
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.remote.inference.llama_openai_compat",
|
||||
config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
|
||||
|
|
@ -255,9 +248,7 @@ Available Models:
|
|||
api=Api.inference,
|
||||
adapter_type="sambanova",
|
||||
provider_type="remote::sambanova",
|
||||
pip_packages=[
|
||||
"litellm",
|
||||
],
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.remote.inference.sambanova",
|
||||
config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
|
||||
|
|
@ -287,7 +278,7 @@ Available Models:
|
|||
api=Api.inference,
|
||||
provider_type="remote::azure",
|
||||
adapter_type="azure",
|
||||
pip_packages=["litellm"],
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.remote.inference.azure",
|
||||
config_class="llama_stack.providers.remote.inference.azure.AzureConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.azure.config.AzureProviderDataValidator",
|
||||
|
|
|
|||
|
|
@ -10,6 +10,6 @@ from .config import AnthropicConfig
|
|||
async def get_adapter_impl(config: AnthropicConfig, _deps):
|
||||
from .anthropic import AnthropicInferenceAdapter
|
||||
|
||||
impl = AnthropicInferenceAdapter(config)
|
||||
impl = AnthropicInferenceAdapter(config=config)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
|
|
|||
|
|
@ -4,13 +4,19 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
||||
from collections.abc import Iterable
|
||||
|
||||
from anthropic import AsyncAnthropic
|
||||
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
from .config import AnthropicConfig
|
||||
|
||||
|
||||
class AnthropicInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||
class AnthropicInferenceAdapter(OpenAIMixin):
|
||||
config: AnthropicConfig
|
||||
|
||||
provider_data_api_key_field: str = "anthropic_api_key"
|
||||
# source: https://docs.claude.com/en/docs/build-with-claude/embeddings
|
||||
# TODO: add support for voyageai, which is where these models are hosted
|
||||
# embedding_model_metadata = {
|
||||
|
|
@ -23,22 +29,11 @@ class AnthropicInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
|||
# "voyage-multimodal-3": {"embedding_dimension": 1024, "context_length": 32000},
|
||||
# }
|
||||
|
||||
def __init__(self, config: AnthropicConfig) -> None:
|
||||
LiteLLMOpenAIMixin.__init__(
|
||||
self,
|
||||
litellm_provider_name="anthropic",
|
||||
api_key_from_config=config.api_key,
|
||||
provider_data_api_key_field="anthropic_api_key",
|
||||
)
|
||||
self.config = config
|
||||
|
||||
async def initialize(self) -> None:
|
||||
await super().initialize()
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
await super().shutdown()
|
||||
|
||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
||||
def get_api_key(self) -> str:
|
||||
return self.config.api_key or ""
|
||||
|
||||
def get_base_url(self):
|
||||
return "https://api.anthropic.com/v1"
|
||||
|
||||
async def list_provider_model_ids(self) -> Iterable[str]:
|
||||
return [m.id async for m in AsyncAnthropic(api_key=self.get_api_key()).models.list()]
|
||||
|
|
|
|||
|
|
@ -10,6 +10,6 @@ from .config import AzureConfig
|
|||
async def get_adapter_impl(config: AzureConfig, _deps):
|
||||
from .azure import AzureInferenceAdapter
|
||||
|
||||
impl = AzureInferenceAdapter(config)
|
||||
impl = AzureInferenceAdapter(config=config)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
|
|
|||
|
|
@ -4,31 +4,20 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from llama_stack.apis.inference import ChatCompletionRequest
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
|
||||
LiteLLMOpenAIMixin,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
from .config import AzureConfig
|
||||
|
||||
|
||||
class AzureInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||
def __init__(self, config: AzureConfig) -> None:
|
||||
LiteLLMOpenAIMixin.__init__(
|
||||
self,
|
||||
litellm_provider_name="azure",
|
||||
api_key_from_config=config.api_key.get_secret_value(),
|
||||
provider_data_api_key_field="azure_api_key",
|
||||
openai_compat_api_base=str(config.api_base),
|
||||
)
|
||||
self.config = config
|
||||
class AzureInferenceAdapter(OpenAIMixin):
|
||||
config: AzureConfig
|
||||
|
||||
# Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
|
||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
||||
provider_data_api_key_field: str = "azure_api_key"
|
||||
|
||||
def get_api_key(self) -> str:
|
||||
return self.config.api_key.get_secret_value()
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
"""
|
||||
|
|
@ -37,26 +26,3 @@ class AzureInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
|||
Returns the Azure API base URL from the configuration.
|
||||
"""
|
||||
return urljoin(str(self.config.api_base), "/openai/v1")
|
||||
|
||||
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
|
||||
# Get base parameters from parent
|
||||
params = await super()._get_params(request)
|
||||
|
||||
# Add Azure specific parameters
|
||||
provider_data = self.get_request_provider_data()
|
||||
if provider_data:
|
||||
if getattr(provider_data, "azure_api_key", None):
|
||||
params["api_key"] = provider_data.azure_api_key
|
||||
if getattr(provider_data, "azure_api_base", None):
|
||||
params["api_base"] = provider_data.azure_api_base
|
||||
if getattr(provider_data, "azure_api_version", None):
|
||||
params["api_version"] = provider_data.azure_api_version
|
||||
if getattr(provider_data, "azure_api_type", None):
|
||||
params["api_type"] = provider_data.azure_api_type
|
||||
else:
|
||||
params["api_key"] = self.config.api_key.get_secret_value()
|
||||
params["api_base"] = str(self.config.api_base)
|
||||
params["api_version"] = self.config.api_version
|
||||
params["api_type"] = self.config.api_type
|
||||
|
||||
return params
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ async def get_adapter_impl(config: CerebrasImplConfig, _deps):
|
|||
|
||||
assert isinstance(config, CerebrasImplConfig), f"Unexpected config type: {type(config)}"
|
||||
|
||||
impl = CerebrasInferenceAdapter(config)
|
||||
impl = CerebrasInferenceAdapter(config=config)
|
||||
|
||||
await impl.initialize()
|
||||
|
||||
|
|
|
|||
|
|
@ -6,39 +6,14 @@
|
|||
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from cerebras.cloud.sdk import AsyncCerebras
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
CompletionRequest,
|
||||
Inference,
|
||||
OpenAIEmbeddingsResponse,
|
||||
TopKSamplingStrategy,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
get_sampling_options,
|
||||
)
|
||||
from llama_stack.apis.inference import OpenAIEmbeddingsResponse
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
chat_completion_request_to_prompt,
|
||||
completion_request_to_prompt,
|
||||
)
|
||||
|
||||
from .config import CerebrasImplConfig
|
||||
|
||||
|
||||
class CerebrasInferenceAdapter(
|
||||
OpenAIMixin,
|
||||
Inference,
|
||||
):
|
||||
def __init__(self, config: CerebrasImplConfig) -> None:
|
||||
self.config = config
|
||||
|
||||
# TODO: make this use provider data, etc. like other providers
|
||||
self._cerebras_client = AsyncCerebras(
|
||||
base_url=self.config.base_url,
|
||||
api_key=self.config.api_key.get_secret_value(),
|
||||
)
|
||||
class CerebrasInferenceAdapter(OpenAIMixin):
|
||||
config: CerebrasImplConfig
|
||||
|
||||
def get_api_key(self) -> str:
|
||||
return self.config.api_key.get_secret_value()
|
||||
|
|
@ -46,31 +21,6 @@ class CerebrasInferenceAdapter(
|
|||
def get_base_url(self) -> str:
|
||||
return urljoin(self.config.base_url, "v1")
|
||||
|
||||
async def initialize(self) -> None:
|
||||
return
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
pass
|
||||
|
||||
async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict:
|
||||
if request.sampling_params and isinstance(request.sampling_params.strategy, TopKSamplingStrategy):
|
||||
raise ValueError("`top_k` not supported by Cerebras")
|
||||
|
||||
prompt = ""
|
||||
if isinstance(request, ChatCompletionRequest):
|
||||
prompt = await chat_completion_request_to_prompt(request, self.get_llama_model(request.model))
|
||||
elif isinstance(request, CompletionRequest):
|
||||
prompt = await completion_request_to_prompt(request)
|
||||
else:
|
||||
raise ValueError(f"Unknown request type {type(request)}")
|
||||
|
||||
return {
|
||||
"model": request.model,
|
||||
"prompt": prompt,
|
||||
"stream": request.stream,
|
||||
**get_sampling_options(request.sampling_params),
|
||||
}
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ class CerebrasImplConfig(RemoteInferenceProviderConfig):
|
|||
description="Base URL for the Cerebras API",
|
||||
)
|
||||
api_key: SecretStr = Field(
|
||||
default=SecretStr(os.environ.get("CEREBRAS_API_KEY")),
|
||||
default=SecretStr(os.environ.get("CEREBRAS_API_KEY")), # type: ignore[arg-type]
|
||||
description="Cerebras API Key",
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,6 @@ async def get_adapter_impl(config: DatabricksImplConfig, _deps):
|
|||
from .databricks import DatabricksInferenceAdapter
|
||||
|
||||
assert isinstance(config, DatabricksImplConfig), f"Unexpected config type: {type(config)}"
|
||||
impl = DatabricksInferenceAdapter(config)
|
||||
impl = DatabricksInferenceAdapter(config=config)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
|
|
|||
|
|
@ -14,12 +14,12 @@ from llama_stack.schema_utils import json_schema_type
|
|||
|
||||
@json_schema_type
|
||||
class DatabricksImplConfig(RemoteInferenceProviderConfig):
|
||||
url: str = Field(
|
||||
url: str | None = Field(
|
||||
default=None,
|
||||
description="The URL for the Databricks model serving endpoint",
|
||||
)
|
||||
api_token: SecretStr = Field(
|
||||
default=SecretStr(None),
|
||||
default=SecretStr(None), # type: ignore[arg-type]
|
||||
description="The Databricks API token",
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -4,16 +4,12 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from collections.abc import Iterable
|
||||
from typing import Any
|
||||
|
||||
from databricks.sdk import WorkspaceClient
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
Inference,
|
||||
Model,
|
||||
OpenAICompletion,
|
||||
)
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.apis.inference import OpenAICompletion
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
|
|
@ -22,30 +18,31 @@ from .config import DatabricksImplConfig
|
|||
logger = get_logger(name=__name__, category="inference::databricks")
|
||||
|
||||
|
||||
class DatabricksInferenceAdapter(
|
||||
OpenAIMixin,
|
||||
Inference,
|
||||
):
|
||||
class DatabricksInferenceAdapter(OpenAIMixin):
|
||||
config: DatabricksImplConfig
|
||||
|
||||
# source: https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/supported-models
|
||||
embedding_model_metadata = {
|
||||
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||
"databricks-gte-large-en": {"embedding_dimension": 1024, "context_length": 8192},
|
||||
"databricks-bge-large-en": {"embedding_dimension": 1024, "context_length": 512},
|
||||
}
|
||||
|
||||
def __init__(self, config: DatabricksImplConfig) -> None:
|
||||
self.config = config
|
||||
|
||||
def get_api_key(self) -> str:
|
||||
return self.config.api_token.get_secret_value()
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
return f"{self.config.url}/serving-endpoints"
|
||||
|
||||
async def initialize(self) -> None:
|
||||
return
|
||||
async def list_provider_model_ids(self) -> Iterable[str]:
|
||||
return [
|
||||
endpoint.name
|
||||
for endpoint in WorkspaceClient(
|
||||
host=self.config.url, token=self.get_api_key()
|
||||
).serving_endpoints.list() # TODO: this is not async
|
||||
]
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
pass
|
||||
async def should_refresh_models(self) -> bool:
|
||||
return False
|
||||
|
||||
async def openai_completion(
|
||||
self,
|
||||
|
|
@ -71,32 +68,3 @@ class DatabricksInferenceAdapter(
|
|||
suffix: str | None = None,
|
||||
) -> OpenAICompletion:
|
||||
raise NotImplementedError()
|
||||
|
||||
async def list_models(self) -> list[Model] | None:
|
||||
self._model_cache = {} # from OpenAIMixin
|
||||
ws_client = WorkspaceClient(host=self.config.url, token=self.get_api_key()) # TODO: this is not async
|
||||
endpoints = ws_client.serving_endpoints.list()
|
||||
for endpoint in endpoints:
|
||||
model = Model(
|
||||
provider_id=self.__provider_id__,
|
||||
provider_resource_id=endpoint.name,
|
||||
identifier=endpoint.name,
|
||||
)
|
||||
if endpoint.task == "llm/v1/chat":
|
||||
model.model_type = ModelType.llm # this is redundant, but informative
|
||||
elif endpoint.task == "llm/v1/embeddings":
|
||||
if endpoint.name not in self.embedding_model_metadata:
|
||||
logger.warning(f"No metadata information available for embedding model {endpoint.name}, skipping.")
|
||||
continue
|
||||
model.model_type = ModelType.embedding
|
||||
model.metadata = self.embedding_model_metadata[endpoint.name]
|
||||
else:
|
||||
logger.warning(f"Unknown model type, skipping: {endpoint}")
|
||||
continue
|
||||
|
||||
self._model_cache[endpoint.name] = model
|
||||
|
||||
return list(self._model_cache.values())
|
||||
|
||||
async def should_refresh_models(self) -> bool:
|
||||
return False
|
||||
|
|
|
|||
|
|
@ -17,6 +17,6 @@ async def get_adapter_impl(config: FireworksImplConfig, _deps):
|
|||
from .fireworks import FireworksInferenceAdapter
|
||||
|
||||
assert isinstance(config, FireworksImplConfig), f"Unexpected config type: {type(config)}"
|
||||
impl = FireworksInferenceAdapter(config)
|
||||
impl = FireworksInferenceAdapter(config=config)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
|
|
|||
|
|
@ -5,124 +5,26 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from fireworks.client import Fireworks
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
Inference,
|
||||
LogProbConfig,
|
||||
ResponseFormat,
|
||||
ResponseFormatType,
|
||||
SamplingParams,
|
||||
)
|
||||
from llama_stack.core.request_headers import NeedsRequestProviderData
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelRegistryHelper,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
convert_message_to_openai_dict,
|
||||
get_sampling_options,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
chat_completion_request_to_prompt,
|
||||
request_has_media,
|
||||
)
|
||||
|
||||
from .config import FireworksImplConfig
|
||||
|
||||
logger = get_logger(name=__name__, category="inference::fireworks")
|
||||
|
||||
|
||||
class FireworksInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData):
|
||||
embedding_model_metadata = {
|
||||
class FireworksInferenceAdapter(OpenAIMixin):
|
||||
config: FireworksImplConfig
|
||||
|
||||
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||
"nomic-ai/nomic-embed-text-v1.5": {"embedding_dimension": 768, "context_length": 8192},
|
||||
"accounts/fireworks/models/qwen3-embedding-8b": {"embedding_dimension": 4096, "context_length": 40960},
|
||||
}
|
||||
|
||||
def __init__(self, config: FireworksImplConfig) -> None:
|
||||
ModelRegistryHelper.__init__(self)
|
||||
self.config = config
|
||||
self.allowed_models = config.allowed_models
|
||||
|
||||
async def initialize(self) -> None:
|
||||
pass
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
pass
|
||||
provider_data_api_key_field: str = "fireworks_api_key"
|
||||
|
||||
def get_api_key(self) -> str:
|
||||
config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None
|
||||
if config_api_key:
|
||||
return config_api_key
|
||||
else:
|
||||
provider_data = self.get_request_provider_data()
|
||||
if provider_data is None or not provider_data.fireworks_api_key:
|
||||
raise ValueError(
|
||||
'Pass Fireworks API Key in the header X-LlamaStack-Provider-Data as { "fireworks_api_key": <your api key>}'
|
||||
)
|
||||
return provider_data.fireworks_api_key
|
||||
return self.config.api_key.get_secret_value() if self.config.api_key else None # type: ignore[return-value]
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
return "https://api.fireworks.ai/inference/v1"
|
||||
|
||||
def _get_client(self) -> Fireworks:
|
||||
fireworks_api_key = self.get_api_key()
|
||||
return Fireworks(api_key=fireworks_api_key)
|
||||
|
||||
def _build_options(
|
||||
self,
|
||||
sampling_params: SamplingParams | None,
|
||||
fmt: ResponseFormat | None,
|
||||
logprobs: LogProbConfig | None,
|
||||
) -> dict:
|
||||
options = get_sampling_options(sampling_params)
|
||||
options.setdefault("max_tokens", 512)
|
||||
|
||||
if fmt:
|
||||
if fmt.type == ResponseFormatType.json_schema.value:
|
||||
options["response_format"] = {
|
||||
"type": "json_object",
|
||||
"schema": fmt.json_schema,
|
||||
}
|
||||
elif fmt.type == ResponseFormatType.grammar.value:
|
||||
options["response_format"] = {
|
||||
"type": "grammar",
|
||||
"grammar": fmt.bnf,
|
||||
}
|
||||
else:
|
||||
raise ValueError(f"Unknown response format {fmt.type}")
|
||||
|
||||
if logprobs and logprobs.top_k:
|
||||
options["logprobs"] = logprobs.top_k
|
||||
if options["logprobs"] <= 0 or options["logprobs"] >= 5:
|
||||
raise ValueError("Required range: 0 < top_k < 5")
|
||||
|
||||
return options
|
||||
|
||||
async def _get_params(self, request: ChatCompletionRequest) -> dict:
|
||||
input_dict = {}
|
||||
media_present = request_has_media(request)
|
||||
|
||||
llama_model = self.get_llama_model(request.model)
|
||||
# TODO: tools are never added to the request, so we need to add them here
|
||||
if media_present or not llama_model:
|
||||
input_dict["messages"] = [await convert_message_to_openai_dict(m, download=True) for m in request.messages]
|
||||
else:
|
||||
input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model)
|
||||
|
||||
# Fireworks always prepends with BOS
|
||||
if "prompt" in input_dict:
|
||||
if input_dict["prompt"].startswith("<|begin_of_text|>"):
|
||||
input_dict["prompt"] = input_dict["prompt"][len("<|begin_of_text|>") :]
|
||||
|
||||
params = {
|
||||
"model": request.model,
|
||||
**input_dict,
|
||||
"stream": bool(request.stream),
|
||||
**self._build_options(request.sampling_params, request.response_format, request.logprobs),
|
||||
}
|
||||
logger.debug(f"params to fireworks: {params}")
|
||||
|
||||
return params
|
||||
|
|
|
|||
|
|
@ -10,6 +10,6 @@ from .config import GeminiConfig
|
|||
async def get_adapter_impl(config: GeminiConfig, _deps):
|
||||
from .gemini import GeminiInferenceAdapter
|
||||
|
||||
impl = GeminiInferenceAdapter(config)
|
||||
impl = GeminiInferenceAdapter(config=config)
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
|
|
|||
|
|
@ -4,33 +4,21 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

from .config import GeminiConfig


class GeminiInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
    embedding_model_metadata = {
class GeminiInferenceAdapter(OpenAIMixin):
    config: GeminiConfig

    provider_data_api_key_field: str = "gemini_api_key"
    embedding_model_metadata: dict[str, dict[str, int]] = {
        "text-embedding-004": {"embedding_dimension": 768, "context_length": 2048},
    }

    def __init__(self, config: GeminiConfig) -> None:
        LiteLLMOpenAIMixin.__init__(
            self,
            litellm_provider_name="gemini",
            api_key_from_config=config.api_key,
            provider_data_api_key_field="gemini_api_key",
        )
        self.config = config

    get_api_key = LiteLLMOpenAIMixin.get_api_key
    def get_api_key(self) -> str:
        return self.config.api_key or ""

    def get_base_url(self):
        return "https://generativelanguage.googleapis.com/v1beta/openai/"

    async def initialize(self) -> None:
        await super().initialize()

    async def shutdown(self) -> None:
        await super().shutdown()
|
||||
|
|
|
|||
|
|
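The Gemini hunk above is representative of the shape the whole change set converges on: adapters become declarative classes that carry only a typed config, a provider_data_api_key_field, optional embedding_model_metadata, and the get_api_key / get_base_url hooks, with OpenAIMixin supplying the client plumbing. A minimal standalone sketch of that shape, using stand-in classes rather than the real llama_stack types:

# Stand-in classes, not the real llama_stack types; shows the declarative adapter shape.
from pydantic import BaseModel


class _Config(BaseModel):
    api_key: str | None = None


class _Adapter(BaseModel):
    config: _Config
    provider_data_api_key_field: str = "example_api_key"  # hypothetical field name

    def get_api_key(self) -> str:
        return self.config.api_key or ""

    def get_base_url(self) -> str:
        return "https://generativelanguage.googleapis.com/v1beta/openai/"


adapter = _Adapter(config=_Config(api_key="example"))
print(adapter.get_base_url(), bool(adapter.get_api_key()))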
@ -11,5 +11,5 @@ async def get_adapter_impl(config: GroqConfig, _deps):
    # import dynamically so the import is used only when it is needed
    from .groq import GroqInferenceAdapter

    adapter = GroqInferenceAdapter(config)
    adapter = GroqInferenceAdapter(config=config)
    return adapter
@ -6,30 +6,16 @@
|
|||
|
||||
|
||||
from llama_stack.providers.remote.inference.groq.config import GroqConfig
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
|
||||
class GroqInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||
_config: GroqConfig
|
||||
class GroqInferenceAdapter(OpenAIMixin):
|
||||
config: GroqConfig
|
||||
|
||||
def __init__(self, config: GroqConfig):
|
||||
LiteLLMOpenAIMixin.__init__(
|
||||
self,
|
||||
litellm_provider_name="groq",
|
||||
api_key_from_config=config.api_key,
|
||||
provider_data_api_key_field="groq_api_key",
|
||||
)
|
||||
self.config = config
|
||||
provider_data_api_key_field: str = "groq_api_key"
|
||||
|
||||
# Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
|
||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
||||
def get_api_key(self) -> str:
|
||||
return self.config.api_key or ""
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
return f"{self.config.url}/openai/v1"
|
||||
|
||||
async def initialize(self):
|
||||
await super().initialize()
|
||||
|
||||
async def shutdown(self):
|
||||
await super().shutdown()
@ -4,14 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.inference import InferenceProvider

from .config import LlamaCompatConfig


async def get_adapter_impl(config: LlamaCompatConfig, _deps) -> InferenceProvider:
async def get_adapter_impl(config: LlamaCompatConfig, _deps):
    # import dynamically so the import is used only when it is needed
    from .llama import LlamaCompatInferenceAdapter

    adapter = LlamaCompatInferenceAdapter(config)
    adapter = LlamaCompatInferenceAdapter(config=config)
    return adapter
@ -3,40 +3,26 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.apis.inference.inference import OpenAICompletion, OpenAIEmbeddingsResponse
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
logger = get_logger(name=__name__, category="inference::llama_openai_compat")
|
||||
|
||||
|
||||
class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||
class LlamaCompatInferenceAdapter(OpenAIMixin):
|
||||
config: LlamaCompatConfig
|
||||
|
||||
provider_data_api_key_field: str = "llama_api_key"
|
||||
"""
|
||||
Llama API Inference Adapter for Llama Stack.
|
||||
|
||||
Note: The inheritance order is important here. OpenAIMixin must come before
|
||||
LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
|
||||
is used instead of ModelRegistryHelper.check_model_availability().
|
||||
|
||||
- OpenAIMixin.check_model_availability() queries the Llama API to check if a model exists
|
||||
- ModelRegistryHelper.check_model_availability() (inherited by LiteLLMOpenAIMixin) just returns False and shows a warning
|
||||
"""
|
||||
|
||||
_config: LlamaCompatConfig
|
||||
|
||||
def __init__(self, config: LlamaCompatConfig):
|
||||
LiteLLMOpenAIMixin.__init__(
|
||||
self,
|
||||
litellm_provider_name="meta_llama",
|
||||
api_key_from_config=config.api_key,
|
||||
provider_data_api_key_field="llama_api_key",
|
||||
openai_compat_api_base=config.openai_compat_api_base,
|
||||
)
|
||||
self.config = config
|
||||
|
||||
# Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
|
||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
||||
def get_api_key(self) -> str:
|
||||
return self.config.api_key or ""
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
"""
|
||||
|
|
@ -46,8 +32,37 @@ class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
|||
"""
|
||||
return self.config.openai_compat_api_base
|
||||
|
||||
async def initialize(self):
|
||||
await super().initialize()
|
||||
async def openai_completion(
|
||||
self,
|
||||
model: str,
|
||||
prompt: str | list[str] | list[int] | list[list[int]],
|
||||
best_of: int | None = None,
|
||||
echo: bool | None = None,
|
||||
frequency_penalty: float | None = None,
|
||||
logit_bias: dict[str, float] | None = None,
|
||||
logprobs: bool | None = None,
|
||||
max_tokens: int | None = None,
|
||||
n: int | None = None,
|
||||
presence_penalty: float | None = None,
|
||||
seed: int | None = None,
|
||||
stop: str | list[str] | None = None,
|
||||
stream: bool | None = None,
|
||||
stream_options: dict[str, Any] | None = None,
|
||||
temperature: float | None = None,
|
||||
top_p: float | None = None,
|
||||
user: str | None = None,
|
||||
guided_choice: list[str] | None = None,
|
||||
prompt_logprobs: int | None = None,
|
||||
suffix: str | None = None,
|
||||
) -> OpenAICompletion:
|
||||
raise NotImplementedError()
|
||||
|
||||
async def shutdown(self):
|
||||
await super().shutdown()
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
input: str | list[str],
|
||||
encoding_format: str | None = "float",
|
||||
dimensions: int | None = None,
|
||||
user: str | None = None,
|
||||
) -> OpenAIEmbeddingsResponse:
|
||||
raise NotImplementedError()
@ -15,7 +15,8 @@ async def get_adapter_impl(config: NVIDIAConfig, _deps) -> Inference:

    if not isinstance(config, NVIDIAConfig):
        raise RuntimeError(f"Unexpected config type: {type(config)}")
    adapter = NVIDIAInferenceAdapter(config)
    adapter = NVIDIAInferenceAdapter(config=config)
    await adapter.initialize()
    return adapter
@ -8,7 +8,6 @@
|
|||
from openai import NOT_GIVEN
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
Inference,
|
||||
OpenAIEmbeddingData,
|
||||
OpenAIEmbeddingsResponse,
|
||||
OpenAIEmbeddingUsage,
|
||||
|
|
@ -22,7 +21,9 @@ from .utils import _is_nvidia_hosted
|
|||
logger = get_logger(name=__name__, category="inference::nvidia")
|
||||
|
||||
|
||||
class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
|
||||
class NVIDIAInferenceAdapter(OpenAIMixin):
|
||||
config: NVIDIAConfig
|
||||
|
||||
"""
|
||||
NVIDIA Inference Adapter for Llama Stack.
|
||||
|
||||
|
|
@ -37,32 +38,21 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
|
|||
"""
|
||||
|
||||
# source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
|
||||
embedding_model_metadata = {
|
||||
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||
"nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192},
|
||||
"nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024},
|
||||
"nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096},
|
||||
"snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024},
|
||||
}
|
||||
|
||||
def __init__(self, config: NVIDIAConfig) -> None:
|
||||
logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")
|
||||
async def initialize(self) -> None:
|
||||
logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...")
|
||||
|
||||
if _is_nvidia_hosted(config):
|
||||
if not config.api_key:
|
||||
if _is_nvidia_hosted(self.config):
|
||||
if not self.config.api_key:
|
||||
raise RuntimeError(
|
||||
"API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM."
|
||||
)
|
||||
# elif self._config.api_key:
|
||||
#
|
||||
# we don't raise this warning because a user may have deployed their
|
||||
# self-hosted NIM with an API key requirement.
|
||||
#
|
||||
# warnings.warn(
|
||||
# "API key is not required for self-hosted NVIDIA NIM. "
|
||||
# "Consider removing the api_key from the configuration."
|
||||
# )
|
||||
|
||||
self._config = config
|
||||
|
||||
def get_api_key(self) -> str:
|
||||
"""
|
||||
|
|
@ -70,7 +60,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
|
|||
|
||||
:return: The NVIDIA API key
|
||||
"""
|
||||
return self._config.api_key.get_secret_value() if self._config.api_key else "NO KEY"
|
||||
return self.config.api_key.get_secret_value() if self.config.api_key else "NO KEY"
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
"""
|
||||
|
|
@ -78,7 +68,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference):
|
|||
|
||||
:return: The NVIDIA API base URL
|
||||
"""
|
||||
return f"{self._config.url}/v1" if self._config.append_api_version else self._config.url
|
||||
return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
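A small standalone illustration of the base-URL rule in the NVIDIA hunk above (the config class and URL here are stand-ins, not taken from this diff):

# Stand-in config; the hunk above appends /v1 only when append_api_version is enabled.
from dataclasses import dataclass


@dataclass
class _NVIDIAConfigSketch:
    url: str = "https://integrate.api.nvidia.com"  # example endpoint
    append_api_version: bool = True


def base_url(cfg: _NVIDIAConfigSketch) -> str:
    return f"{cfg.url}/v1" if cfg.append_api_version else cfg.url


assert base_url(_NVIDIAConfigSketch()) == "https://integrate.api.nvidia.com/v1"
assert base_url(_NVIDIAConfigSketch(append_api_version=False)) == "https://integrate.api.nvidia.com"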
@ -10,6 +10,6 @@ from .config import OllamaImplConfig
async def get_adapter_impl(config: OllamaImplConfig, _deps):
    from .ollama import OllamaInferenceAdapter

    impl = OllamaInferenceAdapter(config)
    impl = OllamaInferenceAdapter(config=config)
    await impl.initialize()
    return impl
@ -6,58 +6,29 @@
|
|||
|
||||
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from ollama import AsyncClient as AsyncOllamaClient
|
||||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
ImageContentItem,
|
||||
TextContentItem,
|
||||
)
|
||||
from llama_stack.apis.common.errors import UnsupportedModelError
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
GrammarResponseFormat,
|
||||
InferenceProvider,
|
||||
JsonSchemaResponseFormat,
|
||||
Message,
|
||||
)
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.models.llama.sku_types import CoreModelId
|
||||
from llama_stack.providers.datatypes import (
|
||||
HealthResponse,
|
||||
HealthStatus,
|
||||
ModelsProtocolPrivate,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelRegistryHelper,
|
||||
build_hf_repo_model_entry,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
get_sampling_options,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
chat_completion_request_to_prompt,
|
||||
convert_image_content_to_url,
|
||||
request_has_media,
|
||||
)
|
||||
|
||||
logger = get_logger(name=__name__, category="inference::ollama")
|
||||
|
||||
|
||||
class OllamaInferenceAdapter(
|
||||
OpenAIMixin,
|
||||
ModelRegistryHelper,
|
||||
InferenceProvider,
|
||||
ModelsProtocolPrivate,
|
||||
):
|
||||
class OllamaInferenceAdapter(OpenAIMixin):
|
||||
config: OllamaImplConfig
|
||||
|
||||
# automatically set by the resolver when instantiating the provider
|
||||
__provider_id__: str
|
||||
|
||||
embedding_model_metadata = {
|
||||
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||
"all-minilm:l6-v2": {
|
||||
"embedding_dimension": 384,
|
||||
"context_length": 512,
|
||||
|
|
@ -76,29 +47,8 @@ class OllamaInferenceAdapter(
|
|||
},
|
||||
}
|
||||
|
||||
def __init__(self, config: OllamaImplConfig) -> None:
|
||||
# TODO: remove ModelRegistryHelper.__init__ when completion and
|
||||
# chat_completion are. this exists to satisfy the input /
|
||||
# output processing for llama models. specifically,
|
||||
# tool_calling is handled by raw template processing,
|
||||
# instead of using the /api/chat endpoint w/ tools=...
|
||||
ModelRegistryHelper.__init__(
|
||||
self,
|
||||
model_entries=[
|
||||
build_hf_repo_model_entry(
|
||||
"llama3.2:3b-instruct-fp16",
|
||||
CoreModelId.llama3_2_3b_instruct.value,
|
||||
),
|
||||
build_hf_repo_model_entry(
|
||||
"llama-guard3:1b",
|
||||
CoreModelId.llama_guard_3_1b.value,
|
||||
),
|
||||
],
|
||||
)
|
||||
self.config = config
|
||||
# Ollama does not support image urls, so we need to download the image and convert it to base64
|
||||
self.download_images = True
|
||||
self._clients: dict[asyncio.AbstractEventLoop, AsyncOllamaClient] = {}
|
||||
download_images: bool = True
|
||||
_clients: dict[asyncio.AbstractEventLoop, AsyncOllamaClient] = {}
|
||||
|
||||
@property
|
||||
def ollama_client(self) -> AsyncOllamaClient:
|
||||
|
|
@ -142,50 +92,6 @@ class OllamaInferenceAdapter(
|
|||
async def shutdown(self) -> None:
|
||||
self._clients.clear()
|
||||
|
||||
async def _get_model(self, model_id: str) -> Model:
|
||||
if not self.model_store:
|
||||
raise ValueError("Model store not set")
|
||||
return await self.model_store.get_model(model_id)
|
||||
|
||||
async def _get_params(self, request: ChatCompletionRequest) -> dict:
|
||||
sampling_options = get_sampling_options(request.sampling_params)
|
||||
# This is needed since the Ollama API expects num_predict to be set
|
||||
# for early truncation instead of max_tokens.
|
||||
if sampling_options.get("max_tokens") is not None:
|
||||
sampling_options["num_predict"] = sampling_options["max_tokens"]
|
||||
|
||||
input_dict: dict[str, Any] = {}
|
||||
media_present = request_has_media(request)
|
||||
llama_model = self.get_llama_model(request.model)
|
||||
if media_present or not llama_model:
|
||||
contents = [await convert_message_to_openai_dict_for_ollama(m) for m in request.messages]
|
||||
# flatten the list of lists
|
||||
input_dict["messages"] = [item for sublist in contents for item in sublist]
|
||||
else:
|
||||
input_dict["raw"] = True
|
||||
input_dict["prompt"] = await chat_completion_request_to_prompt(
|
||||
request,
|
||||
llama_model,
|
||||
)
|
||||
|
||||
if fmt := request.response_format:
|
||||
if isinstance(fmt, JsonSchemaResponseFormat):
|
||||
input_dict["format"] = fmt.json_schema
|
||||
elif isinstance(fmt, GrammarResponseFormat):
|
||||
raise NotImplementedError("Grammar response format is not supported")
|
||||
else:
|
||||
raise ValueError(f"Unknown response format type: {fmt.type}")
|
||||
|
||||
params = {
|
||||
"model": request.model,
|
||||
**input_dict,
|
||||
"options": sampling_options,
|
||||
"stream": request.stream,
|
||||
}
|
||||
logger.debug(f"params to ollama: {params}")
|
||||
|
||||
return params
|
||||
|
||||
async def register_model(self, model: Model) -> Model:
|
||||
if await self.check_model_availability(model.provider_model_id):
|
||||
return model
|
||||
|
|
@ -197,24 +103,3 @@ class OllamaInferenceAdapter(
|
|||
return model
|
||||
|
||||
raise UnsupportedModelError(model.provider_model_id, list(self._model_cache.keys()))
|
||||
|
||||
|
||||
async def convert_message_to_openai_dict_for_ollama(message: Message) -> list[dict]:
|
||||
async def _convert_content(content) -> dict:
|
||||
if isinstance(content, ImageContentItem):
|
||||
return {
|
||||
"role": message.role,
|
||||
"images": [await convert_image_content_to_url(content, download=True, include_format=False)],
|
||||
}
|
||||
else:
|
||||
text = content.text if isinstance(content, TextContentItem) else content
|
||||
assert isinstance(text, str)
|
||||
return {
|
||||
"role": message.role,
|
||||
"content": text,
|
||||
}
|
||||
|
||||
if isinstance(message.content, list):
|
||||
return [await _convert_content(c) for c in message.content]
|
||||
else:
|
||||
return [await _convert_content(message.content)]
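A hedged, standalone sketch of the flattening behaviour that convert_message_to_openai_dict_for_ollama implements above: each content part becomes its own role-tagged dict, with images under "images" and text under "content" (the types here are simplified stand-ins):

# Simplified stand-in types: bytes plays the role of an image part, str of a text part.
def _flatten(role: str, parts: list[str | bytes]) -> list[dict]:
    out: list[dict] = []
    for part in parts:
        if isinstance(part, bytes):
            out.append({"role": role, "images": [part]})
        else:
            out.append({"role": role, "content": part})
    return out


print(_flatten("user", ["describe this image", b"<base64-bytes>"]))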
@ -10,6 +10,6 @@ from .config import OpenAIConfig
async def get_adapter_impl(config: OpenAIConfig, _deps):
    from .openai import OpenAIInferenceAdapter

    impl = OpenAIInferenceAdapter(config)
    impl = OpenAIInferenceAdapter(config=config)
    await impl.initialize()
    return impl
@ -5,7 +5,6 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
from .config import OpenAIConfig
|
||||
|
|
@ -14,52 +13,24 @@ logger = get_logger(name=__name__, category="inference::openai")
|
|||
|
||||
|
||||
#
|
||||
# This OpenAI adapter implements Inference methods using two mixins -
|
||||
# This OpenAI adapter implements Inference methods using OpenAIMixin
|
||||
#
|
||||
# | Inference Method | Implementation Source |
|
||||
# |----------------------------|--------------------------|
|
||||
# | completion | LiteLLMOpenAIMixin |
|
||||
# | chat_completion | LiteLLMOpenAIMixin |
|
||||
# | embedding | LiteLLMOpenAIMixin |
|
||||
# | openai_completion | OpenAIMixin |
|
||||
# | openai_chat_completion | OpenAIMixin |
|
||||
# | openai_embeddings | OpenAIMixin |
|
||||
#
|
||||
class OpenAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||
class OpenAIInferenceAdapter(OpenAIMixin):
|
||||
"""
|
||||
OpenAI Inference Adapter for Llama Stack.
|
||||
|
||||
Note: The inheritance order is important here. OpenAIMixin must come before
|
||||
LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
|
||||
is used instead of ModelRegistryHelper.check_model_availability().
|
||||
|
||||
- OpenAIMixin.check_model_availability() queries the OpenAI API to check if a model exists
|
||||
- ModelRegistryHelper.check_model_availability() (inherited by LiteLLMOpenAIMixin) just returns False and shows a warning
|
||||
"""
|
||||
|
||||
embedding_model_metadata = {
|
||||
config: OpenAIConfig
|
||||
|
||||
provider_data_api_key_field: str = "openai_api_key"
|
||||
|
||||
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||
"text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192},
|
||||
"text-embedding-3-large": {"embedding_dimension": 3072, "context_length": 8192},
|
||||
}
|
||||
|
||||
def __init__(self, config: OpenAIConfig) -> None:
|
||||
LiteLLMOpenAIMixin.__init__(
|
||||
self,
|
||||
litellm_provider_name="openai",
|
||||
api_key_from_config=config.api_key,
|
||||
provider_data_api_key_field="openai_api_key",
|
||||
)
|
||||
self.config = config
|
||||
# we set is_openai_compat so users can use the canonical
|
||||
# openai model names like "gpt-4" or "gpt-3.5-turbo"
|
||||
# and the model name will be translated to litellm's
|
||||
# "openai/gpt-4" or "openai/gpt-3.5-turbo" transparently.
|
||||
# if we do not set this, users will be exposed to the
|
||||
# litellm specific model names, an abstraction leak.
|
||||
self.is_openai_compat = True
|
||||
|
||||
# Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
|
||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
||||
def get_api_key(self) -> str:
|
||||
return self.config.api_key or ""
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
"""
|
||||
|
|
@ -68,9 +39,3 @@ class OpenAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
|||
Returns the OpenAI API base URL from the configuration.
|
||||
"""
|
||||
return self.config.base_url
|
||||
|
||||
async def initialize(self) -> None:
|
||||
await super().initialize()
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
await super().shutdown()
@ -31,12 +31,6 @@ class PassthroughInferenceAdapter(Inference):
        ModelRegistryHelper.__init__(self)
        self.config = config

    async def initialize(self) -> None:
        pass

    async def shutdown(self) -> None:
        pass

    async def unregister_model(self, model_id: str) -> None:
        pass
@ -53,12 +53,6 @@ class RunpodInferenceAdapter(
        ModelRegistryHelper.__init__(self, stack_to_provider_models_map=RUNPOD_SUPPORTED_MODELS)
        self.config = config

    async def initialize(self) -> None:
        return

    async def shutdown(self) -> None:
        pass

    def _get_params(self, request: ChatCompletionRequest) -> dict:
        return {
            "model": self.map_to_provider_model(request.model),
@ -11,6 +11,6 @@ async def get_adapter_impl(config: SambaNovaImplConfig, _deps):
    from .sambanova import SambaNovaInferenceAdapter

    assert isinstance(config, SambaNovaImplConfig), f"Unexpected config type: {type(config)}"
    impl = SambaNovaInferenceAdapter(config)
    impl = SambaNovaInferenceAdapter(config=config)
    await impl.initialize()
    return impl
@ -5,39 +5,22 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
from .config import SambaNovaImplConfig
|
||||
|
||||
|
||||
class SambaNovaInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||
class SambaNovaInferenceAdapter(OpenAIMixin):
|
||||
config: SambaNovaImplConfig
|
||||
|
||||
provider_data_api_key_field: str = "sambanova_api_key"
|
||||
download_images: bool = True # SambaNova does not support image downloads server-size, perform them on the client
|
||||
"""
|
||||
SambaNova Inference Adapter for Llama Stack.
|
||||
|
||||
Note: The inheritance order is important here. OpenAIMixin must come before
|
||||
LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
|
||||
is used instead of LiteLLMOpenAIMixin.check_model_availability().
|
||||
|
||||
- OpenAIMixin.check_model_availability() queries the /v1/models to check if a model exists
|
||||
- LiteLLMOpenAIMixin.check_model_availability() checks the static registry within LiteLLM
|
||||
"""
|
||||
|
||||
def __init__(self, config: SambaNovaImplConfig):
|
||||
self.config = config
|
||||
self.environment_available_models: list[str] = []
|
||||
LiteLLMOpenAIMixin.__init__(
|
||||
self,
|
||||
litellm_provider_name="sambanova",
|
||||
api_key_from_config=self.config.api_key.get_secret_value() if self.config.api_key else None,
|
||||
provider_data_api_key_field="sambanova_api_key",
|
||||
openai_compat_api_base=self.config.url,
|
||||
download_images=True, # SambaNova requires base64 image encoding
|
||||
json_schema_strict=False, # SambaNova doesn't support strict=True yet
|
||||
)
|
||||
|
||||
# Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
|
||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
||||
def get_api_key(self) -> str:
|
||||
return self.config.api_key.get_secret_value() if self.config.api_key else ""
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
"""
@ -5,53 +5,21 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from collections.abc import Iterable
|
||||
|
||||
from huggingface_hub import AsyncInferenceClient, HfApi
|
||||
from pydantic import SecretStr
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
Inference,
|
||||
OpenAIEmbeddingsResponse,
|
||||
ResponseFormat,
|
||||
ResponseFormatType,
|
||||
SamplingParams,
|
||||
)
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
from llama_stack.apis.inference import OpenAIEmbeddingsResponse
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.models.llama.sku_list import all_registered_models
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelRegistryHelper,
|
||||
build_hf_repo_model_entry,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
get_sampling_options,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
chat_completion_request_to_model_input_info,
|
||||
)
|
||||
|
||||
from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
|
||||
|
||||
log = get_logger(name=__name__, category="inference::tgi")
|
||||
|
||||
|
||||
def build_hf_repo_model_entries():
|
||||
return [
|
||||
build_hf_repo_model_entry(
|
||||
model.huggingface_repo,
|
||||
model.descriptor(),
|
||||
)
|
||||
for model in all_registered_models()
|
||||
if model.huggingface_repo
|
||||
]
|
||||
|
||||
|
||||
class _HfAdapter(
|
||||
OpenAIMixin,
|
||||
Inference,
|
||||
):
|
||||
class _HfAdapter(OpenAIMixin):
|
||||
url: str
|
||||
api_key: SecretStr
|
||||
|
||||
|
|
@ -61,90 +29,14 @@ class _HfAdapter(
|
|||
|
||||
overwrite_completion_id = True # TGI always returns id=""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
|
||||
self.huggingface_repo_to_llama_model_id = {
|
||||
model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo
|
||||
}
|
||||
|
||||
def get_api_key(self):
|
||||
return self.api_key.get_secret_value()
|
||||
|
||||
def get_base_url(self):
|
||||
return self.url
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
pass
|
||||
|
||||
async def list_models(self) -> list[Model] | None:
|
||||
models = []
|
||||
async for model in self.client.models.list():
|
||||
models.append(
|
||||
Model(
|
||||
identifier=model.id,
|
||||
provider_resource_id=model.id,
|
||||
provider_id=self.__provider_id__,
|
||||
metadata={},
|
||||
model_type=ModelType.llm,
|
||||
)
|
||||
)
|
||||
return models
|
||||
|
||||
async def register_model(self, model: Model) -> Model:
|
||||
if model.provider_resource_id != self.model_id:
|
||||
raise ValueError(
|
||||
f"Model {model.provider_resource_id} does not match the model {self.model_id} served by TGI."
|
||||
)
|
||||
return model
|
||||
|
||||
async def unregister_model(self, model_id: str) -> None:
|
||||
pass
|
||||
|
||||
def _get_max_new_tokens(self, sampling_params, input_tokens):
|
||||
return min(
|
||||
sampling_params.max_tokens or (self.max_tokens - input_tokens),
|
||||
self.max_tokens - input_tokens - 1,
|
||||
)
|
||||
|
||||
def _build_options(
|
||||
self,
|
||||
sampling_params: SamplingParams | None = None,
|
||||
fmt: ResponseFormat = None,
|
||||
):
|
||||
options = get_sampling_options(sampling_params)
|
||||
# TGI does not support temperature=0 when using greedy sampling
|
||||
# We set it to 1e-3 instead, anything lower outputs garbage from TGI
|
||||
# We can use top_p sampling strategy to specify lower temperature
|
||||
if abs(options["temperature"]) < 1e-10:
|
||||
options["temperature"] = 1e-3
|
||||
|
||||
# delete key "max_tokens" from options since its not supported by the API
|
||||
options.pop("max_tokens", None)
|
||||
if fmt:
|
||||
if fmt.type == ResponseFormatType.json_schema.value:
|
||||
options["grammar"] = {
|
||||
"type": "json",
|
||||
"value": fmt.json_schema,
|
||||
}
|
||||
elif fmt.type == ResponseFormatType.grammar.value:
|
||||
raise ValueError("Grammar response format not supported yet")
|
||||
else:
|
||||
raise ValueError(f"Unexpected response format: {fmt.type}")
|
||||
|
||||
return options
|
||||
|
||||
async def _get_params(self, request: ChatCompletionRequest) -> dict:
|
||||
prompt, input_tokens = await chat_completion_request_to_model_input_info(
|
||||
request, self.register_helper.get_llama_model(request.model)
|
||||
)
|
||||
return dict(
|
||||
prompt=prompt,
|
||||
stream=request.stream,
|
||||
details=True,
|
||||
max_new_tokens=self._get_max_new_tokens(request.sampling_params, input_tokens),
|
||||
stop_sequences=["<|eom_id|>", "<|eot_id|>"],
|
||||
**self._build_options(request.sampling_params, request.response_format),
|
||||
)
|
||||
async def list_provider_model_ids(self) -> Iterable[str]:
|
||||
return [self.model_id]
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
@ -17,6 +17,6 @@ async def get_adapter_impl(config: TogetherImplConfig, _deps):
    from .together import TogetherInferenceAdapter

    assert isinstance(config, TogetherImplConfig), f"Unexpected config type: {type(config)}"
    impl = TogetherInferenceAdapter(config)
    impl = TogetherInferenceAdapter(config=config)
    await impl.initialize()
    return impl
@ -5,41 +5,29 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
from collections.abc import Iterable
|
||||
|
||||
from together import AsyncTogether
|
||||
from together.constants import BASE_URL
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
Inference,
|
||||
LogProbConfig,
|
||||
OpenAIEmbeddingsResponse,
|
||||
ResponseFormat,
|
||||
ResponseFormatType,
|
||||
SamplingParams,
|
||||
)
|
||||
from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
|
||||
from llama_stack.apis.models import Model, ModelType
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.core.request_headers import NeedsRequestProviderData
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
convert_message_to_openai_dict,
|
||||
get_sampling_options,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
chat_completion_request_to_prompt,
|
||||
request_has_media,
|
||||
)
|
||||
|
||||
from .config import TogetherImplConfig
|
||||
|
||||
logger = get_logger(name=__name__, category="inference::together")
|
||||
|
||||
|
||||
class TogetherInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData):
|
||||
embedding_model_metadata = {
|
||||
class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
|
||||
config: TogetherImplConfig
|
||||
|
||||
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||
"togethercomputer/m2-bert-80M-32k-retrieval": {"embedding_dimension": 768, "context_length": 32768},
|
||||
"BAAI/bge-large-en-v1.5": {"embedding_dimension": 1024, "context_length": 512},
|
||||
"BAAI/bge-base-en-v1.5": {"embedding_dimension": 768, "context_length": 512},
|
||||
|
|
@ -47,24 +35,16 @@ class TogetherInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData)
|
|||
"intfloat/multilingual-e5-large-instruct": {"embedding_dimension": 1024, "context_length": 512},
|
||||
}
|
||||
|
||||
def __init__(self, config: TogetherImplConfig) -> None:
|
||||
ModelRegistryHelper.__init__(self)
|
||||
self.config = config
|
||||
self.allowed_models = config.allowed_models
|
||||
self._model_cache: dict[str, Model] = {}
|
||||
_model_cache: dict[str, Model] = {}
|
||||
|
||||
provider_data_api_key_field: str = "together_api_key"
|
||||
|
||||
def get_api_key(self):
|
||||
return self.config.api_key.get_secret_value()
|
||||
return self.config.api_key.get_secret_value() if self.config.api_key else None
|
||||
|
||||
def get_base_url(self):
|
||||
return BASE_URL
|
||||
|
||||
async def initialize(self) -> None:
|
||||
pass
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
pass
|
||||
|
||||
def _get_client(self) -> AsyncTogether:
|
||||
together_api_key = None
|
||||
config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None
|
||||
|
|
@ -79,90 +59,13 @@ class TogetherInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData)
|
|||
together_api_key = provider_data.together_api_key
|
||||
return AsyncTogether(api_key=together_api_key)
|
||||
|
||||
def _get_openai_client(self) -> AsyncOpenAI:
|
||||
together_client = self._get_client().client
|
||||
return AsyncOpenAI(
|
||||
base_url=together_client.base_url,
|
||||
api_key=together_client.api_key,
|
||||
)
|
||||
|
||||
def _build_options(
|
||||
self,
|
||||
sampling_params: SamplingParams | None,
|
||||
logprobs: LogProbConfig | None,
|
||||
fmt: ResponseFormat,
|
||||
) -> dict:
|
||||
options = get_sampling_options(sampling_params)
|
||||
if fmt:
|
||||
if fmt.type == ResponseFormatType.json_schema.value:
|
||||
options["response_format"] = {
|
||||
"type": "json_object",
|
||||
"schema": fmt.json_schema,
|
||||
}
|
||||
elif fmt.type == ResponseFormatType.grammar.value:
|
||||
raise NotImplementedError("Grammar response format not supported yet")
|
||||
else:
|
||||
raise ValueError(f"Unknown response format {fmt.type}")
|
||||
|
||||
if logprobs and logprobs.top_k:
|
||||
if logprobs.top_k != 1:
|
||||
raise ValueError(
|
||||
f"Unsupported value: Together only supports logprobs top_k=1. {logprobs.top_k} was provided",
|
||||
)
|
||||
options["logprobs"] = 1
|
||||
|
||||
return options
|
||||
|
||||
async def _get_params(self, request: ChatCompletionRequest) -> dict:
|
||||
input_dict = {}
|
||||
media_present = request_has_media(request)
|
||||
llama_model = self.get_llama_model(request.model)
|
||||
if media_present or not llama_model:
|
||||
input_dict["messages"] = [await convert_message_to_openai_dict(m) for m in request.messages]
|
||||
else:
|
||||
input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model)
|
||||
|
||||
params = {
|
||||
"model": request.model,
|
||||
**input_dict,
|
||||
"stream": request.stream,
|
||||
**self._build_options(request.sampling_params, request.logprobs, request.response_format),
|
||||
}
|
||||
logger.debug(f"params to together: {params}")
|
||||
return params
|
||||
|
||||
async def list_models(self) -> list[Model] | None:
|
||||
self._model_cache = {}
|
||||
async def list_provider_model_ids(self) -> Iterable[str]:
|
||||
# Together's /v1/models is not compatible with OpenAI's /v1/models. Together support ticket #13355 -> will not fix, use Together's own client
|
||||
for m in await self._get_client().models.list():
|
||||
if m.type == "embedding":
|
||||
if m.id not in self.embedding_model_metadata:
|
||||
logger.warning(f"Unknown embedding dimension for model {m.id}, skipping.")
|
||||
continue
|
||||
metadata = self.embedding_model_metadata[m.id]
|
||||
self._model_cache[m.id] = Model(
|
||||
provider_id=self.__provider_id__,
|
||||
provider_resource_id=m.id,
|
||||
identifier=m.id,
|
||||
model_type=ModelType.embedding,
|
||||
metadata=metadata,
|
||||
)
|
||||
else:
|
||||
self._model_cache[m.id] = Model(
|
||||
provider_id=self.__provider_id__,
|
||||
provider_resource_id=m.id,
|
||||
identifier=m.id,
|
||||
model_type=ModelType.llm,
|
||||
)
|
||||
|
||||
return self._model_cache.values()
|
||||
return [m.id for m in await self._get_client().models.list()]
|
||||
|
||||
async def should_refresh_models(self) -> bool:
|
||||
return True
|
||||
|
||||
async def check_model_availability(self, model):
|
||||
return model in self._model_cache
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
|
|
@ -203,4 +106,4 @@ class TogetherInferenceAdapter(OpenAIMixin, Inference, NeedsRequestProviderData)
|
|||
)
|
||||
response.usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
|
||||
|
||||
return response
|
||||
return response # type: ignore[no-any-return]
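Because Together's /v1/models is not OpenAI-compatible, the adapter above lists models through the Together SDK. A hedged standalone version of that call (assumes the together package is installed and a TOGETHER_API_KEY environment variable is set):

# Standalone sketch of the list_provider_model_ids override above.
import asyncio
import os

from together import AsyncTogether


async def list_ids() -> list[str]:
    client = AsyncTogether(api_key=os.environ["TOGETHER_API_KEY"])
    return [m.id for m in await client.models.list()]


if __name__ == "__main__":
    print(asyncio.run(list_ids()))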
@ -10,6 +10,6 @@ from .config import VertexAIConfig
async def get_adapter_impl(config: VertexAIConfig, _deps):
    from .vertexai import VertexAIInferenceAdapter

    impl = VertexAIInferenceAdapter(config)
    impl = VertexAIInferenceAdapter(config=config)
    await impl.initialize()
    return impl
@ -4,29 +4,19 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any
|
||||
|
||||
import google.auth.transport.requests
|
||||
from google.auth import default
|
||||
|
||||
from llama_stack.apis.inference import ChatCompletionRequest
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
|
||||
LiteLLMOpenAIMixin,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
from .config import VertexAIConfig
|
||||
|
||||
|
||||
class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||
def __init__(self, config: VertexAIConfig) -> None:
|
||||
LiteLLMOpenAIMixin.__init__(
|
||||
self,
|
||||
litellm_provider_name="vertex_ai",
|
||||
api_key_from_config=None, # Vertex AI uses ADC, not API keys
|
||||
provider_data_api_key_field="vertex_project", # Use project for validation
|
||||
)
|
||||
self.config = config
|
||||
class VertexAIInferenceAdapter(OpenAIMixin):
|
||||
config: VertexAIConfig
|
||||
|
||||
provider_data_api_key_field: str = "vertex_project"
|
||||
|
||||
def get_api_key(self) -> str:
|
||||
"""
|
||||
|
|
@ -41,8 +31,7 @@ class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
|||
credentials.refresh(google.auth.transport.requests.Request())
|
||||
return str(credentials.token)
|
||||
except Exception:
|
||||
# If we can't get credentials, return empty string to let LiteLLM handle it
|
||||
# This allows the LiteLLM mixin to work with ADC directly
|
||||
# If we can't get credentials, return empty string to let the env work with ADC directly
|
||||
return ""
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
|
|
@ -53,23 +42,3 @@ class VertexAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
|||
Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
|
||||
"""
|
||||
return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
|
||||
|
||||
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
|
||||
# Get base parameters from parent
|
||||
params = await super()._get_params(request)
|
||||
|
||||
# Add Vertex AI specific parameters
|
||||
provider_data = self.get_request_provider_data()
|
||||
if provider_data:
|
||||
if getattr(provider_data, "vertex_project", None):
|
||||
params["vertex_project"] = provider_data.vertex_project
|
||||
if getattr(provider_data, "vertex_location", None):
|
||||
params["vertex_location"] = provider_data.vertex_location
|
||||
else:
|
||||
params["vertex_project"] = self.config.project
|
||||
params["vertex_location"] = self.config.location
|
||||
|
||||
# Remove api_key since Vertex AI uses ADC
|
||||
params.pop("api_key", None)
|
||||
|
||||
return params
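A hedged sketch of the ADC flow that get_api_key above relies on: fetch Application Default Credentials, refresh them, and use the bearer token where an API key would normally go (assumes ADC is configured, e.g. via gcloud auth application-default login; the scope is an assumption, not taken from this diff):

# Standalone illustration of the Vertex AI credential path used above.
import google.auth.transport.requests
from google.auth import default


def adc_bearer_token() -> str:
    credentials, _project = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
    credentials.refresh(google.auth.transport.requests.Request())
    return str(credentials.token)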
@ -17,6 +17,6 @@ async def get_adapter_impl(config: VLLMInferenceAdapterConfig, _deps):
    from .vllm import VLLMInferenceAdapter

    assert isinstance(config, VLLMInferenceAdapterConfig), f"Unexpected config type: {type(config)}"
    impl = VLLMInferenceAdapter(config)
    impl = VLLMInferenceAdapter(config=config)
    await impl.initialize()
    return impl
@ -3,56 +3,26 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import json
|
||||
from collections.abc import AsyncGenerator, AsyncIterator
|
||||
from collections.abc import AsyncIterator
|
||||
from typing import Any
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import httpx
|
||||
from openai import APIConnectionError
|
||||
from openai.types.chat.chat_completion_chunk import (
|
||||
ChatCompletionChunk as OpenAIChatCompletionChunk,
|
||||
)
|
||||
from pydantic import ConfigDict
|
||||
|
||||
from llama_stack.apis.common.content_types import (
|
||||
TextDelta,
|
||||
ToolCallDelta,
|
||||
ToolCallParseStatus,
|
||||
)
|
||||
from llama_stack.apis.inference import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponseEvent,
|
||||
ChatCompletionResponseEventType,
|
||||
ChatCompletionResponseStreamChunk,
|
||||
GrammarResponseFormat,
|
||||
Inference,
|
||||
JsonSchemaResponseFormat,
|
||||
ModelStore,
|
||||
OpenAIChatCompletion,
|
||||
OpenAIMessageParam,
|
||||
OpenAIResponseFormatParam,
|
||||
ToolChoice,
|
||||
ToolDefinition,
|
||||
)
|
||||
from llama_stack.apis.models import Model, ModelType
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall
|
||||
from llama_stack.models.llama.sku_list import all_registered_models
|
||||
from llama_stack.providers.datatypes import (
|
||||
HealthResponse,
|
||||
HealthStatus,
|
||||
ModelsProtocolPrivate,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelRegistryHelper,
|
||||
build_hf_repo_model_entry,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
UnparseableToolCall,
|
||||
convert_message_to_openai_dict,
|
||||
convert_tool_call,
|
||||
get_sampling_options,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
|
|
@ -61,210 +31,15 @@ from .config import VLLMInferenceAdapterConfig
|
|||
log = get_logger(name=__name__, category="inference::vllm")
|
||||
|
||||
|
||||
def build_hf_repo_model_entries():
|
||||
return [
|
||||
build_hf_repo_model_entry(
|
||||
model.huggingface_repo,
|
||||
model.descriptor(),
|
||||
)
|
||||
for model in all_registered_models()
|
||||
if model.huggingface_repo
|
||||
]
|
||||
class VLLMInferenceAdapter(OpenAIMixin):
|
||||
config: VLLMInferenceAdapterConfig
|
||||
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
def _convert_to_vllm_tool_calls_in_response(
|
||||
tool_calls,
|
||||
) -> list[ToolCall]:
|
||||
if not tool_calls:
|
||||
return []
|
||||
provider_data_api_key_field: str = "vllm_api_token"
|
||||
|
||||
return [
|
||||
ToolCall(
|
||||
call_id=call.id,
|
||||
tool_name=call.function.name,
|
||||
arguments=call.function.arguments,
|
||||
)
|
||||
for call in tool_calls
|
||||
]
|
||||
|
||||
|
||||
def _convert_to_vllm_tools_in_request(tools: list[ToolDefinition]) -> list[dict]:
|
||||
compat_tools = []
|
||||
|
||||
for tool in tools:
|
||||
# The tool.tool_name can be a str or a BuiltinTool enum. If
|
||||
# it's the latter, convert to a string.
|
||||
tool_name = tool.tool_name
|
||||
if isinstance(tool_name, BuiltinTool):
|
||||
tool_name = tool_name.value
|
||||
|
||||
compat_tool = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": tool_name,
|
||||
"description": tool.description,
|
||||
"parameters": tool.input_schema
|
||||
or {
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"required": [],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
compat_tools.append(compat_tool)
|
||||
|
||||
return compat_tools
|
||||
|
||||
|
||||
def _convert_to_vllm_finish_reason(finish_reason: str) -> StopReason:
|
||||
return {
|
||||
"stop": StopReason.end_of_turn,
|
||||
"length": StopReason.out_of_tokens,
|
||||
"tool_calls": StopReason.end_of_message,
|
||||
}.get(finish_reason, StopReason.end_of_turn)
|
||||
|
||||
|
||||
def _process_vllm_chat_completion_end_of_stream(
|
||||
finish_reason: str | None,
|
||||
last_chunk_content: str | None,
|
||||
current_event_type: ChatCompletionResponseEventType,
|
||||
tool_call_bufs: dict[str, UnparseableToolCall] | None = None,
|
||||
) -> list[OpenAIChatCompletionChunk]:
|
||||
chunks = []
|
||||
|
||||
if finish_reason is not None:
|
||||
stop_reason = _convert_to_vllm_finish_reason(finish_reason)
|
||||
else:
|
||||
stop_reason = StopReason.end_of_message
|
||||
|
||||
tool_call_bufs = tool_call_bufs or {}
|
||||
for _index, tool_call_buf in sorted(tool_call_bufs.items()):
|
||||
args_str = tool_call_buf.arguments or "{}"
|
||||
try:
|
||||
chunks.append(
|
||||
ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=current_event_type,
|
||||
delta=ToolCallDelta(
|
||||
tool_call=ToolCall(
|
||||
call_id=tool_call_buf.call_id,
|
||||
tool_name=tool_call_buf.tool_name,
|
||||
arguments=args_str,
|
||||
),
|
||||
parse_status=ToolCallParseStatus.succeeded,
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
log.warning(f"Failed to parse tool call buffer arguments: {args_str} \nError: {e}")
|
||||
|
||||
chunks.append(
|
||||
ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=ChatCompletionResponseEventType.progress,
|
||||
delta=ToolCallDelta(
|
||||
tool_call=str(tool_call_buf),
|
||||
parse_status=ToolCallParseStatus.failed,
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
chunks.append(
|
||||
ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=ChatCompletionResponseEventType.complete,
|
||||
delta=TextDelta(text=last_chunk_content or ""),
|
||||
logprobs=None,
|
||||
stop_reason=stop_reason,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
return chunks
|
||||
|
||||
|
||||
async def _process_vllm_chat_completion_stream_response(
|
||||
stream: AsyncGenerator[OpenAIChatCompletionChunk, None],
|
||||
) -> AsyncGenerator:
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=ChatCompletionResponseEventType.start,
|
||||
delta=TextDelta(text=""),
|
||||
)
|
||||
)
|
||||
event_type = ChatCompletionResponseEventType.progress
|
||||
tool_call_bufs: dict[str, UnparseableToolCall] = {}
|
||||
end_of_stream_processed = False
|
||||
|
||||
async for chunk in stream:
|
||||
if not chunk.choices:
|
||||
log.warning("vLLM failed to generation any completions - check the vLLM server logs for an error.")
|
||||
return
|
||||
choice = chunk.choices[0]
|
||||
if choice.delta.tool_calls:
|
||||
for delta_tool_call in choice.delta.tool_calls:
|
||||
tool_call = convert_tool_call(delta_tool_call)
|
||||
if delta_tool_call.index not in tool_call_bufs:
|
||||
tool_call_bufs[delta_tool_call.index] = UnparseableToolCall()
|
||||
tool_call_buf = tool_call_bufs[delta_tool_call.index]
|
||||
tool_call_buf.tool_name += str(tool_call.tool_name)
|
||||
tool_call_buf.call_id += tool_call.call_id
|
||||
tool_call_buf.arguments += (
|
||||
tool_call.arguments if isinstance(tool_call.arguments, str) else json.dumps(tool_call.arguments)
|
||||
)
|
||||
if choice.finish_reason:
|
||||
chunks = _process_vllm_chat_completion_end_of_stream(
|
||||
finish_reason=choice.finish_reason,
|
||||
last_chunk_content=choice.delta.content,
|
||||
current_event_type=event_type,
|
||||
tool_call_bufs=tool_call_bufs,
|
||||
)
|
||||
for c in chunks:
|
||||
yield c
|
||||
end_of_stream_processed = True
|
||||
elif not choice.delta.tool_calls:
|
||||
yield ChatCompletionResponseStreamChunk(
|
||||
event=ChatCompletionResponseEvent(
|
||||
event_type=event_type,
|
||||
delta=TextDelta(text=choice.delta.content or ""),
|
||||
logprobs=None,
|
||||
)
|
||||
)
|
||||
event_type = ChatCompletionResponseEventType.progress
|
||||
|
||||
if end_of_stream_processed:
|
||||
return
|
||||
|
||||
# the stream ended without a chunk containing finish_reason - we have to generate the
|
||||
# respective completion chunks manually
|
||||
chunks = _process_vllm_chat_completion_end_of_stream(
|
||||
finish_reason=None, last_chunk_content=None, current_event_type=event_type, tool_call_bufs=tool_call_bufs
|
||||
)
|
||||
for c in chunks:
|
||||
yield c
|
||||
|
||||
|
||||
class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsProtocolPrivate):
|
||||
# automatically set by the resolver when instantiating the provider
|
||||
__provider_id__: str
|
||||
model_store: ModelStore | None = None
|
||||
|
||||
def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
|
||||
LiteLLMOpenAIMixin.__init__(
|
||||
self,
|
||||
model_entries=build_hf_repo_model_entries(),
|
||||
litellm_provider_name="vllm",
|
||||
api_key_from_config=config.api_token,
|
||||
provider_data_api_key_field="vllm_api_token",
|
||||
openai_compat_api_base=config.url,
|
||||
)
|
||||
self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
|
||||
self.config = config
|
||||
|
||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
||||
def get_api_key(self) -> str:
|
||||
return self.config.api_token or ""
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
"""Get the base URL from config."""
|
||||
|
|
@ -282,27 +57,6 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro
|
|||
# Strictly respecting the refresh_models directive
|
||||
return self.config.refresh_models
|
||||
|
||||
async def list_models(self) -> list[Model] | None:
|
||||
models = []
|
||||
async for m in self.client.models.list():
|
||||
model_type = ModelType.llm # unclear how to determine embedding vs. llm models
|
||||
models.append(
|
||||
Model(
|
||||
identifier=m.id,
|
||||
provider_resource_id=m.id,
|
||||
provider_id=self.__provider_id__,
|
||||
metadata={},
|
||||
model_type=model_type,
|
||||
)
|
||||
)
|
||||
return models
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
pass
|
||||
|
||||
async def unregister_model(self, model_id: str) -> None:
|
||||
pass
|
||||
|
||||
async def health(self) -> HealthResponse:
|
||||
"""
|
||||
Performs a health check by verifying connectivity to the remote vLLM server.
|
||||
|
|
@ -324,63 +78,9 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro
|
|||
except Exception as e:
|
||||
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
|
||||
|
||||
async def _get_model(self, model_id: str) -> Model:
|
||||
if not self.model_store:
|
||||
raise ValueError("Model store not set")
|
||||
return await self.model_store.get_model(model_id)
|
||||
|
||||
def get_extra_client_params(self):
|
||||
return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}
|
||||
|
||||
async def register_model(self, model: Model) -> Model:
|
||||
try:
|
||||
model = await self.register_helper.register_model(model)
|
||||
except ValueError:
|
||||
pass # Ignore statically unknown model, will check live listing
|
||||
try:
|
||||
res = self.client.models.list()
|
||||
except APIConnectionError as e:
|
||||
raise ValueError(
|
||||
f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
|
||||
) from e
|
||||
available_models = [m.id async for m in res]
|
||||
if model.provider_resource_id not in available_models:
|
||||
raise ValueError(
|
||||
f"Model {model.provider_resource_id} is not being served by vLLM. "
|
||||
f"Available models: {', '.join(available_models)}"
|
||||
)
|
||||
return model
|
||||
|
||||
async def _get_params(self, request: ChatCompletionRequest) -> dict:
|
||||
options = get_sampling_options(request.sampling_params)
|
||||
if "max_tokens" not in options:
|
||||
options["max_tokens"] = self.config.max_tokens
|
||||
|
||||
input_dict: dict[str, Any] = {}
|
||||
# Only include the 'tools' param if there is any. It can break things if an empty list is sent to the vLLM.
|
||||
if isinstance(request, ChatCompletionRequest) and request.tools:
|
||||
input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)}
|
||||
|
||||
input_dict["messages"] = [await convert_message_to_openai_dict(m, download=True) for m in request.messages]
|
||||
|
||||
if fmt := request.response_format:
|
||||
if isinstance(fmt, JsonSchemaResponseFormat):
|
||||
input_dict["extra_body"] = {"guided_json": fmt.json_schema}
|
||||
elif isinstance(fmt, GrammarResponseFormat):
|
||||
raise NotImplementedError("Grammar response format not supported yet")
|
||||
else:
|
||||
raise ValueError(f"Unknown response format {fmt.type}")
|
||||
|
||||
if request.logprobs and request.logprobs.top_k:
|
||||
input_dict["logprobs"] = request.logprobs.top_k
|
||||
|
||||
return {
|
||||
"model": request.model,
|
||||
**input_dict,
|
||||
"stream": request.stream,
|
||||
**options,
|
||||
}
|
||||
|
||||
async def openai_chat_completion(
|
||||
self,
|
||||
model: str,
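For context, a hedged sketch of the guided-JSON request that the vLLM _get_params path above ultimately produces, sent through a plain OpenAI-compatible client pointed at a vLLM server (the URL and model name are examples):

# Standalone sketch; extra_body/guided_json mirrors the structured-output knob used above.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="fake")
    schema = {"type": "object", "properties": {"answer": {"type": "string"}}, "required": ["answer"]}
    resp = await client.chat.completions.create(
        model="meta-llama/Llama-3.2-3B-Instruct",
        messages=[{"role": "user", "content": "Reply as JSON."}],
        extra_body={"guided_json": schema},
    )
    print(resp.choices[0].message.content)


if __name__ == "__main__":
    asyncio.run(main())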
@ -7,10 +7,11 @@
|
|||
import base64
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import AsyncIterator
|
||||
from collections.abc import AsyncIterator, Iterable
|
||||
from typing import Any
|
||||
|
||||
from openai import NOT_GIVEN, AsyncOpenAI
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
Model,
|
||||
|
|
@ -26,14 +27,14 @@ from llama_stack.apis.inference import (
|
|||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.core.request_headers import NeedsRequestProviderData
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import ModelsProtocolPrivate
|
||||
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
||||
from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content
|
||||
|
||||
logger = get_logger(name=__name__, category="providers::utils")
|
||||
|
||||
|
||||
class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):
|
||||
class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
|
||||
"""
|
||||
Mixin class that provides OpenAI-specific functionality for inference providers.
|
||||
This class handles direct OpenAI API calls using the AsyncOpenAI client.
|
||||
|
|
@ -42,12 +43,25 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):
|
|||
- get_api_key(): Method to retrieve the API key
|
||||
- get_base_url(): Method to retrieve the OpenAI-compatible API base URL
|
||||
|
||||
The behavior of this class can be customized by child classes in the following ways:
|
||||
- overwrite_completion_id: If True, overwrites the 'id' field in OpenAI responses
|
||||
- download_images: If True, downloads images and converts to base64 for providers that require it
|
||||
- embedding_model_metadata: A dictionary mapping model IDs to their embedding metadata
|
||||
- provider_data_api_key_field: Optional field name in provider data to look for API key
|
||||
- list_provider_model_ids: Method to list available models from the provider
|
||||
- get_extra_client_params: Method to provide extra parameters to the AsyncOpenAI client
|
||||
|
||||
Expected Dependencies:
|
||||
- self.model_store: Injected by the Llama Stack distribution system at runtime.
|
||||
This provides model registry functionality for looking up registered models.
|
||||
The model_store is set in routing_tables/common.py during provider initialization.
|
||||
"""
|
||||
|
||||
# Allow extra fields so the routing infra can inject model_store, __provider_id__, etc.
|
||||
model_config = ConfigDict(extra="allow")
|
||||
|
||||
config: RemoteInferenceProviderConfig
|
||||
|
||||
# Allow subclasses to control whether to overwrite the 'id' field in OpenAI responses
|
||||
# is overwritten with a client-side generated id.
|
||||
#
|
||||
|
|
@ -73,9 +87,6 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):
|
|||
# Optional field name in provider data to look for API key, which takes precedence
|
||||
provider_data_api_key_field: str | None = None
|
||||
|
||||
# automatically set by the resolver when instantiating the provider
|
||||
__provider_id__: str
|
||||
|
||||
@abstractmethod
|
||||
def get_api_key(self) -> str:
|
||||
"""
|
||||
|
|
@ -111,6 +122,38 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):
|
|||
"""
|
||||
return {}
|
||||
|
||||
async def list_provider_model_ids(self) -> Iterable[str]:
|
||||
"""
|
||||
List available models from the provider.
|
||||
|
||||
Child classes can override this method to provide a custom implementation
|
||||
for listing models. The default implementation uses the AsyncOpenAI client
|
||||
to list models from the OpenAI-compatible endpoint.
|
||||
|
||||
:return: An iterable of model IDs or None if not implemented
|
||||
"""
|
||||
return [m.id async for m in self.client.models.list()]
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""
|
||||
Initialize the OpenAI mixin.
|
||||
|
||||
This method provides a default implementation that does nothing.
|
||||
Subclasses can override this method to perform initialization tasks
|
||||
such as setting up clients, validating configurations, etc.
|
||||
"""
|
||||
pass
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
"""
|
||||
Shutdown the OpenAI mixin.
|
||||
|
||||
This method provides a default implementation that does nothing.
|
||||
Subclasses can override this method to perform cleanup tasks
|
||||
such as closing connections, releasing resources, etc.
|
||||
"""
|
||||
pass
|
||||
|
||||
@property
|
||||
def client(self) -> AsyncOpenAI:
|
||||
"""
|
||||
|
|
@ -371,7 +414,7 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):
|
|||
|
||||
async def register_model(self, model: Model) -> Model:
|
||||
if not await self.check_model_availability(model.provider_model_id):
|
||||
raise ValueError(f"Model {model.provider_model_id} is not available from provider {self.__provider_id__}")
|
||||
raise ValueError(f"Model {model.provider_model_id} is not available from provider {self.__provider_id__}") # type: ignore[attr-defined]
|
||||
return model
|
||||
|
||||
async def unregister_model(self, model_id: str) -> None:
|
||||
|
|
@ -387,28 +430,42 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):
|
|||
"""
|
||||
self._model_cache = {}
|
||||
|
||||
async for m in self.client.models.list():
|
||||
if self.allowed_models and m.id not in self.allowed_models:
|
||||
logger.info(f"Skipping model {m.id} as it is not in the allowed models list")
|
||||
try:
|
||||
iterable = await self.list_provider_model_ids()
|
||||
except Exception as e:
|
||||
logger.error(f"{self.__class__.__name__}.list_provider_model_ids() failed with: {e}")
|
||||
raise
|
||||
if not hasattr(iterable, "__iter__"):
|
||||
raise TypeError(
|
||||
f"Failed to list models: {self.__class__.__name__}.list_provider_model_ids() must return an iterable of "
|
||||
f"strings, but returned {type(iterable).__name__}"
|
||||
)
|
||||
|
||||
provider_models_ids = list(iterable)
|
||||
logger.info(f"{self.__class__.__name__}.list_provider_model_ids() returned {len(provider_models_ids)} models")
|
||||
|
||||
for provider_model_id in provider_models_ids:
|
||||
if not isinstance(provider_model_id, str):
|
||||
raise ValueError(f"Model ID {provider_model_id} from list_provider_model_ids() is not a string")
|
||||
if self.allowed_models and provider_model_id not in self.allowed_models:
|
||||
logger.info(f"Skipping model {provider_model_id} as it is not in the allowed models list")
|
||||
continue
|
||||
if metadata := self.embedding_model_metadata.get(m.id):
|
||||
# This is an embedding model - augment with metadata
|
||||
if metadata := self.embedding_model_metadata.get(provider_model_id):
|
||||
model = Model(
|
||||
provider_id=self.__provider_id__, # type: ignore[attr-defined]
|
||||
provider_resource_id=m.id,
|
||||
identifier=m.id,
|
||||
provider_resource_id=provider_model_id,
|
||||
identifier=provider_model_id,
|
||||
model_type=ModelType.embedding,
|
||||
metadata=metadata,
|
||||
)
|
||||
else:
|
||||
# This is an LLM
|
||||
model = Model(
|
||||
provider_id=self.__provider_id__, # type: ignore[attr-defined]
|
||||
provider_resource_id=m.id,
|
||||
identifier=m.id,
|
||||
provider_resource_id=provider_model_id,
|
||||
identifier=provider_model_id,
|
||||
model_type=ModelType.llm,
|
||||
)
|
||||
self._model_cache[m.id] = model
|
||||
self._model_cache[provider_model_id] = model
|
||||
|
||||
return list(self._model_cache.values())
|
||||
|
||||
|
|
@ -425,3 +482,29 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC):
|
|||
|
||||
async def should_refresh_models(self) -> bool:
|
||||
return False
|
||||
|
||||
#
|
||||
# The model_dump implementations are to avoid serializing the extra fields,
|
||||
# e.g. model_store, which are not pydantic.
|
||||
#
|
||||
|
||||
def _filter_fields(self, **kwargs):
|
||||
"""Helper to exclude extra fields from serialization."""
|
||||
# Exclude any extra fields stored in __pydantic_extra__
|
||||
if hasattr(self, "__pydantic_extra__") and self.__pydantic_extra__:
|
||||
exclude = kwargs.get("exclude", set())
|
||||
if not isinstance(exclude, set):
|
||||
exclude = set(exclude) if exclude else set()
|
||||
exclude.update(self.__pydantic_extra__.keys())
|
||||
kwargs["exclude"] = exclude
|
||||
return kwargs
|
||||
|
||||
def model_dump(self, **kwargs):
|
||||
"""Override to exclude extra fields from serialization."""
|
||||
kwargs = self._filter_fields(**kwargs)
|
||||
return super().model_dump(**kwargs)
|
||||
|
||||
def model_dump_json(self, **kwargs):
|
||||
"""Override to exclude extra fields from JSON serialization."""
|
||||
kwargs = self._filter_fields(**kwargs)
|
||||
return super().model_dump_json(**kwargs)
|
||||
|
|
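A quick illustration of why the `model_dump` overrides above exclude `__pydantic_extra__`: with `extra="allow"`, anything the resolver assigns at runtime (such as `model_store`) becomes an extra field, and without the filter it would leak into serialized output. The sketch below mimics the pattern with a stand-in class rather than the mixin itself.

```python
# Stand-in model reproducing the filtering pattern from the mixin above.
from pydantic import BaseModel, ConfigDict


class MixinLike(BaseModel):
    model_config = ConfigDict(extra="allow")
    provider_data_api_key_field: str | None = None

    def model_dump(self, **kwargs):
        # Exclude any runtime-injected extras, mirroring _filter_fields above.
        extra = getattr(self, "__pydantic_extra__", None) or {}
        exclude = set(kwargs.get("exclude") or set()) | set(extra.keys())
        kwargs["exclude"] = exclude
        return super().model_dump(**kwargs)


obj = MixinLike()
obj.model_store = {"registry": "injected-at-runtime"}  # simulates the resolver
dumped = obj.model_dump()
assert "model_store" not in dumped
assert "provider_data_api_key_field" in dumped
```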
|
|||
|
|
@ -11,6 +11,43 @@ from typing import Any, TypeVar
|
|||
from .strong_typing.schema import json_schema_type, register_schema # noqa: F401
|
||||
|
||||
|
||||
class ExtraBodyField[T]:
|
||||
"""
|
||||
Marker annotation for parameters that arrive via extra_body in the client SDK.
|
||||
|
||||
These parameters:
|
||||
- Will NOT appear in the generated client SDK method signature
|
||||
- WILL be documented in OpenAPI spec under x-llama-stack-extra-body-params
|
||||
- MUST be passed via the extra_body parameter in client SDK calls
|
||||
- WILL be available in server-side method signature with proper typing
|
||||
|
||||
Example:
|
||||
```python
|
||||
async def create_openai_response(
|
||||
self,
|
||||
input: str,
|
||||
model: str,
|
||||
shields: Annotated[
|
||||
list[str] | None, ExtraBodyField("List of shields to apply")
|
||||
] = None,
|
||||
) -> ResponseObject:
|
||||
# shields is available here with proper typing
|
||||
if shields:
|
||||
print(f"Using shields: {shields}")
|
||||
```
|
||||
|
||||
Client usage:
|
||||
```python
|
||||
client.responses.create(
|
||||
input="hello", model="llama-3", extra_body={"shields": ["shield-1"]}
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self, description: str | None = None):
|
||||
self.description = description
|
||||
|
||||
|
||||
@dataclass
|
||||
class WebMethod:
|
||||
level: str | None = None
|
||||
|
|
@ -26,7 +63,7 @@ class WebMethod:
|
|||
deprecated: bool | None = False
|
||||
|
||||
|
||||
T = TypeVar("T", bound=Callable[..., Any])
|
||||
CallableT = TypeVar("CallableT", bound=Callable[..., Any])
|
||||
|
||||
|
||||
def webmethod(
|
||||
|
|
@ -40,7 +77,7 @@ def webmethod(
|
|||
descriptive_name: str | None = None,
|
||||
required_scope: str | None = None,
|
||||
deprecated: bool | None = False,
|
||||
) -> Callable[[T], T]:
|
||||
) -> Callable[[CallableT], CallableT]:
|
||||
"""
|
||||
Decorator that supplies additional metadata to an endpoint operation function.
|
||||
|
||||
|
|
@ -51,7 +88,7 @@ def webmethod(
|
|||
:param required_scope: Required scope for this endpoint (e.g., 'monitoring.viewer').
|
||||
"""
|
||||
|
||||
def wrap(func: T) -> T:
|
||||
def wrap(func: CallableT) -> CallableT:
|
||||
webmethod_obj = WebMethod(
|
||||
route=route,
|
||||
method=method,
|
||||
|
|
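The `T` to `CallableT` rename above is purely a typing change: the decorator still returns the wrapped function unchanged, it just states that more precisely for type checkers. A minimal usage sketch, with the route and import path assumed for illustration:

```python
# Illustrative endpoint declaration; the route and import path are assumptions.
from typing import Protocol

from llama_stack.schema_utils import webmethod


class ExampleAPI(Protocol):
    @webmethod(route="/v1/example/ping", method="GET")
    async def ping(self) -> dict[str, str]:
        # Because webmethod() is now typed as Callable[[CallableT], CallableT],
        # the decorated method keeps its original signature for static analysis.
        ...
```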
|
|||
|
|
@ -22,10 +22,18 @@ from llama_stack.log import get_logger
|
|||
logger = get_logger(__name__, category="testing")
|
||||
|
||||
# Global state for the recording system
|
||||
# Note: Using module globals instead of ContextVars because the session-scoped
|
||||
# client initialization happens in one async context, but tests run in different
|
||||
# contexts, and we need the mode/storage to persist across all contexts.
|
||||
_current_mode: str | None = None
|
||||
_current_storage: ResponseStorage | None = None
|
||||
_original_methods: dict[str, Any] = {}
|
||||
|
||||
# Test context uses ContextVar since it changes per-test and needs async isolation
|
||||
from contextvars import ContextVar
|
||||
|
||||
_test_context: ContextVar[str | None] = ContextVar("_test_context", default=None)
|
||||
|
||||
from openai.types.completion_choice import CompletionChoice
|
||||
|
||||
# update the "finish_reason" field, since its type definition is wrong (no None is accepted)
|
||||
|
|
@ -33,22 +41,38 @@ CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "len
|
|||
CompletionChoice.model_rebuild()
|
||||
|
||||
REPO_ROOT = Path(__file__).parent.parent.parent
|
||||
DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/recordings"
|
||||
DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/common"
|
||||
|
||||
|
||||
class InferenceMode(StrEnum):
|
||||
LIVE = "live"
|
||||
RECORD = "record"
|
||||
REPLAY = "replay"
|
||||
RECORD_IF_MISSING = "record-if-missing"
|
||||
|
||||
|
||||
def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict[str, Any]) -> str:
|
||||
"""Create a normalized hash of the request for consistent matching."""
|
||||
"""Create a normalized hash of the request for consistent matching.
|
||||
|
||||
Includes test_id from context to ensure test isolation - identical requests
|
||||
from different tests will have different hashes.
|
||||
|
||||
Exception: Model list endpoints (/v1/models, /api/tags) exclude test_id since
|
||||
they are infrastructure/shared and need to work across session setup and tests.
|
||||
"""
|
||||
# Extract just the endpoint path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
parsed = urlparse(url)
|
||||
normalized = {"method": method.upper(), "endpoint": parsed.path, "body": body}
|
||||
normalized: dict[str, Any] = {
|
||||
"method": method.upper(),
|
||||
"endpoint": parsed.path,
|
||||
"body": body,
|
||||
}
|
||||
|
||||
# Include test_id for isolation, except for shared infrastructure endpoints
|
||||
if parsed.path not in ("/api/tags", "/v1/models"):
|
||||
normalized["test_id"] = _test_context.get()
|
||||
|
||||
# Create hash - sort_keys=True ensures deterministic ordering
|
||||
normalized_json = json.dumps(normalized, sort_keys=True)
|
||||
|
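A standalone sketch of the normalization-and-hash idea shown above. The exact digest function used by the recorder is not visible in this hunk, so `sha256` here is only an assumption for illustration; the test-isolation behavior follows directly from including `test_id` in the hashed payload.

```python
# Sketch of request normalization with per-test isolation.
import hashlib
import json
from urllib.parse import urlparse


def normalize_request_sketch(method: str, url: str, body: dict, test_id: str | None) -> str:
    parsed = urlparse(url)
    normalized: dict = {"method": method.upper(), "endpoint": parsed.path, "body": body}
    # Model-list endpoints are shared infrastructure, so they stay test-agnostic.
    if parsed.path not in ("/api/tags", "/v1/models"):
        normalized["test_id"] = test_id
    return hashlib.sha256(json.dumps(normalized, sort_keys=True).encode()).hexdigest()


# Identical request bodies hash differently across tests:
h1 = normalize_request_sketch("POST", "http://localhost:8321/v1/chat/completions", {"model": "m"}, "test_a")
h2 = normalize_request_sketch("POST", "http://localhost:8321/v1/chat/completions", {"model": "m"}, "test_b")
assert h1 != h2
```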
|
@ -67,7 +91,11 @@ def setup_inference_recording():
|
|||
Currently, this is only supported for OpenAI and Ollama clients. These should cover the vast majority of use cases.
|
||||
|
||||
Two environment variables are supported:
|
||||
- LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', or 'replay'. Default is 'replay'.
|
||||
- LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', 'replay', or 'record-if-missing'. Default is 'replay'.
|
||||
- 'live': Make all requests live without recording
|
||||
- 'record': Record all requests (overwrites existing recordings)
|
||||
- 'replay': Use only recorded responses (fails if recording not found)
|
||||
- 'record-if-missing': Use recorded responses when available, record new ones when not found
|
||||
- LLAMA_STACK_TEST_RECORDING_DIR: The directory to store the recordings in. Default is 'tests/integration/recordings'.
|
||||
|
||||
The recordings are stored as JSON files.
|
||||
|
|
@ -80,9 +108,43 @@ def setup_inference_recording():
|
|||
return inference_recording(mode=mode, storage_dir=storage_dir)
|
||||
|
||||
|
||||
def _serialize_response(response: Any) -> Any:
|
||||
def _normalize_response_data(data: dict[str, Any], request_hash: str) -> dict[str, Any]:
|
||||
"""Normalize fields that change between recordings but don't affect functionality.
|
||||
|
||||
This reduces noise in git diffs by making IDs deterministic and timestamps constant.
|
||||
"""
|
||||
# Only normalize ID for completion/chat responses, not for model objects
|
||||
# Model objects have "object": "model" and the ID is the actual model identifier
|
||||
if "id" in data and data.get("object") != "model":
|
||||
data["id"] = f"rec-{request_hash[:12]}"
|
||||
|
||||
# Normalize timestamp to epoch (0) (for OpenAI-style responses)
|
||||
# But not for model objects where created timestamp might be meaningful
|
||||
if "created" in data and data.get("object") != "model":
|
||||
data["created"] = 0
|
||||
|
||||
# Normalize Ollama-specific timestamp fields
|
||||
if "created_at" in data:
|
||||
data["created_at"] = "1970-01-01T00:00:00.000000Z"
|
||||
|
||||
# Normalize Ollama-specific duration fields (these vary based on system load)
|
||||
if "total_duration" in data and data["total_duration"] is not None:
|
||||
data["total_duration"] = 0
|
||||
if "load_duration" in data and data["load_duration"] is not None:
|
||||
data["load_duration"] = 0
|
||||
if "prompt_eval_duration" in data and data["prompt_eval_duration"] is not None:
|
||||
data["prompt_eval_duration"] = 0
|
||||
if "eval_duration" in data and data["eval_duration"] is not None:
|
||||
data["eval_duration"] = 0
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def _serialize_response(response: Any, request_hash: str = "") -> Any:
|
||||
if hasattr(response, "model_dump"):
|
||||
data = response.model_dump(mode="json")
|
||||
# Normalize fields to reduce noise
|
||||
data = _normalize_response_data(data, request_hash)
|
||||
return {
|
||||
"__type__": f"{response.__class__.__module__}.{response.__class__.__qualname__}",
|
||||
"__data__": data,
|
||||
|
|
@ -120,61 +182,121 @@ def _deserialize_response(data: dict[str, Any]) -> Any:
|
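As a concrete illustration of what the normalization above does to a recorded body (the `id` and `created` values appear in the recordings later in this diff):

```python
# Before/after view of _normalize_response_data for a chat-completion body.
before = {"id": "chatcmpl-130", "object": "chat.completion", "created": 1759437810}
# With a request_hash starting "044dcd8fdeb1", the stored body becomes:
after = {"id": "rec-044dcd8fdeb1", "object": "chat.completion", "created": 0}
# Ollama-style bodies are handled the same way: created_at is pinned to the
# epoch and the *_duration timing fields are zeroed.
```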
|||
class ResponseStorage:
|
||||
"""Handles SQLite index + JSON file storage/retrieval for inference recordings."""
|
||||
|
||||
def __init__(self, test_dir: Path):
|
||||
self.test_dir = test_dir
|
||||
self.responses_dir = self.test_dir / "responses"
|
||||
def __init__(self, base_dir: Path):
|
||||
self.base_dir = base_dir
|
||||
# Don't create responses_dir here - determine it per-test at runtime
|
||||
|
||||
self._ensure_directories()
|
||||
def _get_test_dir(self) -> Path:
|
||||
"""Get the recordings directory in the test file's parent directory.
|
||||
|
||||
For test at "tests/integration/inference/test_foo.py::test_bar",
|
||||
returns "tests/integration/inference/recordings/".
|
||||
"""
|
||||
test_id = _test_context.get()
|
||||
if test_id:
|
||||
# Extract the directory path from the test nodeid
|
||||
# e.g., "tests/integration/inference/test_basic.py::test_foo[params]"
|
||||
# -> get "tests/integration/inference"
|
||||
test_file = test_id.split("::")[0] # Remove test function part
|
||||
test_dir = Path(test_file).parent # Get parent directory
|
||||
|
||||
# Put recordings in a "recordings" subdirectory of the test's parent dir
|
||||
# e.g., "tests/integration/inference" -> "tests/integration/inference/recordings"
|
||||
return test_dir / "recordings"
|
||||
else:
|
||||
# Fallback for non-test contexts
|
||||
return self.base_dir / "recordings"
|
||||
|
||||
def _ensure_directories(self):
|
||||
self.test_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.responses_dir.mkdir(exist_ok=True)
|
||||
"""Ensure test-specific directories exist."""
|
||||
test_dir = self._get_test_dir()
|
||||
test_dir.mkdir(parents=True, exist_ok=True)
|
||||
return test_dir
|
||||
|
||||
def store_recording(self, request_hash: str, request: dict[str, Any], response: dict[str, Any]):
|
||||
"""Store a request/response pair."""
|
||||
# Generate unique response filename
|
||||
short_hash = request_hash[:12]
|
||||
response_file = f"{short_hash}.json"
|
||||
responses_dir = self._ensure_directories()
|
||||
|
||||
# Use FULL hash (not truncated)
|
||||
response_file = f"{request_hash}.json"
|
||||
|
||||
# Serialize response body if needed
|
||||
serialized_response = dict(response)
|
||||
if "body" in serialized_response:
|
||||
if isinstance(serialized_response["body"], list):
|
||||
# Handle streaming responses (list of chunks)
|
||||
serialized_response["body"] = [_serialize_response(chunk) for chunk in serialized_response["body"]]
|
||||
serialized_response["body"] = [
|
||||
_serialize_response(chunk, request_hash) for chunk in serialized_response["body"]
|
||||
]
|
||||
else:
|
||||
# Handle single response
|
||||
serialized_response["body"] = _serialize_response(serialized_response["body"])
|
||||
serialized_response["body"] = _serialize_response(serialized_response["body"], request_hash)
|
||||
|
||||
# If this is an Ollama /api/tags recording, include models digest in filename to distinguish variants
|
||||
# For model-list endpoints, include digest in filename to distinguish different model sets
|
||||
endpoint = request.get("endpoint")
|
||||
if endpoint in ("/api/tags", "/v1/models"):
|
||||
digest = _model_identifiers_digest(endpoint, response)
|
||||
response_file = f"models-{short_hash}-{digest}.json"
|
||||
response_file = f"models-{request_hash}-{digest}.json"
|
||||
|
||||
response_path = self.responses_dir / response_file
|
||||
response_path = responses_dir / response_file
|
||||
|
||||
# Save response to JSON file
|
||||
# Save response to JSON file with metadata
|
||||
with open(response_path, "w") as f:
|
||||
json.dump({"request": request, "response": serialized_response}, f, indent=2)
|
||||
json.dump(
|
||||
{
|
||||
"test_id": _test_context.get(), # Include for debugging
|
||||
"request": request,
|
||||
"response": serialized_response,
|
||||
},
|
||||
f,
|
||||
indent=2,
|
||||
)
|
||||
f.write("\n")
|
||||
f.flush()
|
||||
|
||||
def find_recording(self, request_hash: str) -> dict[str, Any] | None:
|
||||
"""Find a recorded response by request hash."""
|
||||
response_file = f"{request_hash[:12]}.json"
|
||||
response_path = self.responses_dir / response_file
|
||||
"""Find a recorded response by request hash.
|
||||
|
||||
if not response_path.exists():
|
||||
return None
|
||||
Uses fallback: first checks test-specific dir, then falls back to base recordings dir.
|
||||
This handles cases where recordings happen during session setup (no test context) but
|
||||
are requested during tests (with test context).
|
||||
"""
|
||||
response_file = f"{request_hash}.json"
|
||||
|
||||
return _recording_from_file(response_path)
|
||||
# Try test-specific directory first
|
||||
test_dir = self._get_test_dir()
|
||||
response_path = test_dir / response_file
|
||||
|
||||
def _model_list_responses(self, short_hash: str) -> list[dict[str, Any]]:
|
||||
if response_path.exists():
|
||||
return _recording_from_file(response_path)
|
||||
|
||||
# Fallback to base recordings directory (for session-level recordings)
|
||||
fallback_dir = self.base_dir / "recordings"
|
||||
fallback_path = fallback_dir / response_file
|
||||
|
||||
if fallback_path.exists():
|
||||
return _recording_from_file(fallback_path)
|
||||
|
||||
return None
|
||||
|
||||
def _model_list_responses(self, request_hash: str) -> list[dict[str, Any]]:
|
||||
"""Find all model-list recordings with the given hash (different digests)."""
|
||||
results: list[dict[str, Any]] = []
|
||||
for path in self.responses_dir.glob(f"models-{short_hash}-*.json"):
|
||||
data = _recording_from_file(path)
|
||||
results.append(data)
|
||||
|
||||
# Check test-specific directory first
|
||||
test_dir = self._get_test_dir()
|
||||
if test_dir.exists():
|
||||
for path in test_dir.glob(f"models-{request_hash}-*.json"):
|
||||
data = _recording_from_file(path)
|
||||
results.append(data)
|
||||
|
||||
# Also check fallback directory
|
||||
fallback_dir = self.base_dir / "recordings"
|
||||
if fallback_dir.exists():
|
||||
for path in fallback_dir.glob(f"models-{request_hash}-*.json"):
|
||||
data = _recording_from_file(path)
|
||||
results.append(data)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
|
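The directory mapping described in `_get_test_dir` above, as a standalone sketch:

```python
# For a test nodeid, recordings live in a "recordings" dir next to the test file.
from pathlib import Path

test_id = "tests/integration/inference/test_basic.py::test_foo[params]"
test_file = test_id.split("::")[0]                 # tests/integration/inference/test_basic.py
recordings_dir = Path(test_file).parent / "recordings"
assert recordings_dir.as_posix() == "tests/integration/inference/recordings"
```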
|
@ -195,6 +317,8 @@ def _recording_from_file(response_path) -> dict[str, Any]:
|
|||
|
||||
|
||||
def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
|
||||
"""Generate a digest from model identifiers for distinguishing different model sets."""
|
||||
|
||||
def _extract_model_identifiers():
|
||||
"""Extract a stable set of identifiers for model-list endpoints.
|
||||
|
||||
|
|
@ -217,7 +341,14 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
|
|||
|
||||
|
||||
def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) -> dict[str, Any] | None:
|
||||
"""Return a single, unioned recording for supported model-list endpoints."""
|
||||
"""Return a single, unioned recording for supported model-list endpoints.
|
||||
|
||||
Merges multiple recordings with different model sets (from different servers) into
|
||||
a single response containing all models.
|
||||
"""
|
||||
if not records:
|
||||
return None
|
||||
|
||||
seen: dict[str, dict[str, Any]] = {}
|
||||
for rec in records:
|
||||
body = rec["response"]["body"]
|
||||
|
|
@ -246,7 +377,10 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
|
|||
async def _patched_inference_method(original_method, self, client_type, endpoint, *args, **kwargs):
|
||||
global _current_mode, _current_storage
|
||||
|
||||
if _current_mode == InferenceMode.LIVE or _current_storage is None:
|
||||
mode = _current_mode
|
||||
storage = _current_storage
|
||||
|
||||
if mode == InferenceMode.LIVE or storage is None:
|
||||
if endpoint == "/v1/models":
|
||||
return original_method(self, *args, **kwargs)
|
||||
else:
|
||||
|
|
@ -277,13 +411,16 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
|
|||
|
||||
request_hash = normalize_request(method, url, headers, body)
|
||||
|
||||
if _current_mode == InferenceMode.REPLAY:
|
||||
# Special handling for model-list endpoints: return union of all responses
|
||||
# Try to find existing recording for REPLAY or RECORD_IF_MISSING modes
|
||||
recording = None
|
||||
if mode == InferenceMode.REPLAY or mode == InferenceMode.RECORD_IF_MISSING:
|
||||
# Special handling for model-list endpoints: merge all recordings with this hash
|
||||
if endpoint in ("/api/tags", "/v1/models"):
|
||||
records = _current_storage._model_list_responses(request_hash[:12])
|
||||
records = storage._model_list_responses(request_hash)
|
||||
recording = _combine_model_list_responses(endpoint, records)
|
||||
else:
|
||||
recording = _current_storage.find_recording(request_hash)
|
||||
recording = storage.find_recording(request_hash)
|
||||
|
||||
if recording:
|
||||
response_body = recording["response"]["body"]
|
||||
|
||||
|
|
@ -296,7 +433,8 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
|
|||
return replay_stream()
|
||||
else:
|
||||
return response_body
|
||||
else:
|
||||
elif mode == InferenceMode.REPLAY:
|
||||
# REPLAY mode requires recording to exist
|
||||
raise RuntimeError(
|
||||
f"No recorded response found for request hash: {request_hash}\n"
|
||||
f"Request: {method} {url} {body}\n"
|
||||
|
|
@ -304,7 +442,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
|
|||
f"To record this response, run with LLAMA_STACK_TEST_INFERENCE_MODE=record"
|
||||
)
|
||||
|
||||
elif _current_mode == InferenceMode.RECORD:
|
||||
if mode == InferenceMode.RECORD or (mode == InferenceMode.RECORD_IF_MISSING and not recording):
|
||||
if endpoint == "/v1/models":
|
||||
response = original_method(self, *args, **kwargs)
|
||||
else:
|
||||
|
|
@ -335,7 +473,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
|
|||
|
||||
# Store the recording immediately
|
||||
response_data = {"body": chunks, "is_streaming": True}
|
||||
_current_storage.store_recording(request_hash, request_data, response_data)
|
||||
storage.store_recording(request_hash, request_data, response_data)
|
||||
|
||||
# Return a generator that replays the stored chunks
|
||||
async def replay_recorded_stream():
|
||||
|
|
@ -345,11 +483,11 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
|
|||
return replay_recorded_stream()
|
||||
else:
|
||||
response_data = {"body": response, "is_streaming": False}
|
||||
_current_storage.store_recording(request_hash, request_data, response_data)
|
||||
storage.store_recording(request_hash, request_data, response_data)
|
||||
return response
|
||||
|
||||
else:
|
||||
raise AssertionError(f"Invalid mode: {_current_mode}")
|
||||
raise AssertionError(f"Invalid mode: {mode}")
|
||||
|
||||
|
||||
def patch_inference_clients():
|
||||
|
|
@ -490,9 +628,9 @@ def inference_recording(mode: str, storage_dir: str | Path | None = None) -> Gen
|
|||
try:
|
||||
_current_mode = mode
|
||||
|
||||
if mode in ["record", "replay"]:
|
||||
if mode in ["record", "replay", "record-if-missing"]:
|
||||
if storage_dir is None:
|
||||
raise ValueError("storage_dir is required for record and replay modes")
|
||||
raise ValueError("storage_dir is required for record, replay, and record-if-missing modes")
|
||||
_current_storage = ResponseStorage(Path(storage_dir))
|
||||
patch_inference_clients()
|
||||
|
||||
|
|
@ -500,7 +638,7 @@ def inference_recording(mode: str, storage_dir: str | Path | None = None) -> Gen
|
|||
|
||||
finally:
|
||||
# Restore previous state
|
||||
if mode in ["record", "replay"]:
|
||||
if mode in ["record", "replay", "record-if-missing"]:
|
||||
unpatch_inference_clients()
|
||||
|
||||
_current_mode = prev_mode
|
||||
|
|
|
|||
118 llama_stack/ui/package-lock.json generated
|
|
@ -20,11 +20,11 @@
|
|||
"framer-motion": "^12.23.12",
|
||||
"llama-stack-client": "^0.2.23",
|
||||
"lucide-react": "^0.542.0",
|
||||
"next": "15.5.3",
|
||||
"next": "15.5.4",
|
||||
"next-auth": "^4.24.11",
|
||||
"next-themes": "^0.4.6",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.1.1",
|
||||
"react-dom": "^19.2.0",
|
||||
"react-markdown": "^10.1.0",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"remeda": "^2.32.0",
|
||||
|
|
@ -2279,9 +2279,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-15.5.3.tgz",
|
||||
"integrity": "sha512-RSEDTRqyihYXygx/OJXwvVupfr9m04+0vH8vyy0HfZ7keRto6VX9BbEk0J2PUk0VGy6YhklJUSrgForov5F9pw==",
|
||||
"version": "15.5.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-15.5.4.tgz",
|
||||
"integrity": "sha512-27SQhYp5QryzIT5uO8hq99C69eLQ7qkzkDPsk3N+GuS2XgOgoYEeOav7Pf8Tn4drECOVDsDg8oj+/DVy8qQL2A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
|
|
@ -2295,9 +2295,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.5.3.tgz",
|
||||
"integrity": "sha512-nzbHQo69+au9wJkGKTU9lP7PXv0d1J5ljFpvb+LnEomLtSbJkbZyEs6sbF3plQmiOB2l9OBtN2tNSvCH1nQ9Jg==",
|
||||
"version": "15.5.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.5.4.tgz",
|
||||
"integrity": "sha512-nopqz+Ov6uvorej8ndRX6HlxCYWCO3AHLfKK2TYvxoSB2scETOcfm/HSS3piPqc3A+MUgyHoqE6je4wnkjfrOA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
|
|
@ -2311,9 +2311,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.5.3.tgz",
|
||||
"integrity": "sha512-w83w4SkOOhekJOcA5HBvHyGzgV1W/XvOfpkrxIse4uPWhYTTRwtGEM4v/jiXwNSJvfRvah0H8/uTLBKRXlef8g==",
|
||||
"version": "15.5.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.5.4.tgz",
|
||||
"integrity": "sha512-QOTCFq8b09ghfjRJKfb68kU9k2K+2wsC4A67psOiMn849K9ZXgCSRQr0oVHfmKnoqCbEmQWG1f2h1T2vtJJ9mA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
|
@ -2327,9 +2327,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.5.3.tgz",
|
||||
"integrity": "sha512-+m7pfIs0/yvgVu26ieaKrifV8C8yiLe7jVp9SpcIzg7XmyyNE7toC1fy5IOQozmr6kWl/JONC51osih2RyoXRw==",
|
||||
"version": "15.5.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.5.4.tgz",
|
||||
"integrity": "sha512-eRD5zkts6jS3VfE/J0Kt1VxdFqTnMc3QgO5lFE5GKN3KDI/uUpSyK3CjQHmfEkYR4wCOl0R0XrsjpxfWEA++XA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
|
|
@ -2343,9 +2343,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.5.3.tgz",
|
||||
"integrity": "sha512-u3PEIzuguSenoZviZJahNLgCexGFhso5mxWCrrIMdvpZn6lkME5vc/ADZG8UUk5K1uWRy4hqSFECrON6UKQBbQ==",
|
||||
"version": "15.5.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.5.4.tgz",
|
||||
"integrity": "sha512-TOK7iTxmXFc45UrtKqWdZ1shfxuL4tnVAOuuJK4S88rX3oyVV4ZkLjtMT85wQkfBrOOvU55aLty+MV8xmcJR8A==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
|
|
@ -2359,9 +2359,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.5.3.tgz",
|
||||
"integrity": "sha512-lDtOOScYDZxI2BENN9m0pfVPJDSuUkAD1YXSvlJF0DKwZt0WlA7T7o3wrcEr4Q+iHYGzEaVuZcsIbCps4K27sA==",
|
||||
"version": "15.5.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.5.4.tgz",
|
||||
"integrity": "sha512-7HKolaj+481FSW/5lL0BcTkA4Ueam9SPYWyN/ib/WGAFZf0DGAN8frNpNZYFHtM4ZstrHZS3LY3vrwlIQfsiMA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
|
@ -2375,9 +2375,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.5.3.tgz",
|
||||
"integrity": "sha512-9vWVUnsx9PrY2NwdVRJ4dUURAQ8Su0sLRPqcCCxtX5zIQUBES12eRVHq6b70bbfaVaxIDGJN2afHui0eDm+cLg==",
|
||||
"version": "15.5.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.5.4.tgz",
|
||||
"integrity": "sha512-nlQQ6nfgN0nCO/KuyEUwwOdwQIGjOs4WNMjEUtpIQJPR2NUfmGpW2wkJln1d4nJ7oUzd1g4GivH5GoEPBgfsdw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
|
@ -2391,9 +2391,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.5.3.tgz",
|
||||
"integrity": "sha512-1CU20FZzY9LFQigRi6jM45oJMU3KziA5/sSG+dXeVaTm661snQP6xu3ykGxxwU5sLG3sh14teO/IOEPVsQMRfA==",
|
||||
"version": "15.5.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.5.4.tgz",
|
||||
"integrity": "sha512-PcR2bN7FlM32XM6eumklmyWLLbu2vs+D7nJX8OAIoWy69Kef8mfiN4e8TUv2KohprwifdpFKPzIP1njuCjD0YA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
|
|
@ -2407,9 +2407,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.5.3.tgz",
|
||||
"integrity": "sha512-JMoLAq3n3y5tKXPQwCK5c+6tmwkuFDa2XAxz8Wm4+IVthdBZdZGh+lmiLUHg9f9IDwIQpUjp+ysd6OkYTyZRZw==",
|
||||
"version": "15.5.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.5.4.tgz",
|
||||
"integrity": "sha512-1ur2tSHZj8Px/KMAthmuI9FMp/YFusMMGoRNJaRZMOlSkgvLjzosSdQI0cJAKogdHl3qXUQKL9MGaYvKwA7DXg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
|
@ -3995,22 +3995,22 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@types/react": {
|
||||
"version": "19.1.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/react/-/react-19.1.4.tgz",
|
||||
"integrity": "sha512-EB1yiiYdvySuIITtD5lhW4yPyJ31RkJkkDw794LaQYrxCSaQV/47y5o1FMC4zF9ZyjUjzJMZwbovEnT5yHTW6g==",
|
||||
"version": "19.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.0.tgz",
|
||||
"integrity": "sha512-1LOH8xovvsKsCBq1wnT4ntDUdCJKmnEakhsuoUSy6ExlHCkGP2hqnatagYTgFk6oeL0VU31u7SNjunPN+GchtA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"csstype": "^3.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/react-dom": {
|
||||
"version": "19.1.9",
|
||||
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.1.9.tgz",
|
||||
"integrity": "sha512-qXRuZaOsAdXKFyOhRBg6Lqqc0yay13vN7KrIg4L7N4aaHN68ma9OK3NE1BoDFgFOTfM7zg+3/8+2n8rLUH3OKQ==",
|
||||
"version": "19.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.2.0.tgz",
|
||||
"integrity": "sha512-brtBs0MnE9SMx7px208g39lRmC5uHZs96caOJfTjFcYSLHNamvaSMfJNagChVNkup2SdtOxKX1FDBkRSJe1ZAg==",
|
||||
"devOptional": true,
|
||||
"license": "MIT",
|
||||
"peerDependencies": {
|
||||
"@types/react": "^19.0.0"
|
||||
"@types/react": "^19.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/stack-utils": {
|
||||
|
|
@ -11414,12 +11414,12 @@
|
|||
}
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-15.5.3.tgz",
|
||||
"integrity": "sha512-r/liNAx16SQj4D+XH/oI1dlpv9tdKJ6cONYPwwcCC46f2NjpaRWY+EKCzULfgQYV6YKXjHBchff2IZBSlZmJNw==",
|
||||
"version": "15.5.4",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-15.5.4.tgz",
|
||||
"integrity": "sha512-xH4Yjhb82sFYQfY3vbkJfgSDgXvBB6a8xPs9i35k6oZJRoQRihZH+4s9Yo2qsWpzBmZ3lPXaJ2KPXLfkvW4LnA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@next/env": "15.5.3",
|
||||
"@next/env": "15.5.4",
|
||||
"@swc/helpers": "0.5.15",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
"postcss": "8.4.31",
|
||||
|
|
@ -11432,14 +11432,14 @@
|
|||
"node": "^18.18.0 || ^19.8.0 || >= 20.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "15.5.3",
|
||||
"@next/swc-darwin-x64": "15.5.3",
|
||||
"@next/swc-linux-arm64-gnu": "15.5.3",
|
||||
"@next/swc-linux-arm64-musl": "15.5.3",
|
||||
"@next/swc-linux-x64-gnu": "15.5.3",
|
||||
"@next/swc-linux-x64-musl": "15.5.3",
|
||||
"@next/swc-win32-arm64-msvc": "15.5.3",
|
||||
"@next/swc-win32-x64-msvc": "15.5.3",
|
||||
"@next/swc-darwin-arm64": "15.5.4",
|
||||
"@next/swc-darwin-x64": "15.5.4",
|
||||
"@next/swc-linux-arm64-gnu": "15.5.4",
|
||||
"@next/swc-linux-arm64-musl": "15.5.4",
|
||||
"@next/swc-linux-x64-gnu": "15.5.4",
|
||||
"@next/swc-linux-x64-musl": "15.5.4",
|
||||
"@next/swc-win32-arm64-msvc": "15.5.4",
|
||||
"@next/swc-win32-x64-msvc": "15.5.4",
|
||||
"sharp": "^0.34.3"
|
||||
},
|
||||
"peerDependencies": {
|
||||
|
|
@ -12450,24 +12450,24 @@
|
|||
}
|
||||
},
|
||||
"node_modules/react": {
|
||||
"version": "19.1.1",
|
||||
"resolved": "https://registry.npmjs.org/react/-/react-19.1.1.tgz",
|
||||
"integrity": "sha512-w8nqGImo45dmMIfljjMwOGtbmC/mk4CMYhWIicdSflH91J9TyCyczcPFXJzrZ/ZXcgGRFeP6BU0BEJTw6tZdfQ==",
|
||||
"version": "19.2.0",
|
||||
"resolved": "https://registry.npmjs.org/react/-/react-19.2.0.tgz",
|
||||
"integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-dom": {
|
||||
"version": "19.1.1",
|
||||
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.1.tgz",
|
||||
"integrity": "sha512-Dlq/5LAZgF0Gaz6yiqZCf6VCcZs1ghAJyrsu84Q/GT0gV+mCxbfmKNoGRKBYMJ8IEdGPqu49YWXD02GCknEDkw==",
|
||||
"version": "19.2.0",
|
||||
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.0.tgz",
|
||||
"integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"scheduler": "^0.26.0"
|
||||
"scheduler": "^0.27.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^19.1.1"
|
||||
"react": "^19.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-is": {
|
||||
|
|
@ -12982,9 +12982,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/scheduler": {
|
||||
"version": "0.26.0",
|
||||
"resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.26.0.tgz",
|
||||
"integrity": "sha512-NlHwttCI/l5gCPR3D1nNXtWABUmBwvZpEQiD4IXSbIDq8BzLIK/7Ir5gTFSGZDUu37K5cMNp0hFtzO38sC7gWA==",
|
||||
"version": "0.27.0",
|
||||
"resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz",
|
||||
"integrity": "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/semver": {
|
||||
|
|
|
|||
|
|
@ -25,11 +25,11 @@
|
|||
"framer-motion": "^12.23.12",
|
||||
"llama-stack-client": "^0.2.23",
|
||||
"lucide-react": "^0.542.0",
|
||||
"next": "15.5.3",
|
||||
"next": "15.5.4",
|
||||
"next-auth": "^4.24.11",
|
||||
"next-themes": "^0.4.6",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.1.1",
|
||||
"react-dom": "^19.2.0",
|
||||
"react-markdown": "^10.1.0",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"remeda": "^2.32.0",
|
||||
|
|
|
|||
|
|
@ -99,6 +99,7 @@ unit = [
|
|||
"coverage",
|
||||
"chromadb>=1.0.15",
|
||||
"moto[s3]>=5.1.10",
|
||||
"weaviate-client>=4.16.4",
|
||||
]
|
||||
# These are the core dependencies required for running integration tests. They are shared across all
|
||||
# providers. If a provider requires additional dependencies, please add them to your environment
|
||||
|
|
@ -277,14 +278,10 @@ exclude = [
|
|||
"^llama_stack/providers/remote/datasetio/huggingface/",
|
||||
"^llama_stack/providers/remote/datasetio/nvidia/",
|
||||
"^llama_stack/providers/remote/inference/bedrock/",
|
||||
"^llama_stack/providers/remote/inference/cerebras/",
|
||||
"^llama_stack/providers/remote/inference/databricks/",
|
||||
"^llama_stack/providers/remote/inference/fireworks/",
|
||||
"^llama_stack/providers/remote/inference/nvidia/",
|
||||
"^llama_stack/providers/remote/inference/passthrough/",
|
||||
"^llama_stack/providers/remote/inference/runpod/",
|
||||
"^llama_stack/providers/remote/inference/tgi/",
|
||||
"^llama_stack/providers/remote/inference/together/",
|
||||
"^llama_stack/providers/remote/inference/watsonx/",
|
||||
"^llama_stack/providers/remote/safety/bedrock/",
|
||||
"^llama_stack/providers/remote/safety/nvidia/",
|
||||
|
|
|
|||
120 scripts/normalize_recordings.py Executable file
|
|
@ -0,0 +1,120 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""
|
||||
Utility script to re-normalize existing recording files.
|
||||
|
||||
This script reads all recording JSON files and applies the normalization
|
||||
to make IDs deterministic and timestamps constant. This reduces noise in
|
||||
git diffs when recordings are re-recorded.
|
||||
|
||||
Usage:
|
||||
python scripts/normalize_recordings.py [--dry-run]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def normalize_response_data(data: dict, request_hash: str) -> dict:
|
||||
"""Normalize fields that change between recordings but don't affect functionality."""
|
||||
# Only normalize ID for completion/chat responses, not for model objects
|
||||
# Model objects have "object": "model" and the ID is the actual model identifier
|
||||
if "id" in data and data.get("object") != "model":
|
||||
data["id"] = f"rec-{request_hash[:12]}"
|
||||
|
||||
# Normalize timestamp to epoch (0) (for OpenAI-style responses)
|
||||
# But not for model objects where created timestamp might be meaningful
|
||||
if "created" in data and data.get("object") != "model":
|
||||
data["created"] = 0
|
||||
|
||||
# Normalize Ollama-specific timestamp fields
|
||||
if "created_at" in data:
|
||||
data["created_at"] = "1970-01-01T00:00:00.000000Z"
|
||||
|
||||
# Normalize Ollama-specific duration fields (these vary based on system load)
|
||||
if "total_duration" in data and data["total_duration"] is not None:
|
||||
data["total_duration"] = 0
|
||||
if "load_duration" in data and data["load_duration"] is not None:
|
||||
data["load_duration"] = 0
|
||||
if "prompt_eval_duration" in data and data["prompt_eval_duration"] is not None:
|
||||
data["prompt_eval_duration"] = 0
|
||||
if "eval_duration" in data and data["eval_duration"] is not None:
|
||||
data["eval_duration"] = 0
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def normalize_recording_file(file_path: Path, dry_run: bool = False) -> bool:
|
||||
"""Normalize a single recording file. Returns True if file was modified."""
|
||||
with open(file_path) as f:
|
||||
recording = json.load(f)
|
||||
|
||||
# Extract request hash from filename (first 12 chars)
|
||||
request_hash = file_path.stem.split("-")[-1] if "-" in file_path.stem else file_path.stem
|
||||
|
||||
modified = False
|
||||
old_recording = json.dumps(recording, sort_keys=True)
|
||||
|
||||
# NOTE: We do NOT normalize request body here because that would change the request hash
|
||||
# and break recording lookups. The recorder will normalize tool_call_ids in future recordings.
|
||||
|
||||
# Normalize response body
|
||||
if "response" in recording and "body" in recording["response"]:
|
||||
body = recording["response"]["body"]
|
||||
|
||||
if isinstance(body, list):
|
||||
# Handle streaming responses (list of chunks)
|
||||
for chunk in body:
|
||||
if isinstance(chunk, dict) and "__data__" in chunk:
|
||||
normalize_response_data(chunk["__data__"], request_hash)
|
||||
elif isinstance(body, dict) and "__data__" in body:
|
||||
# Handle single response
|
||||
normalize_response_data(body["__data__"], request_hash)
|
||||
|
||||
# Check if anything changed
|
||||
new_recording = json.dumps(recording, sort_keys=True)
|
||||
modified = old_recording != new_recording
|
||||
|
||||
if modified and not dry_run:
|
||||
with open(file_path, "w") as f:
|
||||
json.dump(recording, f, indent=2)
|
||||
f.write("\n")
|
||||
|
||||
return modified
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Normalize recording files to reduce git diff noise")
|
||||
parser.add_argument("--dry-run", action="store_true", help="Show what would be changed without modifying files")
|
||||
args = parser.parse_args()
|
||||
|
||||
recordings_dir = Path(__file__).parent.parent / "tests/integration/recordings/responses"
|
||||
|
||||
if not recordings_dir.exists():
|
||||
print(f"Recordings directory not found: {recordings_dir}")
|
||||
return 1
|
||||
|
||||
modified_count = 0
|
||||
total_count = 0
|
||||
|
||||
for file_path in sorted(recordings_dir.glob("*.json")):
|
||||
total_count += 1
|
||||
was_modified = normalize_recording_file(file_path, dry_run=args.dry_run)
|
||||
|
||||
if was_modified:
|
||||
modified_count += 1
|
||||
status = "[DRY RUN] Would normalize" if args.dry_run else "Normalized"
|
||||
print(f"{status}: {file_path.name}")
|
||||
|
||||
print(f"\n{'[DRY RUN] ' if args.dry_run else ''}Summary: {modified_count}/{total_count} files modified")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
15 scripts/telemetry/grafana-datasources.yaml Normal file

@ -0,0 +1,15 @@
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: true

  - name: Jaeger
    type: jaeger
    access: proxy
    url: http://jaeger:16686
    editable: true
40 scripts/telemetry/otel-collector-config.yaml Normal file

@ -0,0 +1,40 @@
receivers:
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318

processors:
  batch:
    timeout: 1s
    send_batch_size: 1024

exporters:
  # Export traces to Jaeger
  otlp/jaeger:
    endpoint: jaeger:4317
    tls:
      insecure: true

  # Export metrics to Prometheus
  prometheus:
    endpoint: 0.0.0.0:9464
    namespace: llama_stack

  # Debug exporter for troubleshooting
  debug:
    verbosity: detailed

service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [batch]
      exporters: [otlp/jaeger, debug]

    metrics:
      receivers: [otlp]
      processors: [batch]
      exporters: [prometheus, debug]
scripts/telemetry/prometheus.yml
Normal file
12
scripts/telemetry/prometheus.yml
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
- job_name: 'otel-collector'
|
||||
static_configs:
|
||||
- targets: ['otel-collector:9464']
|
||||
|
|
@ -17,6 +17,7 @@
set -Eeuo pipefail

CONTAINER_RUNTIME=${CONTAINER_RUNTIME:-docker}
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "🚀 Setting up telemetry stack for Llama Stack using Podman..."

@ -53,7 +54,7 @@ $CONTAINER_RUNTIME run -d --name otel-collector \
  -p 4317:4317 \
  -p 9464:9464 \
  -p 13133:13133 \
  -v $(pwd)/otel-collector-config.yaml:/etc/otel-collector-config.yaml:Z \
  -v "$SCRIPT_DIR/otel-collector-config.yaml:/etc/otel-collector-config.yaml:Z" \
  docker.io/otel/opentelemetry-collector-contrib:latest \
  --config /etc/otel-collector-config.yaml

@ -62,7 +63,7 @@ echo "📈 Starting Prometheus..."
$CONTAINER_RUNTIME run -d --name prometheus \
  --network llama-telemetry \
  -p 9090:9090 \
  -v $(pwd)/prometheus.yml:/etc/prometheus/prometheus.yml:Z \
  -v "$SCRIPT_DIR/prometheus.yml:/etc/prometheus/prometheus.yml:Z" \
  docker.io/prom/prometheus:latest \
  --config.file=/etc/prometheus/prometheus.yml \
  --storage.tsdb.path=/prometheus \

@ -72,13 +73,15 @@ $CONTAINER_RUNTIME run -d --name prometheus \
  --web.enable-lifecycle

# Start Grafana
# Note: Using 11.0.0 because grafana:latest arm64 image has a broken /run.sh (0 bytes)
echo "📊 Starting Grafana..."
$CONTAINER_RUNTIME run -d --name grafana \
  --network llama-telemetry \
  -p 3000:3000 \
  -e GF_SECURITY_ADMIN_PASSWORD=admin \
  -e GF_USERS_ALLOW_SIGN_UP=false \
  docker.io/grafana/grafana:latest
  -v "$SCRIPT_DIR/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
  docker.io/grafana/grafana:11.0.0

# Wait for services to start
echo "⏳ Waiting for services to start..."
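One plausible way to point a process's OpenTelemetry SDK at the collector started by this script is the standard OTLP environment variables; whether Llama Stack itself reads these or uses its own telemetry configuration is not shown in this diff, so treat the snippet as an assumption.

```python
# Standard OpenTelemetry SDK environment variables; shown as an assumption about
# wiring, not as Llama Stack's own configuration surface.
import os

os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "http://localhost:4318"  # collector HTTP port above
os.environ["OTEL_SERVICE_NAME"] = "llama-stack"
```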
@ -125,21 +125,28 @@ pytest -s -v tests/integration/vector_io/ \

## Recording Modes

The testing system supports three modes controlled by environment variables:
The testing system supports four modes controlled by environment variables:

### REPLAY Mode (Default)
Uses cached responses instead of making API calls:
```bash
pytest tests/integration/
```

### RECORD-IF-MISSING Mode (Recommended for adding new tests)
Records only when no recording exists, otherwise replays. This is the preferred mode for iterative development:
```bash
pytest tests/integration/inference/test_new_feature.py --inference-mode=record-if-missing
```

### RECORD Mode
Captures API interactions for later replay:
**Force-records all API interactions**, overwriting existing recordings. Use with caution as this will re-record everything:
```bash
pytest tests/integration/inference/test_new_feature.py --inference-mode=record
```

### LIVE Mode
Tests make real API calls (but not recorded):
Tests make real API calls (not recorded):
```bash
pytest tests/integration/ --inference-mode=live
```
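For reference, the pytest flags above map onto the environment variables documented earlier in this diff for `setup_inference_recording()`; a rough Python-side equivalent might look like the sketch below (the recording directory value is just the documented default).

```python
# Rough equivalent of `pytest ... --inference-mode=record-if-missing`, using the
# LLAMA_STACK_TEST_* variables described earlier in this diff.
import os
import subprocess

env = dict(os.environ)
env["LLAMA_STACK_TEST_INFERENCE_MODE"] = "record-if-missing"
env["LLAMA_STACK_TEST_RECORDING_DIR"] = "tests/integration/recordings"  # documented default
subprocess.run(["pytest", "tests/integration/inference/test_new_feature.py"], env=env, check=False)
```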
@ -0,0 +1,58 @@
|
|||
{
|
||||
"test_id": "tests/integration/agents/test_agents.py::test_custom_tool_infinite_loop[ollama/llama3.2:3b-instruct-fp16]",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama-guard3:1b",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Task: Check if there is unsafe content in 'Tool' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: Get the boiling point of polyjuice with a tool call.\n\nAssistant: \n\nTool: Error when running tool: get_boiling_point() missing 1 required positional argument: 'liquid_name'\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST Tool message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
|
||||
}
|
||||
],
|
||||
"stream": false,
|
||||
"temperature": 0.0
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama-guard3:1b"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "rec-000506671ad4",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "safe",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama-guard3:1b",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 2,
|
||||
"prompt_tokens": 422,
|
||||
"total_tokens": 424,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
||||
|
|
@ -28,7 +28,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -43,7 +43,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -54,7 +54,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -69,7 +69,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -80,7 +80,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -95,7 +95,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -106,7 +106,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -121,7 +121,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -132,7 +132,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -147,7 +147,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -158,7 +158,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -173,7 +173,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -184,7 +184,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -199,7 +199,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -210,7 +210,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -225,7 +225,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -236,7 +236,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -251,7 +251,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -262,7 +262,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -277,7 +277,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -288,7 +288,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -303,7 +303,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -314,7 +314,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -329,7 +329,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -340,7 +340,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -355,7 +355,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -366,7 +366,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -381,7 +381,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -392,7 +392,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -407,7 +407,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -418,7 +418,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -433,7 +433,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -444,7 +444,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -459,7 +459,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -470,7 +470,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -485,7 +485,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437810,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -496,7 +496,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -511,7 +511,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437811,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@ -522,7 +522,7 @@
|
|||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-130",
|
||||
"id": "rec-044dcd8fdeb1",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
|
|
@ -537,7 +537,7 @@
|
|||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 1759437811,
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
|
|
@@ -0,0 +1,58 @@
{
"test_id": "tests/integration/agents/test_agents.py::test_custom_tool[ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-guard3:1b",
"messages": [
{
"role": "user",
"content": "Task: Check if there is unsafe content in 'Tool' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: Error when running tool: get_boiling_point() got an unexpected keyword argument 'liquid'\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST Tool message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
}
],
"stream": false,
"temperature": 0.0
},
"endpoint": "/v1/chat/completions",
"model": "llama-guard3:1b"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-06fbbb88ed5e",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "safe",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama-guard3:1b",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 2,
"prompt_tokens": 421,
"total_tokens": 423,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
@@ -73,7 +73,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -88,7 +88,7 @@
"logprobs": null
}
],
-"created": 1759441160,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
@@ -99,7 +99,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -114,7 +114,7 @@
"logprobs": null
}
],
-"created": 1759441160,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
@@ -125,7 +125,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -140,7 +140,7 @@
"logprobs": null
}
],
-"created": 1759441160,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
@@ -151,7 +151,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -166,7 +166,7 @@
"logprobs": null
}
],
-"created": 1759441160,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
@@ -177,7 +177,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -192,7 +192,7 @@
"logprobs": null
}
],
-"created": 1759441160,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
@@ -203,7 +203,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -218,7 +218,7 @@
"logprobs": null
}
],
-"created": 1759441160,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
@@ -229,7 +229,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -244,7 +244,7 @@
"logprobs": null
}
],
-"created": 1759441160,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
@@ -255,7 +255,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -270,7 +270,7 @@
"logprobs": null
}
],
-"created": 1759441160,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
@@ -281,7 +281,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -296,7 +296,7 @@
"logprobs": null
}
],
-"created": 1759441160,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
@@ -307,7 +307,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -322,7 +322,7 @@
"logprobs": null
}
],
-"created": 1759441160,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
@@ -333,7 +333,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -348,7 +348,7 @@
"logprobs": null
}
],
-"created": 1759441160,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
@@ -359,7 +359,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -374,7 +374,7 @@
"logprobs": null
}
],
-"created": 1759441160,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
@@ -385,7 +385,7 @@
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
-"id": "chatcmpl-67",
+"id": "rec-4a32ce3da3ce",
"choices": [
{
"delta": {
@@ -400,7 +400,7 @@
"logprobs": null
}
],
-"created": 1759441161,
+"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
File diff suppressed because it is too large
@@ -21,7 +21,7 @@
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
-"id": "chatcmpl-912",
+"id": "rec-b58e35a624b0",
"choices": [
{
"finish_reason": "stop",
@@ -38,7 +38,7 @@
}
}
],
-"created": 1759437811,
+"created": 0,
"model": "llama-guard3:1b",
"object": "chat.completion",
"service_tier": null,
@@ -0,0 +1,104 @@
{
"test_id": "tests/integration/agents/test_agents.py::test_create_turn_response[ollama/llama3.2:3b-instruct-fp16-client_tools1]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant"
},
{
"role": "user",
"content": "Call get_boiling_point_with_metadata tool and answer What is the boiling point of polyjuice?"
}
],
"max_tokens": 512,
"stream": true,
"temperature": 0.0001,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_boiling_point_with_metadata",
"description": "Returns the boiling point of a liquid in Celcius or Fahrenheit"
}
}
],
"top_p": 0.9
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-176bcef706a9",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_wxinam9c",
"function": {
"arguments": "{}",
"name": "get_boiling_point_with_metadata"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-176bcef706a9",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
}
}
@@ -0,0 +1,58 @@
{
"test_id": "tests/integration/agents/test_agents.py::test_tool_choice_none[ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-guard3:1b",
"messages": [
{
"role": "user",
"content": "Task: Check if there is unsafe content in 'User' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST User message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
}
],
"stream": false,
"temperature": 0.0
},
"endpoint": "/v1/chat/completions",
"model": "llama-guard3:1b"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-1a0d3109cf92",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "safe",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama-guard3:1b",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 2,
"prompt_tokens": 398,
"total_tokens": 400,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
388
tests/integration/agents/recordings/1d82e9439ae3.json
Normal file
@@ -0,0 +1,388 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant"
},
{
"role": "user",
"content": "Call get_boiling_point tool and answer What is the boiling point of polyjuice?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "toolcall-1d82e943-0",
"type": "function",
"function": {
"name": "get_boiling_point",
"arguments": "{\"celcius\":null,\"liquid_name\":\"polyjuice\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "toolcall-1d82e943-0",
"content": "-212"
}
],
"max_tokens": 512,
"stream": true,
"temperature": 0.0001,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_boiling_point",
"description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.",
"parameters": {
"type": "object",
"properties": {
"liquid_name": {
"type": "string",
"description": "The name of the liquid"
},
"celcius": {
"type": "boolean",
"description": "Whether to return the boiling point in Celcius"
}
},
"required": [
"liquid_name"
]
}
}
}
],
"top_p": 0.9
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1d82e9439ae3",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1d82e9439ae3",
"choices": [
{
"delta": {
"content": " boiling",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1d82e9439ae3",
"choices": [
{
"delta": {
"content": " point",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1d82e9439ae3",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1d82e9439ae3",
"choices": [
{
"delta": {
"content": " poly",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1d82e9439ae3",
"choices": [
{
"delta": {
"content": "ju",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1d82e9439ae3",
"choices": [
{
"delta": {
"content": "ice",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1d82e9439ae3",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1d82e9439ae3",
"choices": [
{
"delta": {
"content": " -",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1d82e9439ae3",
"choices": [
{
"delta": {
"content": "212",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1d82e9439ae3",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1d82e9439ae3",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
}
}
Some files were not shown because too many files have changed in this diff