Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-03 18:00:36 +00:00

Merge branch 'llamastack:main' into llama_stack_how_to_documentation

Commit c83343de84: 1030 changed files with 249,861 additions and 104,196 deletions
.github/CODEOWNERS (vendored): 2 changes

@@ -2,4 +2,4 @@
 # These owners will be the default owners for everything in
 # the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1
+* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
.github/TRIAGERS.md (vendored): 1 change

@@ -1,2 +1 @@
 # This file documents Triage members in the Llama Stack community
-@franciscojavierarceo
.github/workflows/integration-auth-tests.yml (vendored): 4 changes

@@ -85,8 +85,8 @@ jobs:
           cat $run_dir/run.yaml

           # avoid line breaks in the server log, especially because we grep it below.
-          export COLUMNS=1984
-          nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 &
+          export LLAMA_STACK_LOG_WIDTH=200
+          nohup uv run llama stack run $run_dir/run.yaml > server.log 2>&1 &

       - name: Wait for Llama Stack server to be ready
         run: |
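The pattern these workflow hunks converge on is: widen the server log so later greps match, launch the server in the background without the removed `--image-type` flag, and poll until it answers. A minimal local sketch of that flow, assuming the default port 8321 and a `/v1/health` endpoint (both illustrative, not taken from the diff):

```bash
# Sketch only: reproduce the CI launch pattern locally (config path and health URL are assumptions).
export LLAMA_STACK_LOG_WIDTH=200           # keep log lines unwrapped so greps on server.log match
nohup llama stack run ./run.yaml > server.log 2>&1 &

# Poll until the server responds before inspecting the log.
for i in $(seq 1 30); do
  curl -sf http://localhost:8321/v1/health && break
  sleep 1
done
grep -i "error" server.log || echo "no errors logged"
```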
.github/workflows/integration-tests.yml (vendored): 25 changes

@@ -42,18 +42,27 @@ jobs:

   run-replay-mode-tests:
     runs-on: ubuntu-latest
-    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.setup, matrix.python-version, matrix.client-version, matrix.suite) }}
+    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}

     strategy:
       fail-fast: false
       matrix:
         client-type: [library, server]
-        # Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
-        setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-        suite: [base, vision]
+        # Define (setup, suite) pairs - they are always matched and cannot be independent
+        # Weekly schedule (Sun 1 AM): vllm+base
+        # Input test-setup=ollama-vision: ollama-vision+vision
+        # Default (including test-setup=ollama): both ollama+base and ollama-vision+vision
+        config: >-
+          ${{
+            github.event.schedule == '1 0 * * 0'
+            && fromJSON('[{"setup": "vllm", "suite": "base"}]')
+            || github.event.inputs.test-setup == 'ollama-vision'
+            && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
+            || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
+          }}

     steps:
       - name: Checkout repository

@@ -64,14 +73,14 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
          client-version: ${{ matrix.client-version }}
-          setup: ${{ matrix.setup }}
-          suite: ${{ matrix.suite }}
+          setup: ${{ matrix.config.setup }}
+          suite: ${{ matrix.config.suite }}
           inference-mode: 'replay'

       - name: Run tests
         uses: ./.github/actions/run-and-record-tests
         with:
           stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
-          setup: ${{ matrix.setup }}
+          setup: ${{ matrix.config.setup }}
           inference-mode: 'replay'
-          suite: ${{ matrix.suite }}
+          suite: ${{ matrix.config.suite }}
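For readers unfamiliar with object-valued matrix entries: a list of `{setup, suite}` objects expands to exactly those pairs, whereas two independent axes would produce every cross-product. A stripped-down sketch of the behaviour, in a hypothetical workflow separate from the one above:

```yaml
# Hypothetical minimal workflow showing that paired matrix values never cross-combine.
jobs:
  demo:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        config:
          - {setup: ollama, suite: base}
          - {setup: ollama-vision, suite: vision}
    steps:
      # Prints exactly two combinations: (ollama, base) and (ollama-vision, vision).
      - run: echo "setup=${{ matrix.config.setup }} suite=${{ matrix.config.suite }}"
```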
.github/workflows/precommit-trigger.yml (vendored): 12 changes

@@ -18,7 +18,7 @@ jobs:
     steps:
       - name: Check comment author and get PR details
         id: check_author
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           script: |

@@ -78,7 +78,7 @@ jobs:

       - name: React to comment
         if: steps.check_author.outputs.authorized == 'true'
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           script: |

@@ -91,7 +91,7 @@ jobs:

      - name: Comment starting
        if: steps.check_author.outputs.authorized == 'true'
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |

@@ -189,7 +189,7 @@ jobs:

      - name: Comment success with changes
        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |

@@ -202,7 +202,7 @@ jobs:

      - name: Comment success without changes
        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'false' && steps.precommit.outcome == 'success'
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |

@@ -215,7 +215,7 @@ jobs:

      - name: Comment failure
        if: failure()
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
.github/workflows/providers-build.yml (vendored): 4 changes

@@ -112,7 +112,7 @@ jobs:
           fi
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi

@@ -150,7 +150,7 @@ jobs:
           fi
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi
.github/workflows/python-build-test.yml (vendored): 2 changes

@@ -24,7 +24,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

       - name: Install uv
-        uses: astral-sh/setup-uv@b75a909f75acd358c2196fb9a5f1299a9a8868a4 # v6.7.0
+        uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6.8.0
        with:
          python-version: ${{ matrix.python-version }}
          activate-environment: true
.github/workflows/stale_bot.yml (vendored): 2 changes

@@ -24,7 +24,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Stale Action
-        uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0
+        uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0
        with:
          stale-issue-label: 'stale'
          stale-issue-message: >
@@ -59,7 +59,7 @@
           # Use the virtual environment created by the build step (name comes from build config)
           source ramalama-stack-test/bin/activate
           uv pip list
-          nohup llama stack run tests/external/ramalama-stack/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+          nohup llama stack run tests/external/ramalama-stack/run.yaml > server.log 2>&1 &

       - name: Wait for Llama Stack server to be ready
         run: |
.github/workflows/test-external.yml (vendored): 2 changes

@@ -59,7 +59,7 @@
           # Use the virtual environment created by the build step (name comes from build config)
           source ci-test/bin/activate
           uv pip list
-          nohup llama stack run tests/external/run-byoa.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+          nohup llama stack run tests/external/run-byoa.yaml > server.log 2>&1 &

       - name: Wait for Llama Stack server to be ready
         run: |
@@ -7,7 +7,7 @@
 [](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
 [](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)

-[**Quick Start**](https://llamastack.github.io/latest/getting_started/index.html) | [**Documentation**](https://llamastack.github.io/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
+[**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)


 ### ✨🎉 Llama 4 Support 🎉✨
@@ -52,7 +52,7 @@ You can access the HuggingFace trainer via the `starter` distribution:

 ```bash
 llama stack build --distro starter --image-type venv
-llama stack run --image-type venv ~/.llama/distributions/starter/starter-run.yaml
+llama stack run ~/.llama/distributions/starter/starter-run.yaml
 ```

 ### Usage Example
@@ -187,21 +187,21 @@ Configure telemetry behavior using environment variables:
 - **`OTEL_SERVICE_NAME`**: Service name for telemetry (default: empty string)
 - **`TELEMETRY_SINKS`**: Comma-separated list of sinks (default: `console,sqlite`)

-## Visualization with Jaeger
+### Quick Setup: Complete Telemetry Stack

-The `otel_trace` sink works with any service compatible with the OpenTelemetry collector. Traces and metrics use separate endpoints but can share the same collector.
+Use the automated setup script to launch the complete telemetry stack (Jaeger, OpenTelemetry Collector, Prometheus, and Grafana):

-### Starting Jaeger
-
-Start a Jaeger instance with OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686:
-
 ```bash
-docker run --pull always --rm --name jaeger \
-  -p 16686:16686 -p 4318:4318 \
-  jaegertracing/jaeger:2.1.0
+./scripts/telemetry/setup_telemetry.sh
 ```

-Once running, you can visualize traces by navigating to [http://localhost:16686/](http://localhost:16686/).
+This sets up:
+- **Jaeger UI**: http://localhost:16686 (traces visualization)
+- **Prometheus**: http://localhost:9090 (metrics)
+- **Grafana**: http://localhost:3000 (dashboards with auto-configured data sources)
+- **OTEL Collector**: http://localhost:4318 (OTLP endpoint)
+
+Once running, you can visualize traces by navigating to [Grafana](http://localhost:3000/) and login with login `admin` and password `admin`.

 ## Querying Metrics
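Once the stack from the setup script is running, the server still has to be told where to send telemetry. A hedged sketch of wiring that up, reusing the `TELEMETRY_SINKS` and `OTEL_SERVICE_NAME` variables documented above and the `otel_trace` sink named in the removed text; the standard OTLP endpoint variable and the config path are assumptions:

```bash
# Sketch only: point a locally running stack at the collector started by setup_telemetry.sh.
export TELEMETRY_SINKS=console,otel_trace                  # sink name as referenced in the docs above
export OTEL_SERVICE_NAME=llama-stack
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318   # standard OTel variable; assumed to be honored
llama stack run ~/.llama/distributions/starter/starter-run.yaml
# Traces should then show up in Grafana (http://localhost:3000) and Jaeger (http://localhost:16686).
```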
@@ -219,13 +219,10 @@ group_tools = client.tools.list_tools(toolgroup_id="search_tools")
 <TabItem value="setup" label="Setup & Configuration">

 1. Start by registering a Tavily API key at [Tavily](https://tavily.com/).
-2. [Optional] Provide the API key directly to the Llama Stack server
+2. [Optional] Set the API key in your environment before starting the Llama Stack server
 ```bash
 export TAVILY_SEARCH_API_KEY="your key"
 ```
-```bash
---env TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY}
-```

 </TabItem>
 <TabItem value="implementation" label="Implementation">

@@ -273,9 +270,9 @@ for log in EventLogger().log(response):
 <TabItem value="setup" label="Setup & Configuration">

 1. Start by registering for a WolframAlpha API key at [WolframAlpha Developer Portal](https://developer.wolframalpha.com/access).
-2. Provide the API key either when starting the Llama Stack server:
+2. Provide the API key either by setting it in your environment before starting the Llama Stack server:
 ```bash
---env WOLFRAM_ALPHA_API_KEY=${WOLFRAM_ALPHA_API_KEY}
+export WOLFRAM_ALPHA_API_KEY="your key"
 ```
 or from the client side:
 ```python
@@ -357,7 +357,7 @@ server:
 8. Run the server:

 ```bash
-python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
+llama stack run ~/.llama/run-byoa.yaml
 ```

 9. Test the API:
@@ -76,7 +76,7 @@ Integration tests are located in [tests/integration](https://github.com/meta-lla
 Consult [tests/integration/README.md](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more details on how to run the tests.

 Note that each provider's `sample_run_config()` method (in the configuration class for that provider)
-typically references some environment variables for specifying API keys and the like. You can set these in the environment or pass these via the `--env` flag to the test command.
+typically references some environment variables for specifying API keys and the like. You can set these in the environment before running the test command.


 ### 2. Unit Testing
@@ -170,7 +170,7 @@ spec:
       - name: llama-stack
         image: localhost/llama-stack-run-k8s:latest
         imagePullPolicy: IfNotPresent
-        command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
+        command: ["llama", "stack", "run", "/app/config.yaml"]
         ports:
           - containerPort: 5000
         volumeMounts:
@@ -289,10 +289,10 @@ After this step is successful, you should be able to find the built container im
 docker run -d \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e OLLAMA_URL=http://host.docker.internal:11434 \
   localhost/distribution-ollama:dev \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env OLLAMA_URL=http://host.docker.internal:11434
+  --port $LLAMA_STACK_PORT
 ```

 Here are the docker flags and their uses:

@@ -305,12 +305,12 @@ Here are the docker flags and their uses:
 * `localhost/distribution-ollama:dev`: The name and tag of the container image to run

+* `-e INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the INFERENCE_MODEL environment variable in the container
+
+* `-e OLLAMA_URL=http://host.docker.internal:11434`: Sets the OLLAMA_URL environment variable in the container
+
 * `--port $LLAMA_STACK_PORT`: Port number for the server to listen on

-* `--env INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the model to use for inference
-
-* `--env OLLAMA_URL=http://host.docker.internal:11434`: Configures the URL for the Ollama service
-
 </TabItem>
 </Tabs>

@@ -320,23 +320,22 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con
 ```
 llama stack run -h
-usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE]
+usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME]
                        [--image-type {venv}] [--enable-ui]
-                       [config | template]
+                       [config | distro]

 Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.

 positional arguments:
-  config | template     Path to config file to use for the run or name of known template (`llama stack list` for a list). (default: None)
+  config | distro       Path to config file to use for the run or name of known distro (`llama stack list` for a list). (default: None)

 options:
   -h, --help            show this help message and exit
   --port PORT           Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
   --image-name IMAGE_NAME
-                        Name of the image to run. Defaults to the current environment (default: None)
+                        [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
-  --env KEY=VALUE       Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: None)
   --image-type {venv}
-                        Image Type used during the build. This should be venv. (default: None)
+                        [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
   --enable-ui           Start the UI server (default: False)
 ```

@@ -348,9 +347,6 @@ llama stack run tgi

 # Start using config file
 llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
-
-# Start using a venv
-llama stack run --image-type venv ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
 ```

 ```
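Since `--env` is removed and `--image-name`/`--image-type` are deprecated in this help text, the equivalent of the old venv invocation is to activate the environment and export variables yourself. A sketch under those assumptions (the venv path and model name are illustrative, not from the diff):

```bash
# Sketch: replacement for `llama stack run --image-type venv ... --env KEY=VALUE`.
source ~/.llama/distributions/llamastack-my-local-stack/.venv/bin/activate   # hypothetical venv location
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct                      # formerly passed via --env
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml --port 8321
```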
@@ -101,7 +101,7 @@ A few things to note:
 - The id is a string you can choose freely.
 - You can instantiate any number of provider instances of the same type.
 - The configuration dictionary is provider-specific.
-- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value.
+- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server, you can set environment variables in your shell before running `llama stack run` to override the default values.

 ### Environment Variable Substitution

@@ -173,13 +173,10 @@ optional_token: ${env.OPTIONAL_TOKEN:+}

 #### Runtime Override

-You can override environment variables at runtime when starting the server:
+You can override environment variables at runtime by setting them in your shell before starting the server:

 ```bash
-# Override specific environment variables
-llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com
-
-# Or set them in your shell
+# Set environment variables in your shell
 export API_KEY=sk-123
 export BASE_URL=https://custom-api.com
 llama stack run --config run.yaml
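The hunk header above references the `${env.OPTIONAL_TOKEN:+}` form. For context, a small sketch of how such references typically look inside a config file; the field names and the default-value syntax shown here are assumptions based on the surrounding docs, not taken from the diff:

```yaml
# Sketch of environment variable substitution in a run.yaml fragment (illustrative values).
api_key: ${env.API_KEY}                              # must be exported in the shell before `llama stack run`
base_url: ${env.BASE_URL:=https://custom-api.com}    # assumed syntax for a fallback default
optional_token: ${env.OPTIONAL_TOKEN:+}              # expands to empty when OPTIONAL_TOKEN is unset
```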
@@ -52,7 +52,7 @@ spec:
           value: "${SAFETY_MODEL}"
         - name: TAVILY_SEARCH_API_KEY
           value: "${TAVILY_SEARCH_API_KEY}"
-        command: ["python", "-m", "llama_stack.core.server.server", "/etc/config/stack_run_config.yaml", "--port", "8321"]
+        command: ["llama", "stack", "run", "/etc/config/stack_run_config.yaml", "--port", "8321"]
         ports:
           - containerPort: 8321
         volumeMounts:
@@ -69,10 +69,10 @@ docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
+  -e WATSONX_API_KEY=$WATSONX_API_KEY \
+  -e WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
+  -e WATSONX_BASE_URL=$WATSONX_BASE_URL \
   llamastack/distribution-watsonx \
   --config /root/my-run.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env WATSONX_API_KEY=$WATSONX_API_KEY \
-  --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
-  --env WATSONX_BASE_URL=$WATSONX_BASE_URL
+  --port $LLAMA_STACK_PORT
 ```
@@ -129,11 +129,11 @@ docker run -it \
   # NOTE: mount the llama-stack / llama-model directories if testing local changes else not needed
   -v $HOME/git/llama-stack:/app/llama-stack-source -v $HOME/git/llama-models:/app/llama-models-source \
   # localhost/distribution-dell:dev if building / testing locally
-  llamastack/distribution-dell\
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env CHROMA_URL=$CHROMA_URL
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e CHROMA_URL=$CHROMA_URL \
+  llamastack/distribution-dell \
+  --port $LLAMA_STACK_PORT

 ```

@@ -154,14 +154,14 @@ docker run \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
   -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e SAFETY_MODEL=$SAFETY_MODEL \
+  -e DEH_SAFETY_URL=$DEH_SAFETY_URL \
+  -e CHROMA_URL=$CHROMA_URL \
   llamastack/distribution-dell \
   --config /root/my-run.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env SAFETY_MODEL=$SAFETY_MODEL \
-  --env DEH_SAFETY_URL=$DEH_SAFETY_URL \
-  --env CHROMA_URL=$CHROMA_URL
+  --port $LLAMA_STACK_PORT
 ```

 ### Via venv

@@ -170,21 +170,21 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a

 ```bash
 llama stack build --distro dell --image-type venv
-llama stack run dell
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env CHROMA_URL=$CHROMA_URL
+INFERENCE_MODEL=$INFERENCE_MODEL \
+DEH_URL=$DEH_URL \
+CHROMA_URL=$CHROMA_URL \
+llama stack run dell \
+  --port $LLAMA_STACK_PORT
 ```

 If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
+INFERENCE_MODEL=$INFERENCE_MODEL \
+DEH_URL=$DEH_URL \
+SAFETY_MODEL=$SAFETY_MODEL \
+DEH_SAFETY_URL=$DEH_SAFETY_URL \
+CHROMA_URL=$CHROMA_URL \
 llama stack run ./run-with-safety.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env SAFETY_MODEL=$SAFETY_MODEL \
-  --env DEH_SAFETY_URL=$DEH_SAFETY_URL \
-  --env CHROMA_URL=$CHROMA_URL
+  --port $LLAMA_STACK_PORT
 ```
@@ -84,9 +84,9 @@ docker run \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
+  -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
   llamastack/distribution-meta-reference-gpu \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --port $LLAMA_STACK_PORT
 ```

 If you are using Llama Stack Safety / Shield APIs, use:

@@ -98,10 +98,10 @@ docker run \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
+  -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+  -e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
   llamastack/distribution-meta-reference-gpu \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --port $LLAMA_STACK_PORT
 ```

 ### Via venv

@@ -110,16 +110,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL

 ```bash
 llama stack build --distro meta-reference-gpu --image-type venv
+INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
 llama stack run distributions/meta-reference-gpu/run.yaml \
-  --port 8321 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --port 8321
 ```

 If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
+INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
 llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \
-  --port 8321 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --port 8321
 ```
@@ -129,10 +129,10 @@ docker run \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
   llamastack/distribution-nvidia \
   --config /root/my-run.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
+  --port $LLAMA_STACK_PORT
 ```

 ### Via venv

@@ -142,10 +142,10 @@ If you've set up your local development environment, you can also build the imag
 ```bash
 INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
 llama stack build --distro nvidia --image-type venv
+NVIDIA_API_KEY=$NVIDIA_API_KEY \
+INFERENCE_MODEL=$INFERENCE_MODEL \
 llama stack run ./run.yaml \
-  --port 8321 \
-  --env NVIDIA_API_KEY=$NVIDIA_API_KEY \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL
+  --port 8321
 ```

 ## Example Notebooks
@@ -86,9 +86,9 @@ docker run -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
+  -e OLLAMA_URL=http://host.docker.internal:11434 \
   llamastack/distribution-starter \
-  --port $LLAMA_STACK_PORT \
-  --env OLLAMA_URL=http://host.docker.internal:11434
+  --port $LLAMA_STACK_PORT
 ```
 Note to start the container with Podman, you can do the same but replace `docker` at the start of the command with
 `podman`. If you are using `podman` older than `4.7.0`, please also replace `host.docker.internal` in the `OLLAMA_URL`

@@ -106,9 +106,9 @@ docker run -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   --network=host \
+  -e OLLAMA_URL=http://localhost:11434 \
   llamastack/distribution-starter \
-  --port $LLAMA_STACK_PORT \
-  --env OLLAMA_URL=http://localhost:11434
+  --port $LLAMA_STACK_PORT
 ```
 :::
 You will see output like below:
@@ -1,4 +1,7 @@
 ---
+description: "Files
+
+  This API is used to upload documents that can be used with other Llama Stack APIs."
 sidebar_label: Files
 title: Files
 ---

@@ -7,4 +10,8 @@ title: Files

 ## Overview

+Files
+
+This API is used to upload documents that can be used with other Llama Stack APIs.
+
 This section contains documentation for all available providers for the **files** API.
@@ -1,5 +1,7 @@
 ---
-description: "Llama Stack Inference API for generating completions, chat completions, and embeddings.
+description: "Inference
+
+  Llama Stack Inference API for generating completions, chat completions, and embeddings.

 This API provides the raw interface to the underlying models. Two kinds of models are supported:
 - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.

@@ -12,7 +14,9 @@ title: Inference

 ## Overview

-Llama Stack Inference API for generating completions, chat completions, and embeddings.
+Inference
+
+Llama Stack Inference API for generating completions, chat completions, and embeddings.

 This API provides the raw interface to the underlying models. Two kinds of models are supported:
 - LLM models: these models generate "raw" and "chat" (conversational) completions.
@@ -15,6 +15,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `str \| None` | No | | API key for Anthropic models |

 ## Sample Configuration
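The provider tables in this run of hunks all gain the same `refresh_models` field; in a `run.yaml` it sits alongside the other config keys for a provider entry. A hedged sketch of enabling it (the provider id, type, and surrounding fields are illustrative, not taken from the diff):

```yaml
# Sketch: enabling periodic model refresh for one inference provider in run.yaml.
providers:
  inference:
    - provider_id: openai
      provider_type: remote::openai
      config:
        api_key: ${env.OPENAI_API_KEY}
        refresh_models: true    # default is False per the tables below
```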
|
|
@ -22,6 +22,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
| `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Azure API key for Azure |
|
| `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Azure API key for Azure |
|
||||||
| `api_base` | `<class 'pydantic.networks.HttpUrl'>` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
|
| `api_base` | `<class 'pydantic.networks.HttpUrl'>` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
|
||||||
| `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |
|
| `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ AWS Bedrock inference provider for accessing various AI models through AWS's man
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
|
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
|
||||||
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
|
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
|
||||||
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
|
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
| `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
|
| `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
|
||||||
| `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Cerebras API Key |
|
| `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Cerebras API Key |
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,8 @@ Databricks inference provider for running models on Databricks' unified analytic
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
| `url` | `<class 'str'>` | No | | The URL for the Databricks model serving endpoint |
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
|
| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
|
||||||
| `api_token` | `<class 'pydantic.types.SecretStr'>` | No | | The Databricks API token |
|
| `api_token` | `<class 'pydantic.types.SecretStr'>` | No | | The Databricks API token |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
| `url` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
|
| `url` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
|
||||||
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Fireworks.ai API Key |
|
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Fireworks.ai API Key |
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
| `api_key` | `str \| None` | No | | API key for Gemini models |
|
| `api_key` | `str \| None` | No | | API key for Gemini models |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
| `api_key` | `str \| None` | No | | The Groq API key |
|
| `api_key` | `str \| None` | No | | The Groq API key |
|
||||||
| `url` | `<class 'str'>` | No | https://api.groq.com | The URL for the Groq AI server |
|
| `url` | `<class 'str'>` | No | https://api.groq.com | The URL for the Groq AI server |
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
| `api_key` | `str \| None` | No | | The Llama API key |
|
| `api_key` | `str \| None` | No | | The Llama API key |
|
||||||
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
|
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
|
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
|
||||||
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed of using the hosted service |
|
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed of using the hosted service |
|
||||||
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
|
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
|
||||||
|
|
|
||||||
|
|
@ -15,8 +15,8 @@ Ollama inference provider for running local models through the Ollama runtime.
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
| `url` | `<class 'str'>` | No | http://localhost:11434 | |
|
| `url` | `<class 'str'>` | No | http://localhost:11434 | |
|
||||||
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically |
|
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
| `api_key` | `str \| None` | No | | API key for OpenAI models |
|
| `api_key` | `str \| None` | No | | API key for OpenAI models |
|
||||||
| `base_url` | `<class 'str'>` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
|
| `base_url` | `<class 'str'>` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ Passthrough inference provider for connecting to any external inference service
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
| `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint |
|
| `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint |
|
||||||
| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint |
|
| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint |
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ RunPod inference provider for running models on RunPod's cloud GPU platform.
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
|
||||||
|
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
|
||||||
| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
|
| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
|
||||||
| `api_token` | `str \| None` | No | | The API token |
|
| `api_token` | `str \| None` | No | | The API token |
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -15,6 +15,7 @@ SambaNova inference provider for running models on SambaNova's dataflow architecture.

 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
 | `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key |
@@ -15,6 +15,7 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.

 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `<class 'str'>` | No | | The URL for the TGI serving endpoint |

 ## Sample Configuration
@@ -15,6 +15,7 @@ Together AI inference provider for open-source models and collaborative AI development.

 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
 | `api_key` | `pydantic.types.SecretStr \| None` | No | | The Together AI API Key |
@@ -54,6 +54,7 @@ Available Models:

 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `project` | `<class 'str'>` | No | | Google Cloud project ID for Vertex AI |
 | `location` | `<class 'str'>` | No | us-central1 | Google Cloud location for Vertex AI |
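The Vertex AI fields above differ from the API-key style providers in that they identify a Google Cloud project and region rather than a secret. A hedged sketch, assuming a `remote::vertexai` provider type string (not shown in this diff) and a placeholder project ID:

```yaml
# Hypothetical run.yaml fragment for the Vertex AI inference provider fields above.
providers:
  inference:
  - provider_id: vertexai
    provider_type: remote::vertexai   # assumed provider type string
    config:
      project: my-gcp-project         # placeholder Google Cloud project ID
      location: us-central1           # default shown in the table
      refresh_models: false           # new field introduced by this change
```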
@@ -15,11 +15,11 @@ Remote vLLM inference provider for connecting to vLLM servers.

 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
 | `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. |
 | `api_token` | `str \| None` | No | fake | The API token |
 | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically |

 ## Sample Configuration
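Since this hunk both adds the shared `refresh_models` row and drops the older vLLM-specific one, a consolidated sketch of the resulting config may help. It assumes the `remote::vllm` provider type string and a `VLLM_URL` environment variable, neither of which appears in this diff.

```yaml
# Hypothetical run.yaml fragment for the remote vLLM inference provider fields above.
providers:
  inference:
  - provider_id: vllm
    provider_type: remote::vllm   # assumed provider type string
    config:
      url: ${env.VLLM_URL}        # the vLLM model serving endpoint (assumed env var)
      max_tokens: 4096            # default shown in the table
      api_token: fake             # default shown in the table
      tls_verify: true            # boolean, or a path to a CA certificate file
      refresh_models: false       # single field after this change, documented as "from the provider"
```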
@@ -15,9 +15,10 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform

 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
-| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key |
-| `project_id` | `str \| None` | No | | The Project ID key |
+| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx.ai API key |
+| `project_id` | `str \| None` | No | | The watsonx.ai project ID |
 | `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |

 ## Sample Configuration
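A sketch of the watsonx.ai fields above, using assumed environment variable names (`WATSONX_API_KEY`, `WATSONX_PROJECT_ID`) and an assumed `remote::watsonx` provider type string:

```yaml
# Hypothetical run.yaml fragment for the watsonx.ai inference provider fields above.
providers:
  inference:
  - provider_id: watsonx
    provider_type: remote::watsonx   # assumed provider type string
    config:
      url: https://us-south.ml.cloud.ibm.com   # default shown in the table
      api_key: ${env.WATSONX_API_KEY}          # assumed env var name
      project_id: ${env.WATSONX_PROJECT_ID}    # assumed env var name
      timeout: 60                              # HTTP request timeout, default from the table
```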
@@ -1,4 +1,7 @@
 ---
+description: "Safety
+
+  OpenAI-compatible Moderations API."
 sidebar_label: Safety
 title: Safety
 ---
@@ -7,4 +10,8 @@ title: Safety

 ## Overview

+Safety
+
+OpenAI-compatible Moderations API.
+
 This section contains documentation for all available providers for the **safety** API.
@@ -15,6 +15,7 @@ AWS Bedrock safety provider for content moderation using AWS's safety services.

 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
 | `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
 | `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
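Because every AWS field above is optional and falls back to the standard AWS environment variables, a Bedrock safety provider entry can be nearly empty. A hedged sketch, assuming a `remote::bedrock` provider type string under the `safety` API:

```yaml
# Hypothetical run.yaml fragment for the AWS Bedrock safety provider fields above.
providers:
  safety:
  - provider_id: bedrock
    provider_type: remote::bedrock   # assumed provider type string
    config:
      aws_access_key_id: ${env.AWS_ACCESS_KEY_ID}          # optional; this env var is the documented default
      aws_secret_access_key: ${env.AWS_SECRET_ACCESS_KEY}  # optional; this env var is the documented default
      aws_session_token: ${env.AWS_SESSION_TOKEN}          # optional; this env var is the documented default
```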
@@ -123,12 +123,12 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
-"!uv run --with llama-stack llama stack build --distro together --image-type venv\n",
+"!uv run --with llama-stack llama stack build --distro together\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
 " process = subprocess.Popen(\n",
-" \"uv run --with llama-stack llama stack run together --image-type venv\",\n",
+" \"uv run --with llama-stack llama stack run together\",\n",
 " shell=True,\n",
 " stdout=log_file,\n",
 " stderr=log_file,\n",
@@ -233,12 +233,12 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server\n",
-"!uv run --with llama-stack llama stack build --distro meta-reference-gpu --image-type venv\n",
+"!uv run --with llama-stack llama stack build --distro meta-reference-gpu\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
 " process = subprocess.Popen(\n",
-" f\"uv run --with llama-stack llama stack run meta-reference-gpu --image-type venv --env INFERENCE_MODEL={model_id}\",\n",
+" f\"INFERENCE_MODEL={model_id} uv run --with llama-stack llama stack run meta-reference-gpu\",\n",
 " shell=True,\n",
 " stdout=log_file,\n",
 " stderr=log_file,\n",
@@ -223,12 +223,12 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server\n",
-"!uv run --with llama-stack llama stack build --distro llama_api --image-type venv\n",
+"!uv run --with llama-stack llama stack build --distro llama_api\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
 " process = subprocess.Popen(\n",
-" \"uv run --with llama-stack llama stack run llama_api --image-type venv\",\n",
+" \"uv run --with llama-stack llama stack run llama_api\",\n",
 " shell=True,\n",
 " stdout=log_file,\n",
 " stderr=log_file,\n",
@@ -145,12 +145,12 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
-"!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
+"!uv run --with llama-stack llama stack build --distro starter\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
 " process = subprocess.Popen(\n",
-" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter --image-type venv\n",
+" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\n",
 " shell=True,\n",
 " stdout=log_file,\n",
 " stderr=log_file,\n",
74 docs/static/deprecated-llama-stack-spec.html (vendored)
@@ -1443,8 +1443,8 @@
 "tags": [
 "Inference"
 ],
-"summary": "List all chat completions.",
-"description": "List all chat completions.",
+"summary": "List chat completions.",
+"description": "List chat completions.",
 "parameters": [
 {
 "name": "after",
@@ -1520,8 +1520,8 @@
 "tags": [
 "Inference"
 ],
-"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
-"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
+"summary": "Create chat completions.",
+"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -1565,8 +1565,8 @@
 "tags": [
 "Inference"
 ],
-"summary": "Describe a chat completion by its ID.",
-"description": "Describe a chat completion by its ID.",
+"summary": "Get chat completion.",
+"description": "Get chat completion.\nDescribe a chat completion by its ID.",
 "parameters": [
 {
 "name": "completion_id",
@@ -1610,8 +1610,8 @@
 "tags": [
 "Inference"
 ],
-"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
-"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
+"summary": "Create completion.",
+"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -1655,8 +1655,8 @@
 "tags": [
 "Inference"
 ],
-"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
-"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
+"summary": "Create embeddings.",
+"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -1700,8 +1700,8 @@
 "tags": [
 "Files"
 ],
-"summary": "Returns a list of files that belong to the user's organization.",
-"description": "Returns a list of files that belong to the user's organization.",
+"summary": "List files.",
+"description": "List files.\nReturns a list of files that belong to the user's organization.",
 "parameters": [
 {
 "name": "after",
@@ -1770,8 +1770,8 @@
 "tags": [
 "Files"
 ],
-"summary": "Upload a file that can be used across various endpoints.",
-"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
+"summary": "Upload file.",
+"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -1831,8 +1831,8 @@
 "tags": [
 "Files"
 ],
-"summary": "Returns information about a specific file.",
-"description": "Returns information about a specific file.",
+"summary": "Retrieve file.",
+"description": "Retrieve file.\nReturns information about a specific file.",
 "parameters": [
 {
 "name": "file_id",
@@ -1874,8 +1874,8 @@
 "tags": [
 "Files"
 ],
-"summary": "Delete a file.",
-"description": "Delete a file.",
+"summary": "Delete file.",
+"description": "Delete file.",
 "parameters": [
 {
 "name": "file_id",
@@ -1919,8 +1919,8 @@
 "tags": [
 "Files"
 ],
-"summary": "Returns the contents of the specified file.",
-"description": "Returns the contents of the specified file.",
+"summary": "Retrieve file content.",
+"description": "Retrieve file content.\nReturns the contents of the specified file.",
 "parameters": [
 {
 "name": "file_id",
@@ -1999,8 +1999,8 @@
 "tags": [
 "Safety"
 ],
-"summary": "Classifies if text and/or image inputs are potentially harmful.",
-"description": "Classifies if text and/or image inputs are potentially harmful.",
+"summary": "Create moderation.",
+"description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -2044,8 +2044,8 @@
 "tags": [
 "Agents"
 ],
-"summary": "List all OpenAI responses.",
-"description": "List all OpenAI responses.",
+"summary": "List all responses.",
+"description": "List all responses.",
 "parameters": [
 {
 "name": "after",
@@ -2119,8 +2119,8 @@
 "tags": [
 "Agents"
 ],
-"summary": "Create a new OpenAI response.",
-"description": "Create a new OpenAI response.",
+"summary": "Create a model response.",
+"description": "Create a model response.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -2184,8 +2184,8 @@
 "tags": [
 "Agents"
 ],
-"summary": "Retrieve an OpenAI response by its ID.",
-"description": "Retrieve an OpenAI response by its ID.",
+"summary": "Get a model response.",
+"description": "Get a model response.",
 "parameters": [
 {
 "name": "response_id",
@@ -2227,8 +2227,8 @@
 "tags": [
 "Agents"
 ],
-"summary": "Delete an OpenAI response by its ID.",
-"description": "Delete an OpenAI response by its ID.",
+"summary": "Delete a response.",
+"description": "Delete a response.",
 "parameters": [
 {
 "name": "response_id",
@@ -2272,8 +2272,8 @@
 "tags": [
 "Agents"
 ],
-"summary": "List input items for a given OpenAI response.",
-"description": "List input items for a given OpenAI response.",
+"summary": "List input items.",
+"description": "List input items.",
 "parameters": [
 {
 "name": "response_id",
@@ -13366,12 +13366,13 @@
 },
 {
 "name": "Files",
-"description": ""
+"description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
+"x-displayName": "Files"
 },
 {
 "name": "Inference",
-"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
-"x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
+"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+"x-displayName": "Inference"
 },
 {
 "name": "Models",
@@ -13383,7 +13384,8 @@
 },
 {
 "name": "Safety",
-"description": ""
+"description": "OpenAI-compatible Moderations API.",
+"x-displayName": "Safety"
 },
 {
 "name": "Telemetry",
97 docs/static/deprecated-llama-stack-spec.yaml (vendored)
@@ -1033,8 +1033,8 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Inference
-summary: List all chat completions.
-description: List all chat completions.
+summary: List chat completions.
+description: List chat completions.
 parameters:
 - name: after
 in: query
@@ -1087,10 +1087,10 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Inference
-summary: >-
-  Generate an OpenAI-compatible chat completion for the given messages using
-  the specified model.
+summary: Create chat completions.
 description: >-
+  Create chat completions.
+
   Generate an OpenAI-compatible chat completion for the given messages using
   the specified model.
 parameters: []
@@ -1122,8 +1122,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Inference
-summary: Describe a chat completion by its ID.
-description: Describe a chat completion by its ID.
+summary: Get chat completion.
+description: >-
+  Get chat completion.
+
+  Describe a chat completion by its ID.
 parameters:
 - name: completion_id
 in: path
@@ -1153,10 +1156,10 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Inference
-summary: >-
-  Generate an OpenAI-compatible completion for the given prompt using the specified
-  model.
+summary: Create completion.
 description: >-
+  Create completion.
+
   Generate an OpenAI-compatible completion for the given prompt using the specified
   model.
 parameters: []
@@ -1189,10 +1192,10 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Inference
-summary: >-
-  Generate OpenAI-compatible embeddings for the given input using the specified
-  model.
+summary: Create embeddings.
 description: >-
+  Create embeddings.
+
   Generate OpenAI-compatible embeddings for the given input using the specified
   model.
 parameters: []
@@ -1225,9 +1228,10 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Files
-summary: >-
-  Returns a list of files that belong to the user's organization.
+summary: List files.
 description: >-
+  List files.
+
   Returns a list of files that belong to the user's organization.
 parameters:
 - name: after
@@ -1285,11 +1289,13 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Files
-summary: >-
-  Upload a file that can be used across various endpoints.
+summary: Upload file.
 description: >-
+  Upload file.
+
   Upload a file that can be used across various endpoints.


   The file upload should be a multipart form request with:

   - file: The File object (not file name) to be uploaded.
@@ -1338,9 +1344,10 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Files
-summary: >-
-  Returns information about a specific file.
+summary: Retrieve file.
 description: >-
+  Retrieve file.
+
   Returns information about a specific file.
 parameters:
 - name: file_id
@@ -1372,8 +1379,8 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Files
-summary: Delete a file.
-description: Delete a file.
+summary: Delete file.
+description: Delete file.
 parameters:
 - name: file_id
 in: path
@@ -1405,9 +1412,10 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Files
-summary: >-
-  Returns the contents of the specified file.
+summary: Retrieve file content.
 description: >-
+  Retrieve file content.
+
   Returns the contents of the specified file.
 parameters:
 - name: file_id
@@ -1464,9 +1472,10 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Safety
-summary: >-
-  Classifies if text and/or image inputs are potentially harmful.
+summary: Create moderation.
 description: >-
+  Create moderation.
+
   Classifies if text and/or image inputs are potentially harmful.
 parameters: []
 requestBody:
@@ -1497,8 +1506,8 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Agents
-summary: List all OpenAI responses.
-description: List all OpenAI responses.
+summary: List all responses.
+description: List all responses.
 parameters:
 - name: after
 in: query
@@ -1549,8 +1558,8 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Agents
-summary: Create a new OpenAI response.
-description: Create a new OpenAI response.
+summary: Create a model response.
+description: Create a model response.
 parameters: []
 requestBody:
 content:
@@ -1592,8 +1601,8 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Agents
-summary: Retrieve an OpenAI response by its ID.
-description: Retrieve an OpenAI response by its ID.
+summary: Get a model response.
+description: Get a model response.
 parameters:
 - name: response_id
 in: path
@@ -1623,8 +1632,8 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Agents
-summary: Delete an OpenAI response by its ID.
-description: Delete an OpenAI response by its ID.
+summary: Delete a response.
+description: Delete a response.
 parameters:
 - name: response_id
 in: path
@@ -1654,10 +1663,8 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Agents
-summary: >-
-  List input items for a given OpenAI response.
-description: >-
-  List input items for a given OpenAI response.
+summary: List input items.
+description: List input items.
 parameters:
 - name: response_id
 in: path
@@ -10011,9 +10018,16 @@ tags:
   x-displayName: >-
     Llama Stack Evaluation API for running evaluations on model and agent candidates.
 - name: Files
-  description: ''
+  description: >-
+    This API is used to upload documents that can be used with other Llama Stack
+    APIs.
+  x-displayName: Files
 - name: Inference
   description: >-
+    Llama Stack Inference API for generating completions, chat completions, and
+    embeddings.
+
+
     This API provides the raw interface to the underlying models. Two kinds of models
     are supported:
@@ -10021,15 +10035,14 @@ tags:

     - Embedding models: these models generate embeddings to be used for semantic
     search.
-  x-displayName: >-
-    Llama Stack Inference API for generating completions, chat completions, and
-    embeddings.
+  x-displayName: Inference
 - name: Models
   description: ''
 - name: PostTraining (Coming Soon)
   description: ''
 - name: Safety
-  description: ''
+  description: OpenAI-compatible Moderations API.
+  x-displayName: Safety
 - name: Telemetry
   description: ''
 - name: VectorIO
145 docs/static/llama-stack-spec.html (vendored)
@@ -69,8 +69,8 @@
 "tags": [
 "Inference"
 ],
-"summary": "List all chat completions.",
-"description": "List all chat completions.",
+"summary": "List chat completions.",
+"description": "List chat completions.",
 "parameters": [
 {
 "name": "after",
@@ -146,8 +146,8 @@
 "tags": [
 "Inference"
 ],
-"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
-"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
+"summary": "Create chat completions.",
+"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -191,8 +191,8 @@
 "tags": [
 "Inference"
 ],
-"summary": "Describe a chat completion by its ID.",
-"description": "Describe a chat completion by its ID.",
+"summary": "Get chat completion.",
+"description": "Get chat completion.\nDescribe a chat completion by its ID.",
 "parameters": [
 {
 "name": "completion_id",
@@ -236,8 +236,8 @@
 "tags": [
 "Inference"
 ],
-"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
-"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
+"summary": "Create completion.",
+"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -758,8 +758,8 @@
 "tags": [
 "Inference"
 ],
-"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
-"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
+"summary": "Create embeddings.",
+"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -803,8 +803,8 @@
 "tags": [
 "Files"
 ],
-"summary": "Returns a list of files that belong to the user's organization.",
-"description": "Returns a list of files that belong to the user's organization.",
+"summary": "List files.",
+"description": "List files.\nReturns a list of files that belong to the user's organization.",
 "parameters": [
 {
 "name": "after",
@@ -873,8 +873,8 @@
 "tags": [
 "Files"
 ],
-"summary": "Upload a file that can be used across various endpoints.",
-"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
+"summary": "Upload file.",
+"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -934,8 +934,8 @@
 "tags": [
 "Files"
 ],
-"summary": "Returns information about a specific file.",
-"description": "Returns information about a specific file.",
+"summary": "Retrieve file.",
+"description": "Retrieve file.\nReturns information about a specific file.",
 "parameters": [
 {
 "name": "file_id",
@@ -977,8 +977,8 @@
 "tags": [
 "Files"
 ],
-"summary": "Delete a file.",
-"description": "Delete a file.",
+"summary": "Delete file.",
+"description": "Delete file.",
 "parameters": [
 {
 "name": "file_id",
@@ -1022,8 +1022,8 @@
 "tags": [
 "Files"
 ],
-"summary": "Returns the contents of the specified file.",
-"description": "Returns the contents of the specified file.",
+"summary": "Retrieve file content.",
+"description": "Retrieve file content.\nReturns the contents of the specified file.",
 "parameters": [
 {
 "name": "file_id",
@@ -1067,8 +1067,8 @@
 "tags": [
 "Inspect"
 ],
-"summary": "Get the current health status of the service.",
-"description": "Get the current health status of the service.",
+"summary": "Get health status.",
+"description": "Get health status.\nGet the current health status of the service.",
 "parameters": [],
 "deprecated": false
 }
@@ -1102,8 +1102,8 @@
 "tags": [
 "Inspect"
 ],
-"summary": "List all available API routes with their methods and implementing providers.",
-"description": "List all available API routes with their methods and implementing providers.",
+"summary": "List routes.",
+"description": "List routes.\nList all available API routes with their methods and implementing providers.",
 "parameters": [],
 "deprecated": false
 }
@@ -1170,8 +1170,8 @@
 "tags": [
 "Models"
 ],
-"summary": "Register a model.",
-"description": "Register a model.",
+"summary": "Register model.",
+"description": "Register model.\nRegister a model.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -1215,8 +1215,8 @@
 "tags": [
 "Models"
 ],
-"summary": "Get a model by its identifier.",
-"description": "Get a model by its identifier.",
+"summary": "Get model.",
+"description": "Get model.\nGet a model by its identifier.",
 "parameters": [
 {
 "name": "model_id",
@@ -1251,8 +1251,8 @@
 "tags": [
 "Models"
 ],
-"summary": "Unregister a model.",
-"description": "Unregister a model.",
+"summary": "Unregister model.",
+"description": "Unregister model.\nUnregister a model.",
 "parameters": [
 {
 "name": "model_id",
@@ -1296,8 +1296,8 @@
 "tags": [
 "Safety"
 ],
-"summary": "Classifies if text and/or image inputs are potentially harmful.",
-"description": "Classifies if text and/or image inputs are potentially harmful.",
+"summary": "Create moderation.",
+"description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -1374,8 +1374,8 @@
 "tags": [
 "Prompts"
 ],
-"summary": "Create a new prompt.",
-"description": "Create a new prompt.",
+"summary": "Create prompt.",
+"description": "Create prompt.\nCreate a new prompt.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -1419,8 +1419,8 @@
 "tags": [
 "Prompts"
 ],
-"summary": "Get a prompt by its identifier and optional version.",
-"description": "Get a prompt by its identifier and optional version.",
+"summary": "Get prompt.",
+"description": "Get prompt.\nGet a prompt by its identifier and optional version.",
 "parameters": [
 {
 "name": "prompt_id",
@@ -1471,8 +1471,8 @@
 "tags": [
 "Prompts"
 ],
-"summary": "Update an existing prompt (increments version).",
-"description": "Update an existing prompt (increments version).",
+"summary": "Update prompt.",
+"description": "Update prompt.\nUpdate an existing prompt (increments version).",
 "parameters": [
 {
 "name": "prompt_id",
@@ -1517,8 +1517,8 @@
 "tags": [
 "Prompts"
 ],
-"summary": "Delete a prompt.",
-"description": "Delete a prompt.",
+"summary": "Delete prompt.",
+"description": "Delete prompt.\nDelete a prompt.",
 "parameters": [
 {
 "name": "prompt_id",
@@ -1562,8 +1562,8 @@
 "tags": [
 "Prompts"
 ],
-"summary": "Set which version of a prompt should be the default in get_prompt (latest).",
-"description": "Set which version of a prompt should be the default in get_prompt (latest).",
+"summary": "Set prompt version.",
+"description": "Set prompt version.\nSet which version of a prompt should be the default in get_prompt (latest).",
 "parameters": [
 {
 "name": "prompt_id",
@@ -1617,8 +1617,8 @@
 "tags": [
 "Prompts"
 ],
-"summary": "List all versions of a specific prompt.",
-"description": "List all versions of a specific prompt.",
+"summary": "List prompt versions.",
+"description": "List prompt versions.\nList all versions of a specific prompt.",
 "parameters": [
 {
 "name": "prompt_id",
@@ -1662,8 +1662,8 @@
 "tags": [
 "Providers"
 ],
-"summary": "List all available providers.",
-"description": "List all available providers.",
+"summary": "List providers.",
+"description": "List providers.\nList all available providers.",
 "parameters": [],
 "deprecated": false
 }
@@ -1697,8 +1697,8 @@
 "tags": [
 "Providers"
 ],
-"summary": "Get detailed information about a specific provider.",
-"description": "Get detailed information about a specific provider.",
+"summary": "Get provider.",
+"description": "Get provider.\nGet detailed information about a specific provider.",
 "parameters": [
 {
 "name": "provider_id",
@@ -1742,8 +1742,8 @@
 "tags": [
 "Agents"
 ],
-"summary": "List all OpenAI responses.",
-"description": "List all OpenAI responses.",
+"summary": "List all responses.",
+"description": "List all responses.",
 "parameters": [
 {
 "name": "after",
@@ -1817,8 +1817,8 @@
 "tags": [
 "Agents"
 ],
-"summary": "Create a new OpenAI response.",
-"description": "Create a new OpenAI response.",
+"summary": "Create a model response.",
+"description": "Create a model response.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -1882,8 +1882,8 @@
 "tags": [
 "Agents"
 ],
-"summary": "Retrieve an OpenAI response by its ID.",
-"description": "Retrieve an OpenAI response by its ID.",
+"summary": "Get a model response.",
+"description": "Get a model response.",
 "parameters": [
 {
 "name": "response_id",
@@ -1925,8 +1925,8 @@
 "tags": [
 "Agents"
 ],
-"summary": "Delete an OpenAI response by its ID.",
-"description": "Delete an OpenAI response by its ID.",
+"summary": "Delete a response.",
+"description": "Delete a response.",
 "parameters": [
 {
 "name": "response_id",
@@ -1970,8 +1970,8 @@
 "tags": [
 "Agents"
 ],
-"summary": "List input items for a given OpenAI response.",
-"description": "List input items for a given OpenAI response.",
+"summary": "List input items.",
+"description": "List input items.",
 "parameters": [
 {
 "name": "response_id",
@@ -2063,8 +2063,8 @@
 "tags": [
 "Safety"
 ],
-"summary": "Run a shield.",
-"description": "Run a shield.",
+"summary": "Run shield.",
+"description": "Run shield.\nRun a shield.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -4196,8 +4196,8 @@
 "tags": [
 "Inspect"
 ],
-"summary": "Get the version of the service.",
-"description": "Get the version of the service.",
+"summary": "Get version.",
+"description": "Get version.\nGet the version of the service.",
 "parameters": [],
 "deprecated": false
 }
@@ -12914,16 +12914,18 @@
 },
 {
 "name": "Files",
-"description": ""
+"description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
+"x-displayName": "Files"
 },
 {
 "name": "Inference",
-"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
-"x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
+"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+"x-displayName": "Inference"
 },
 {
 "name": "Inspect",
-"description": ""
+"description": "APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.",
+"x-displayName": "Inspect"
 },
 {
 "name": "Models",
@@ -12931,17 +12933,18 @@
 },
 {
 "name": "Prompts",
-"description": "",
-"x-displayName": "Protocol for prompt management operations."
+"description": "Protocol for prompt management operations.",
+"x-displayName": "Prompts"
 },
 {
 "name": "Providers",
-"description": "",
-"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
+"description": "Providers API for inspecting, listing, and modifying providers and their configurations.",
+"x-displayName": "Providers"
 },
 {
 "name": "Safety",
-"description": ""
+"description": "OpenAI-compatible Moderations API.",
+"x-displayName": "Safety"
 },
 {
 "name": "Scoring",
203 docs/static/llama-stack-spec.yaml (vendored)
@@ -33,8 +33,8 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Inference
-summary: List all chat completions.
-description: List all chat completions.
+summary: List chat completions.
+description: List chat completions.
 parameters:
 - name: after
 in: query
@@ -87,10 +87,10 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Inference
-summary: >-
-  Generate an OpenAI-compatible chat completion for the given messages using
-  the specified model.
+summary: Create chat completions.
 description: >-
+  Create chat completions.
+
   Generate an OpenAI-compatible chat completion for the given messages using
   the specified model.
 parameters: []
@@ -122,8 +122,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Inference
-summary: Describe a chat completion by its ID.
-description: Describe a chat completion by its ID.
+summary: Get chat completion.
+description: >-
+  Get chat completion.
+
+  Describe a chat completion by its ID.
 parameters:
 - name: completion_id
 in: path
@@ -153,10 +156,10 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Inference
-summary: >-
-  Generate an OpenAI-compatible completion for the given prompt using the specified
-  model.
+summary: Create completion.
 description: >-
+  Create completion.
+
   Generate an OpenAI-compatible completion for the given prompt using the specified
   model.
 parameters: []
@@ -603,10 +606,10 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Inference
-summary: >-
-  Generate OpenAI-compatible embeddings for the given input using the specified
-  model.
+summary: Create embeddings.
 description: >-
+  Create embeddings.
+
   Generate OpenAI-compatible embeddings for the given input using the specified
   model.
 parameters: []
@ -639,9 +642,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: >-
|
summary: List files.
|
||||||
Returns a list of files that belong to the user's organization.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
List files.
|
||||||
|
|
||||||
Returns a list of files that belong to the user's organization.
|
Returns a list of files that belong to the user's organization.
|
||||||
parameters:
|
parameters:
|
||||||
- name: after
|
- name: after
|
||||||
|
|
@ -699,11 +703,13 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: >-
|
summary: Upload file.
|
||||||
Upload a file that can be used across various endpoints.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Upload file.
|
||||||
|
|
||||||
Upload a file that can be used across various endpoints.
|
Upload a file that can be used across various endpoints.
|
||||||
|
|
||||||
|
|
||||||
The file upload should be a multipart form request with:
|
The file upload should be a multipart form request with:
|
||||||
|
|
||||||
- file: The File object (not file name) to be uploaded.
|
- file: The File object (not file name) to be uploaded.
|
||||||
|
|
@ -752,9 +758,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: >-
|
summary: Retrieve file.
|
||||||
Returns information about a specific file.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Retrieve file.
|
||||||
|
|
||||||
Returns information about a specific file.
|
Returns information about a specific file.
|
||||||
parameters:
|
parameters:
|
||||||
- name: file_id
|
- name: file_id
|
||||||
|
|
@ -786,8 +793,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: Delete a file.
|
summary: Delete file.
|
||||||
description: Delete a file.
|
description: Delete file.
|
||||||
parameters:
|
parameters:
|
||||||
- name: file_id
|
- name: file_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -819,9 +826,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Files
|
- Files
|
||||||
summary: >-
|
summary: Retrieve file content.
|
||||||
Returns the contents of the specified file.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Retrieve file content.
|
||||||
|
|
||||||
Returns the contents of the specified file.
|
Returns the contents of the specified file.
|
||||||
parameters:
|
parameters:
|
||||||
- name: file_id
|
- name: file_id
|
||||||
|
|
@ -854,9 +862,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inspect
|
- Inspect
|
||||||
summary: >-
|
summary: Get health status.
|
||||||
Get the current health status of the service.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Get health status.
|
||||||
|
|
||||||
Get the current health status of the service.
|
Get the current health status of the service.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
|
|
@ -882,9 +891,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inspect
|
- Inspect
|
||||||
summary: >-
|
summary: List routes.
|
||||||
List all available API routes with their methods and implementing providers.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
List routes.
|
||||||
|
|
||||||
List all available API routes with their methods and implementing providers.
|
List all available API routes with their methods and implementing providers.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
|
|
@ -933,8 +943,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Models
|
- Models
|
||||||
summary: Register a model.
|
summary: Register model.
|
||||||
description: Register a model.
|
description: >-
|
||||||
|
Register model.
|
||||||
|
|
||||||
|
Register a model.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
|
@ -964,8 +977,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Models
|
- Models
|
||||||
summary: Get a model by its identifier.
|
summary: Get model.
|
||||||
description: Get a model by its identifier.
|
description: >-
|
||||||
|
Get model.
|
||||||
|
|
||||||
|
Get a model by its identifier.
|
||||||
parameters:
|
parameters:
|
||||||
- name: model_id
|
- name: model_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -990,8 +1006,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Models
|
- Models
|
||||||
summary: Unregister a model.
|
summary: Unregister model.
|
||||||
description: Unregister a model.
|
description: >-
|
||||||
|
Unregister model.
|
||||||
|
|
||||||
|
Unregister a model.
|
||||||
parameters:
|
parameters:
|
||||||
- name: model_id
|
- name: model_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1022,9 +1041,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Safety
|
- Safety
|
||||||
summary: >-
|
summary: Create moderation.
|
||||||
Classifies if text and/or image inputs are potentially harmful.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Create moderation.
|
||||||
|
|
||||||
Classifies if text and/or image inputs are potentially harmful.
|
Classifies if text and/or image inputs are potentially harmful.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
|
|
@ -1080,8 +1100,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Prompts
|
- Prompts
|
||||||
summary: Create a new prompt.
|
summary: Create prompt.
|
||||||
description: Create a new prompt.
|
description: >-
|
||||||
|
Create prompt.
|
||||||
|
|
||||||
|
Create a new prompt.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
|
@ -1111,9 +1134,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Prompts
|
- Prompts
|
||||||
summary: >-
|
summary: Get prompt.
|
||||||
Get a prompt by its identifier and optional version.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Get prompt.
|
||||||
|
|
||||||
Get a prompt by its identifier and optional version.
|
Get a prompt by its identifier and optional version.
|
||||||
parameters:
|
parameters:
|
||||||
- name: prompt_id
|
- name: prompt_id
|
||||||
|
|
@ -1151,9 +1175,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Prompts
|
- Prompts
|
||||||
summary: >-
|
summary: Update prompt.
|
||||||
Update an existing prompt (increments version).
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Update prompt.
|
||||||
|
|
||||||
Update an existing prompt (increments version).
|
Update an existing prompt (increments version).
|
||||||
parameters:
|
parameters:
|
||||||
- name: prompt_id
|
- name: prompt_id
|
||||||
|
|
@ -1185,8 +1210,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Prompts
|
- Prompts
|
||||||
summary: Delete a prompt.
|
summary: Delete prompt.
|
||||||
description: Delete a prompt.
|
description: >-
|
||||||
|
Delete prompt.
|
||||||
|
|
||||||
|
Delete a prompt.
|
||||||
parameters:
|
parameters:
|
||||||
- name: prompt_id
|
- name: prompt_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1217,9 +1245,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Prompts
|
- Prompts
|
||||||
summary: >-
|
summary: Set prompt version.
|
||||||
Set which version of a prompt should be the default in get_prompt (latest).
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Set prompt version.
|
||||||
|
|
||||||
Set which version of a prompt should be the default in get_prompt (latest).
|
Set which version of a prompt should be the default in get_prompt (latest).
|
||||||
parameters:
|
parameters:
|
||||||
- name: prompt_id
|
- name: prompt_id
|
||||||
|
|
@ -1257,8 +1286,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Prompts
|
- Prompts
|
||||||
summary: List all versions of a specific prompt.
|
summary: List prompt versions.
|
||||||
description: List all versions of a specific prompt.
|
description: >-
|
||||||
|
List prompt versions.
|
||||||
|
|
||||||
|
List all versions of a specific prompt.
|
||||||
parameters:
|
parameters:
|
||||||
- name: prompt_id
|
- name: prompt_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1290,8 +1322,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Providers
|
- Providers
|
||||||
summary: List all available providers.
|
summary: List providers.
|
||||||
description: List all available providers.
|
description: >-
|
||||||
|
List providers.
|
||||||
|
|
||||||
|
List all available providers.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
/v1/providers/{provider_id}:
|
/v1/providers/{provider_id}:
|
||||||
|
|
@ -1316,9 +1351,10 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Providers
|
- Providers
|
||||||
summary: >-
|
summary: Get provider.
|
||||||
Get detailed information about a specific provider.
|
|
||||||
description: >-
|
description: >-
|
||||||
|
Get provider.
|
||||||
|
|
||||||
Get detailed information about a specific provider.
|
Get detailed information about a specific provider.
|
||||||
parameters:
|
parameters:
|
||||||
- name: provider_id
|
- name: provider_id
|
||||||
|
|
@ -1349,8 +1385,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
summary: List all OpenAI responses.
|
summary: List all responses.
|
||||||
description: List all OpenAI responses.
|
description: List all responses.
|
||||||
parameters:
|
parameters:
|
||||||
- name: after
|
- name: after
|
||||||
in: query
|
in: query
|
||||||
|
|
@ -1401,8 +1437,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
summary: Create a new OpenAI response.
|
summary: Create a model response.
|
||||||
description: Create a new OpenAI response.
|
description: Create a model response.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
|
@ -1444,8 +1480,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
summary: Retrieve an OpenAI response by its ID.
|
summary: Get a model response.
|
||||||
description: Retrieve an OpenAI response by its ID.
|
description: Get a model response.
|
||||||
parameters:
|
parameters:
|
||||||
- name: response_id
|
- name: response_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1475,8 +1511,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
summary: Delete an OpenAI response by its ID.
|
summary: Delete a response.
|
||||||
description: Delete an OpenAI response by its ID.
|
description: Delete a response.
|
||||||
parameters:
|
parameters:
|
||||||
- name: response_id
|
- name: response_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1506,10 +1542,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
summary: >-
|
summary: List input items.
|
||||||
List input items for a given OpenAI response.
|
description: List input items.
|
||||||
description: >-
|
|
||||||
List input items for a given OpenAI response.
|
|
||||||
parameters:
|
parameters:
|
||||||
- name: response_id
|
- name: response_id
|
||||||
in: path
|
in: path
|
||||||
|
|
@ -1578,8 +1612,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Safety
|
- Safety
|
||||||
summary: Run a shield.
|
summary: Run shield.
|
||||||
description: Run a shield.
|
description: >-
|
||||||
|
Run shield.
|
||||||
|
|
||||||
|
Run a shield.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
|
@ -3135,8 +3172,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Inspect
|
- Inspect
|
||||||
summary: Get the version of the service.
|
summary: Get version.
|
||||||
description: Get the version of the service.
|
description: >-
|
||||||
|
Get version.
|
||||||
|
|
||||||
|
Get the version of the service.
|
||||||
parameters: []
|
parameters: []
|
||||||
deprecated: false
|
deprecated: false
|
||||||
jsonSchemaDialect: >-
|
jsonSchemaDialect: >-
|
||||||
|
|
@ -9749,9 +9789,16 @@ tags:
|
||||||
x-displayName: >-
|
x-displayName: >-
|
||||||
Protocol for conversation management operations.
|
Protocol for conversation management operations.
|
||||||
- name: Files
|
- name: Files
|
||||||
description: ''
|
description: >-
|
||||||
|
This API is used to upload documents that can be used with other Llama Stack
|
||||||
|
APIs.
|
||||||
|
x-displayName: Files
|
||||||
- name: Inference
|
- name: Inference
|
||||||
description: >-
|
description: >-
|
||||||
|
Llama Stack Inference API for generating completions, chat completions, and
|
||||||
|
embeddings.
|
||||||
|
|
||||||
|
|
||||||
This API provides the raw interface to the underlying models. Two kinds of models
|
This API provides the raw interface to the underlying models. Two kinds of models
|
||||||
are supported:
|
are supported:
|
||||||
|
|
||||||
|
|
@ -9759,23 +9806,25 @@ tags:
|
||||||
|
|
||||||
- Embedding models: these models generate embeddings to be used for semantic
|
- Embedding models: these models generate embeddings to be used for semantic
|
||||||
search.
|
search.
|
||||||
x-displayName: >-
|
x-displayName: Inference
|
||||||
Llama Stack Inference API for generating completions, chat completions, and
|
|
||||||
embeddings.
|
|
||||||
- name: Inspect
|
- name: Inspect
|
||||||
description: ''
|
description: >-
|
||||||
|
APIs for inspecting the Llama Stack service, including health status, available
|
||||||
|
API routes with methods and implementing providers.
|
||||||
|
x-displayName: Inspect
|
||||||
- name: Models
|
- name: Models
|
||||||
description: ''
|
description: ''
|
||||||
- name: Prompts
|
- name: Prompts
|
||||||
description: ''
|
description: >-
|
||||||
x-displayName: >-
|
|
||||||
Protocol for prompt management operations.
|
Protocol for prompt management operations.
|
||||||
|
x-displayName: Prompts
|
||||||
- name: Providers
|
- name: Providers
|
||||||
description: ''
|
description: >-
|
||||||
x-displayName: >-
|
|
||||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||||
|
x-displayName: Providers
|
||||||
- name: Safety
|
- name: Safety
|
||||||
description: ''
|
description: OpenAI-compatible Moderations API.
|
||||||
|
x-displayName: Safety
|
||||||
- name: Scoring
|
- name: Scoring
|
||||||
description: ''
|
description: ''
|
||||||
- name: ScoringFunctions
|
- name: ScoringFunctions
|
||||||
|
|
|
||||||

145  docs/static/stainless-llama-stack-spec.html  vendored

@@ -69,8 +69,8 @@
-  "summary": "List all chat completions.",
-  "description": "List all chat completions.",
+  "summary": "List chat completions.",
+  "description": "List chat completions.",
@@ -146,8 +146,8 @@
-  "summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
-  "description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
+  "summary": "Create chat completions.",
+  "description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
@@ -191,8 +191,8 @@
-  "summary": "Describe a chat completion by its ID.",
-  "description": "Describe a chat completion by its ID.",
+  "summary": "Get chat completion.",
+  "description": "Get chat completion.\nDescribe a chat completion by its ID.",
@@ -236,8 +236,8 @@
-  "summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
-  "description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
+  "summary": "Create completion.",
+  "description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
@@ -758,8 +758,8 @@
-  "summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
-  "description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
+  "summary": "Create embeddings.",
+  "description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
@@ -803,8 +803,8 @@
-  "summary": "Returns a list of files that belong to the user's organization.",
-  "description": "Returns a list of files that belong to the user's organization.",
+  "summary": "List files.",
+  "description": "List files.\nReturns a list of files that belong to the user's organization.",
@@ -873,8 +873,8 @@
-  "summary": "Upload a file that can be used across various endpoints.",
-  "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
+  "summary": "Upload file.",
+  "description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
@@ -934,8 +934,8 @@
-  "summary": "Returns information about a specific file.",
-  "description": "Returns information about a specific file.",
+  "summary": "Retrieve file.",
+  "description": "Retrieve file.\nReturns information about a specific file.",
@@ -977,8 +977,8 @@
-  "summary": "Delete a file.",
-  "description": "Delete a file.",
+  "summary": "Delete file.",
+  "description": "Delete file.",
@@ -1022,8 +1022,8 @@
-  "summary": "Returns the contents of the specified file.",
-  "description": "Returns the contents of the specified file.",
+  "summary": "Retrieve file content.",
+  "description": "Retrieve file content.\nReturns the contents of the specified file.",
@@ -1067,8 +1067,8 @@
-  "summary": "Get the current health status of the service.",
-  "description": "Get the current health status of the service.",
+  "summary": "Get health status.",
+  "description": "Get health status.\nGet the current health status of the service.",
@@ -1102,8 +1102,8 @@
-  "summary": "List all available API routes with their methods and implementing providers.",
-  "description": "List all available API routes with their methods and implementing providers.",
+  "summary": "List routes.",
+  "description": "List routes.\nList all available API routes with their methods and implementing providers.",
@@ -1170,8 +1170,8 @@
-  "summary": "Register a model.",
-  "description": "Register a model.",
+  "summary": "Register model.",
+  "description": "Register model.\nRegister a model.",
@@ -1215,8 +1215,8 @@
-  "summary": "Get a model by its identifier.",
-  "description": "Get a model by its identifier.",
+  "summary": "Get model.",
+  "description": "Get model.\nGet a model by its identifier.",
@@ -1251,8 +1251,8 @@
-  "summary": "Unregister a model.",
-  "description": "Unregister a model.",
+  "summary": "Unregister model.",
+  "description": "Unregister model.\nUnregister a model.",
@@ -1296,8 +1296,8 @@
-  "summary": "Classifies if text and/or image inputs are potentially harmful.",
-  "description": "Classifies if text and/or image inputs are potentially harmful.",
+  "summary": "Create moderation.",
+  "description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
@@ -1374,8 +1374,8 @@
-  "summary": "Create a new prompt.",
-  "description": "Create a new prompt.",
+  "summary": "Create prompt.",
+  "description": "Create prompt.\nCreate a new prompt.",
@@ -1419,8 +1419,8 @@
-  "summary": "Get a prompt by its identifier and optional version.",
-  "description": "Get a prompt by its identifier and optional version.",
+  "summary": "Get prompt.",
+  "description": "Get prompt.\nGet a prompt by its identifier and optional version.",
@@ -1471,8 +1471,8 @@
-  "summary": "Update an existing prompt (increments version).",
-  "description": "Update an existing prompt (increments version).",
+  "summary": "Update prompt.",
+  "description": "Update prompt.\nUpdate an existing prompt (increments version).",
@@ -1517,8 +1517,8 @@
-  "summary": "Delete a prompt.",
-  "description": "Delete a prompt.",
+  "summary": "Delete prompt.",
+  "description": "Delete prompt.\nDelete a prompt.",
@@ -1562,8 +1562,8 @@
-  "summary": "Set which version of a prompt should be the default in get_prompt (latest).",
-  "description": "Set which version of a prompt should be the default in get_prompt (latest).",
+  "summary": "Set prompt version.",
+  "description": "Set prompt version.\nSet which version of a prompt should be the default in get_prompt (latest).",
@@ -1617,8 +1617,8 @@
-  "summary": "List all versions of a specific prompt.",
-  "description": "List all versions of a specific prompt.",
+  "summary": "List prompt versions.",
+  "description": "List prompt versions.\nList all versions of a specific prompt.",
@@ -1662,8 +1662,8 @@
-  "summary": "List all available providers.",
-  "description": "List all available providers.",
+  "summary": "List providers.",
+  "description": "List providers.\nList all available providers.",
@@ -1697,8 +1697,8 @@
-  "summary": "Get detailed information about a specific provider.",
-  "description": "Get detailed information about a specific provider.",
+  "summary": "Get provider.",
+  "description": "Get provider.\nGet detailed information about a specific provider.",
@@ -1742,8 +1742,8 @@
-  "summary": "List all OpenAI responses.",
-  "description": "List all OpenAI responses.",
+  "summary": "List all responses.",
+  "description": "List all responses.",
@@ -1817,8 +1817,8 @@
-  "summary": "Create a new OpenAI response.",
-  "description": "Create a new OpenAI response.",
+  "summary": "Create a model response.",
+  "description": "Create a model response.",
@@ -1882,8 +1882,8 @@
-  "summary": "Retrieve an OpenAI response by its ID.",
-  "description": "Retrieve an OpenAI response by its ID.",
+  "summary": "Get a model response.",
+  "description": "Get a model response.",
@@ -1925,8 +1925,8 @@
-  "summary": "Delete an OpenAI response by its ID.",
-  "description": "Delete an OpenAI response by its ID.",
+  "summary": "Delete a response.",
+  "description": "Delete a response.",
@@ -1970,8 +1970,8 @@
-  "summary": "List input items for a given OpenAI response.",
-  "description": "List input items for a given OpenAI response.",
+  "summary": "List input items.",
+  "description": "List input items.",
@@ -2063,8 +2063,8 @@
-  "summary": "Run a shield.",
-  "description": "Run a shield.",
+  "summary": "Run shield.",
+  "description": "Run shield.\nRun a shield.",
@@ -4196,8 +4196,8 @@
-  "summary": "Get the version of the service.",
-  "description": "Get the version of the service.",
+  "summary": "Get version.",
+  "description": "Get version.\nGet the version of the service.",
@@ -18487,16 +18487,18 @@
     {
       "name": "Files",
-      "description": ""
+      "description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
+      "x-displayName": "Files"
     },
     {
       "name": "Inference",
-      "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
-      "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
+      "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+      "x-displayName": "Inference"
     },
     {
      "name": "Inspect",
-      "description": ""
+      "description": "APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.",
+      "x-displayName": "Inspect"
     },
     {
       "name": "Models",
@@ -18508,17 +18510,18 @@
     {
       "name": "Prompts",
-      "description": "",
-      "x-displayName": "Protocol for prompt management operations."
+      "description": "Protocol for prompt management operations.",
+      "x-displayName": "Prompts"
     },
     {
       "name": "Providers",
-      "description": "",
-      "x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
+      "description": "Providers API for inspecting, listing, and modifying providers and their configurations.",
+      "x-displayName": "Providers"
     },
     {
       "name": "Safety",
-      "description": ""
+      "description": "OpenAI-compatible Moderations API.",
+      "x-displayName": "Safety"
     },
     {
       "name": "Scoring",

203  docs/static/stainless-llama-stack-spec.yaml  vendored

@@ -36,8 +36,8 @@ paths:
-  summary: List all chat completions.
-  description: List all chat completions.
+  summary: List chat completions.
+  description: List chat completions.
@@ -90,10 +90,10 @@ paths:
-  summary: >-
-    Generate an OpenAI-compatible chat completion for the given messages using
-    the specified model.
+  summary: Create chat completions.
   description: >-
+    Create chat completions.
+
     Generate an OpenAI-compatible chat completion for the given messages using
     the specified model.
@@ -125,8 +125,11 @@ paths:
-  summary: Describe a chat completion by its ID.
-  description: Describe a chat completion by its ID.
+  summary: Get chat completion.
+  description: >-
+    Get chat completion.
+
+    Describe a chat completion by its ID.
@@ -156,10 +159,10 @@ paths:
-  summary: >-
-    Generate an OpenAI-compatible completion for the given prompt using the specified
-    model.
+  summary: Create completion.
   description: >-
+    Create completion.
+
     Generate an OpenAI-compatible completion for the given prompt using the specified
     model.
@@ -606,10 +609,10 @@ paths:
-  summary: >-
-    Generate OpenAI-compatible embeddings for the given input using the specified
-    model.
+  summary: Create embeddings.
   description: >-
+    Create embeddings.
+
     Generate OpenAI-compatible embeddings for the given input using the specified
     model.
@@ -642,9 +645,10 @@ paths:
-  summary: >-
-    Returns a list of files that belong to the user's organization.
+  summary: List files.
   description: >-
+    List files.
+
     Returns a list of files that belong to the user's organization.
@@ -702,11 +706,13 @@ paths:
-  summary: >-
-    Upload a file that can be used across various endpoints.
+  summary: Upload file.
   description: >-
+    Upload file.
+
     Upload a file that can be used across various endpoints.

     The file upload should be a multipart form request with:

     - file: The File object (not file name) to be uploaded.
@@ -755,9 +761,10 @@ paths:
-  summary: >-
-    Returns information about a specific file.
+  summary: Retrieve file.
   description: >-
+    Retrieve file.
+
     Returns information about a specific file.
@@ -789,8 +796,8 @@ paths:
-  summary: Delete a file.
-  description: Delete a file.
+  summary: Delete file.
+  description: Delete file.
@@ -822,9 +829,10 @@ paths:
-  summary: >-
-    Returns the contents of the specified file.
+  summary: Retrieve file content.
   description: >-
+    Retrieve file content.
+
     Returns the contents of the specified file.
@@ -857,9 +865,10 @@ paths:
-  summary: >-
-    Get the current health status of the service.
+  summary: Get health status.
   description: >-
+    Get health status.
+
     Get the current health status of the service.
@@ -885,9 +894,10 @@ paths:
-  summary: >-
-    List all available API routes with their methods and implementing providers.
+  summary: List routes.
   description: >-
+    List routes.
+
     List all available API routes with their methods and implementing providers.
@@ -936,8 +946,11 @@ paths:
-  summary: Register a model.
-  description: Register a model.
+  summary: Register model.
+  description: >-
+    Register model.
+
+    Register a model.
@@ -967,8 +980,11 @@ paths:
-  summary: Get a model by its identifier.
-  description: Get a model by its identifier.
+  summary: Get model.
+  description: >-
+    Get model.
+
+    Get a model by its identifier.
@@ -993,8 +1009,11 @@ paths:
-  summary: Unregister a model.
-  description: Unregister a model.
+  summary: Unregister model.
+  description: >-
+    Unregister model.
+
+    Unregister a model.
@@ -1025,9 +1044,10 @@ paths:
-  summary: >-
-    Classifies if text and/or image inputs are potentially harmful.
+  summary: Create moderation.
   description: >-
+    Create moderation.
+
     Classifies if text and/or image inputs are potentially harmful.
@@ -1083,8 +1103,11 @@ paths:
-  summary: Create a new prompt.
-  description: Create a new prompt.
+  summary: Create prompt.
+  description: >-
+    Create prompt.
+
+    Create a new prompt.
@@ -1114,9 +1137,10 @@ paths:
-  summary: >-
-    Get a prompt by its identifier and optional version.
+  summary: Get prompt.
   description: >-
+    Get prompt.
+
     Get a prompt by its identifier and optional version.
@@ -1154,9 +1178,10 @@ paths:
-  summary: >-
-    Update an existing prompt (increments version).
+  summary: Update prompt.
   description: >-
+    Update prompt.
+
     Update an existing prompt (increments version).
@@ -1188,8 +1213,11 @@ paths:
-  summary: Delete a prompt.
-  description: Delete a prompt.
+  summary: Delete prompt.
+  description: >-
+    Delete prompt.
+
+    Delete a prompt.
@@ -1220,9 +1248,10 @@ paths:
-  summary: >-
-    Set which version of a prompt should be the default in get_prompt (latest).
+  summary: Set prompt version.
   description: >-
+    Set prompt version.
+
     Set which version of a prompt should be the default in get_prompt (latest).
@@ -1260,8 +1289,11 @@ paths:
-  summary: List all versions of a specific prompt.
-  description: List all versions of a specific prompt.
+  summary: List prompt versions.
+  description: >-
+    List prompt versions.
+
+    List all versions of a specific prompt.
@@ -1293,8 +1325,11 @@ paths:
-  summary: List all available providers.
-  description: List all available providers.
+  summary: List providers.
+  description: >-
+    List providers.
+
+    List all available providers.
@@ -1319,9 +1354,10 @@ paths:
-  summary: >-
-    Get detailed information about a specific provider.
+  summary: Get provider.
   description: >-
+    Get provider.
+
     Get detailed information about a specific provider.
@@ -1352,8 +1388,8 @@ paths:
-  summary: List all OpenAI responses.
-  description: List all OpenAI responses.
+  summary: List all responses.
+  description: List all responses.
@@ -1404,8 +1440,8 @@ paths:
-  summary: Create a new OpenAI response.
-  description: Create a new OpenAI response.
+  summary: Create a model response.
+  description: Create a model response.
@@ -1447,8 +1483,8 @@ paths:
-  summary: Retrieve an OpenAI response by its ID.
-  description: Retrieve an OpenAI response by its ID.
+  summary: Get a model response.
+  description: Get a model response.
@@ -1478,8 +1514,8 @@ paths:
-  summary: Delete an OpenAI response by its ID.
-  description: Delete an OpenAI response by its ID.
+  summary: Delete a response.
+  description: Delete a response.
@@ -1509,10 +1545,8 @@ paths:
-  summary: >-
-    List input items for a given OpenAI response.
-  description: >-
-    List input items for a given OpenAI response.
+  summary: List input items.
+  description: List input items.
@@ -1581,8 +1615,11 @@ paths:
-  summary: Run a shield.
-  description: Run a shield.
+  summary: Run shield.
+  description: >-
+    Run shield.
+
+    Run a shield.
@@ -3138,8 +3175,11 @@ paths:
-  summary: Get the version of the service.
-  description: Get the version of the service.
+  summary: Get version.
+  description: >-
+    Get version.
+
+    Get the version of the service.
@@ -13795,9 +13835,16 @@ tags:
   x-displayName: >-
     Llama Stack Evaluation API for running evaluations on model and agent candidates.
 - name: Files
-  description: ''
+  description: >-
+    This API is used to upload documents that can be used with other Llama Stack
+    APIs.
+  x-displayName: Files
 - name: Inference
   description: >-
+    Llama Stack Inference API for generating completions, chat completions, and
+    embeddings.
+
     This API provides the raw interface to the underlying models. Two kinds of models
     are supported:
@@ -13805,25 +13852,27 @@ tags:
     - Embedding models: these models generate embeddings to be used for semantic
       search.
-  x-displayName: >-
-    Llama Stack Inference API for generating completions, chat completions, and
-    embeddings.
+  x-displayName: Inference
 - name: Inspect
-  description: ''
+  description: >-
+    APIs for inspecting the Llama Stack service, including health status, available
+    API routes with methods and implementing providers.
+  x-displayName: Inspect
 - name: Models
   description: ''
 - name: PostTraining (Coming Soon)
   description: ''
 - name: Prompts
-  description: ''
-  x-displayName: >-
-    Protocol for prompt management operations.
+  description: >-
+    Protocol for prompt management operations.
+  x-displayName: Prompts
 - name: Providers
-  description: ''
-  x-displayName: >-
-    Providers API for inspecting, listing, and modifying providers and their configurations.
+  description: >-
+    Providers API for inspecting, listing, and modifying providers and their configurations.
+  x-displayName: Providers
 - name: Safety
-  description: ''
+  description: OpenAI-compatible Moderations API.
+  x-displayName: Safety
 - name: Scoring
   description: ''
 - name: ScoringFunctions

@@ -88,7 +88,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
    ...
    Build Successful!
    You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml
-   You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter --image-type venv
+   You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter
    ```

 3. **Set the ENV variables by exporting them to the terminal**:
@@ -102,12 +102,11 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
 3. **Run the Llama Stack**:
    Run the stack using uv:
    ```bash
+   INFERENCE_MODEL=$INFERENCE_MODEL \
+   SAFETY_MODEL=$SAFETY_MODEL \
+   OLLAMA_URL=$OLLAMA_URL \
    uv run --with llama-stack llama stack run starter \
-     --image-type venv \
-     --port $LLAMA_STACK_PORT \
-     --env INFERENCE_MODEL=$INFERENCE_MODEL \
-     --env SAFETY_MODEL=$SAFETY_MODEL \
-     --env OLLAMA_URL=$OLLAMA_URL
+     --port $LLAMA_STACK_PORT
    ```
    Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model.
||||||
|
|
@@ -797,7 +797,7 @@ class Agents(Protocol):
 self,
 response_id: str,
 ) -> OpenAIResponseObject:
-"""Retrieve an OpenAI response by its ID.
+"""Get a model response.
 
 :param response_id: The ID of the OpenAI response to retrieve.
 :returns: An OpenAIResponseObject.
@@ -826,7 +826,7 @@ class Agents(Protocol):
 ),
 ] = None,
 ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
-"""Create a new OpenAI response.
+"""Create a model response.
 
 :param input: Input message(s) to create the response.
 :param model: The underlying LLM used for completions.
@@ -846,7 +846,7 @@ class Agents(Protocol):
 model: str | None = None,
 order: Order | None = Order.desc,
 ) -> ListOpenAIResponseObject:
-"""List all OpenAI responses.
+"""List all responses.
 
 :param after: The ID of the last response to return.
 :param limit: The number of responses to return.
@@ -869,7 +869,7 @@ class Agents(Protocol):
 limit: int | None = 20,
 order: Order | None = Order.desc,
 ) -> ListOpenAIResponseInputItem:
-"""List input items for a given OpenAI response.
+"""List input items.
 
 :param response_id: The ID of the response to retrieve input items for.
 :param after: An item ID to list items after, used for pagination.
@@ -884,7 +884,7 @@ class Agents(Protocol):
 @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
 @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
 async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
-"""Delete an OpenAI response by its ID.
+"""Delete a response.
 
 :param response_id: The ID of the OpenAI response to delete.
 :returns: An OpenAIDeleteResponseObject
@@ -104,6 +104,11 @@ class OpenAIFileDeleteResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class Files(Protocol):
+"""Files
+
+This API is used to upload documents that can be used with other Llama Stack APIs.
+"""
 
 # OpenAI Files API Endpoints
 @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
 @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
@@ -113,7 +118,8 @@ class Files(Protocol):
 purpose: Annotated[OpenAIFilePurpose, Form()],
 expires_after: Annotated[ExpiresAfter | None, Form()] = None,
 ) -> OpenAIFileObject:
-"""
+"""Upload file.
 
 Upload a file that can be used across various endpoints.
 
 The file upload should be a multipart form request with:
@@ -137,7 +143,8 @@ class Files(Protocol):
 order: Order | None = Order.desc,
 purpose: OpenAIFilePurpose | None = None,
 ) -> ListOpenAIFileResponse:
-"""
+"""List files.
 
 Returns a list of files that belong to the user's organization.
 
 :param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
@@ -154,7 +161,8 @@ class Files(Protocol):
 self,
 file_id: str,
 ) -> OpenAIFileObject:
-"""
+"""Retrieve file.
 
 Returns information about a specific file.
 
 :param file_id: The ID of the file to use for this request.
@@ -168,8 +176,7 @@ class Files(Protocol):
 self,
 file_id: str,
 ) -> OpenAIFileDeleteResponse:
-"""
+"""Delete file.
-Delete a file.
 
 :param file_id: The ID of the file to use for this request.
 :returns: An OpenAIFileDeleteResponse indicating successful deletion.
@@ -182,7 +189,8 @@ class Files(Protocol):
 self,
 file_id: str,
 ) -> Response:
-"""
+"""Retrieve file content.
 
 Returns the contents of the specified file.
 
 :param file_id: The ID of the file to use for this request.
@@ -1053,7 +1053,9 @@ class InferenceProvider(Protocol):
 # for fill-in-the-middle type completion
 suffix: str | None = None,
 ) -> OpenAICompletion:
-"""Generate an OpenAI-compatible completion for the given prompt using the specified model.
+"""Create completion.
 
+Generate an OpenAI-compatible completion for the given prompt using the specified model.
 
 :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
 :param prompt: The prompt to generate a completion for.
@@ -1105,7 +1107,9 @@ class InferenceProvider(Protocol):
 top_p: float | None = None,
 user: str | None = None,
 ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-"""Generate an OpenAI-compatible chat completion for the given messages using the specified model.
+"""Create chat completions.
 
+Generate an OpenAI-compatible chat completion for the given messages using the specified model.
 
 :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
 :param messages: List of messages in the conversation.
@@ -1144,7 +1148,9 @@ class InferenceProvider(Protocol):
 dimensions: int | None = None,
 user: str | None = None,
 ) -> OpenAIEmbeddingsResponse:
-"""Generate OpenAI-compatible embeddings for the given input using the specified model.
+"""Create embeddings.
 
+Generate OpenAI-compatible embeddings for the given input using the specified model.
 
 :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
 :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings.
@@ -1157,7 +1163,9 @@ class InferenceProvider(Protocol):
 
 
 class Inference(InferenceProvider):
-"""Llama Stack Inference API for generating completions, chat completions, and embeddings.
+"""Inference
 
+Llama Stack Inference API for generating completions, chat completions, and embeddings.
 
 This API provides the raw interface to the underlying models. Two kinds of models are supported:
 - LLM models: these models generate "raw" and "chat" (conversational) completions.
@@ -1173,7 +1181,7 @@ class Inference(InferenceProvider):
 model: str | None = None,
 order: Order | None = Order.desc,
 ) -> ListOpenAIChatCompletionResponse:
-"""List all chat completions.
+"""List chat completions.
 
 :param after: The ID of the last chat completion to return.
 :param limit: The maximum number of chat completions to return.
@@ -1188,7 +1196,9 @@ class Inference(InferenceProvider):
 )
 @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
 async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
-"""Describe a chat completion by its ID.
+"""Get chat completion.
 
+Describe a chat completion by its ID.
 
 :param completion_id: ID of the chat completion.
 :returns: A OpenAICompletionWithInputMessages.
@@ -58,9 +58,16 @@ class ListRoutesResponse(BaseModel):
 
 @runtime_checkable
 class Inspect(Protocol):
+"""Inspect
+
+APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.
+"""
 
 @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
 async def list_routes(self) -> ListRoutesResponse:
-"""List all available API routes with their methods and implementing providers.
+"""List routes.
 
+List all available API routes with their methods and implementing providers.
 
 :returns: Response containing information about all available routes.
 """
@@ -68,7 +75,9 @@ class Inspect(Protocol):
 
 @webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
 async def health(self) -> HealthInfo:
-"""Get the current health status of the service.
+"""Get health status.
 
+Get the current health status of the service.
 
 :returns: Health information indicating if the service is operational.
 """
@@ -76,7 +85,9 @@ class Inspect(Protocol):
 
 @webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
 async def version(self) -> VersionInfo:
-"""Get the version of the service.
+"""Get version.
 
+Get the version of the service.
 
 :returns: Version information containing the service version number.
 """
@@ -124,7 +124,9 @@ class Models(Protocol):
 self,
 model_id: str,
 ) -> Model:
-"""Get a model by its identifier.
+"""Get model.
 
+Get a model by its identifier.
 
 :param model_id: The identifier of the model to get.
 :returns: A Model.
@@ -140,7 +142,9 @@ class Models(Protocol):
 metadata: dict[str, Any] | None = None,
 model_type: ModelType | None = None,
 ) -> Model:
-"""Register a model.
+"""Register model.
 
+Register a model.
 
 :param model_id: The identifier of the model to register.
 :param provider_model_id: The identifier of the model in the provider.
@@ -156,7 +160,9 @@ class Models(Protocol):
 self,
 model_id: str,
 ) -> None:
-"""Unregister a model.
+"""Unregister model.
 
+Unregister a model.
 
 :param model_id: The identifier of the model to unregister.
 """
@@ -94,7 +94,9 @@ class ListPromptsResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class Prompts(Protocol):
-"""Protocol for prompt management operations."""
+"""Prompts
 
+Protocol for prompt management operations."""
 
 @webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
 async def list_prompts(self) -> ListPromptsResponse:
@@ -109,7 +111,9 @@ class Prompts(Protocol):
 self,
 prompt_id: str,
 ) -> ListPromptsResponse:
-"""List all versions of a specific prompt.
+"""List prompt versions.
 
+List all versions of a specific prompt.
 
 :param prompt_id: The identifier of the prompt to list versions for.
 :returns: A ListPromptsResponse containing all versions of the prompt.
@@ -122,7 +126,9 @@ class Prompts(Protocol):
 prompt_id: str,
 version: int | None = None,
 ) -> Prompt:
-"""Get a prompt by its identifier and optional version.
+"""Get prompt.
 
+Get a prompt by its identifier and optional version.
 
 :param prompt_id: The identifier of the prompt to get.
 :param version: The version of the prompt to get (defaults to latest).
@@ -136,7 +142,9 @@ class Prompts(Protocol):
 prompt: str,
 variables: list[str] | None = None,
 ) -> Prompt:
-"""Create a new prompt.
+"""Create prompt.
 
+Create a new prompt.
 
 :param prompt: The prompt text content with variable placeholders.
 :param variables: List of variable names that can be used in the prompt template.
@@ -153,7 +161,9 @@ class Prompts(Protocol):
 variables: list[str] | None = None,
 set_as_default: bool = True,
 ) -> Prompt:
-"""Update an existing prompt (increments version).
+"""Update prompt.
 
+Update an existing prompt (increments version).
 
 :param prompt_id: The identifier of the prompt to update.
 :param prompt: The updated prompt text content.
@@ -169,7 +179,9 @@ class Prompts(Protocol):
 self,
 prompt_id: str,
 ) -> None:
-"""Delete a prompt.
+"""Delete prompt.
 
+Delete a prompt.
 
 :param prompt_id: The identifier of the prompt to delete.
 """
@@ -181,7 +193,9 @@ class Prompts(Protocol):
 prompt_id: str,
 version: int,
 ) -> Prompt:
-"""Set which version of a prompt should be the default in get_prompt (latest).
+"""Set prompt version.
 
+Set which version of a prompt should be the default in get_prompt (latest).
 
 :param prompt_id: The identifier of the prompt.
 :param version: The version to set as default.
@@ -42,13 +42,16 @@ class ListProvidersResponse(BaseModel):
 
 @runtime_checkable
 class Providers(Protocol):
-"""
+"""Providers
 
 Providers API for inspecting, listing, and modifying providers and their configurations.
 """
 
 @webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
 async def list_providers(self) -> ListProvidersResponse:
-"""List all available providers.
+"""List providers.
 
+List all available providers.
 
 :returns: A ListProvidersResponse containing information about all providers.
 """
@@ -56,7 +59,9 @@ class Providers(Protocol):
 
 @webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
 async def inspect_provider(self, provider_id: str) -> ProviderInfo:
-"""Get detailed information about a specific provider.
+"""Get provider.
 
+Get detailed information about a specific provider.
 
 :param provider_id: The ID of the provider to inspect.
 :returns: A ProviderInfo object containing the provider's details.
@@ -96,6 +96,11 @@ class ShieldStore(Protocol):
 @runtime_checkable
 @trace_protocol
 class Safety(Protocol):
+"""Safety
+
+OpenAI-compatible Moderations API.
+"""
 
 shield_store: ShieldStore
 
 @webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
@@ -105,7 +110,9 @@ class Safety(Protocol):
 messages: list[Message],
 params: dict[str, Any],
 ) -> RunShieldResponse:
-"""Run a shield.
+"""Run shield.
 
+Run a shield.
 
 :param shield_id: The identifier of the shield to run.
 :param messages: The messages to run the shield on.
@@ -117,7 +124,9 @@ class Safety(Protocol):
 @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
 @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
 async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
-"""Classifies if text and/or image inputs are potentially harmful.
+"""Create moderation.
 
+Classifies if text and/or image inputs are potentially harmful.
 :param input: Input (or inputs) to classify.
 Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
 :param model: The content moderation model you would like to use.
@@ -444,12 +444,24 @@ def _run_stack_build_command_from_build_config(
 
 cprint("Build Successful!", color="green", file=sys.stderr)
 cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr)
-cprint(
-"You can run the new Llama Stack distro via: "
-+ colored(f"llama stack run {run_config_file} --image-type {build_config.image_type}", "blue"),
-color="green",
-file=sys.stderr,
-)
+if build_config.image_type == LlamaStackImageType.VENV:
+cprint(
+"You can run the new Llama Stack distro (after activating "
++ colored(image_name, "cyan")
++ ") via: "
++ colored(f"llama stack run {run_config_file}", "blue"),
+color="green",
+file=sys.stderr,
+)
+elif build_config.image_type == LlamaStackImageType.CONTAINER:
+cprint(
+"You can run the container with: "
++ colored(
+f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue"
+),
+color="green",
+file=sys.stderr,
+)
 return distro_path
 else:
 return _generate_run_config(build_config, build_dir, image_name)
@@ -6,11 +6,18 @@
 
 import argparse
 import os
+import ssl
 import subprocess
 from pathlib import Path
 
+import uvicorn
+import yaml
+
 from llama_stack.cli.stack.utils import ImageType
 from llama_stack.cli.subcommand import Subcommand
+from llama_stack.core.datatypes import LoggingConfig, StackRunConfig
+from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
+from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
 from llama_stack.log import get_logger
 
 REPO_ROOT = Path(__file__).parent.parent.parent.parent
@@ -48,18 +55,12 @@ class StackRun(Subcommand):
 "--image-name",
 type=str,
 default=None,
-help="Name of the image to run. Defaults to the current environment",
+help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
-)
-self.parser.add_argument(
-"--env",
-action="append",
-help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.",
-metavar="KEY=VALUE",
 )
 self.parser.add_argument(
 "--image-type",
 type=str,
-help="Image Type used during the build. This can be only venv.",
+help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
 choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value],
 )
 self.parser.add_argument(
@@ -68,48 +69,22 @@ class StackRun(Subcommand):
 help="Start the UI server",
 )
 
-def _resolve_config_and_distro(self, args: argparse.Namespace) -> tuple[Path | None, str | None]:
-"""Resolve config file path and distribution name from args.config"""
-from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
-
-if not args.config:
-return None, None
-
-config_file = Path(args.config)
-has_yaml_suffix = args.config.endswith(".yaml")
-distro_name = None
-
-if not config_file.exists() and not has_yaml_suffix:
-# check if this is a distribution
-config_file = Path(REPO_ROOT) / "llama_stack" / "distributions" / args.config / "run.yaml"
-if config_file.exists():
-distro_name = args.config
-
-if not config_file.exists() and not has_yaml_suffix:
-# check if it's a build config saved to ~/.llama dir
-config_file = Path(DISTRIBS_BASE_DIR / f"llamastack-{args.config}" / f"{args.config}-run.yaml")
-
-if not config_file.exists():
-self.parser.error(
-f"File {str(config_file)} does not exist.\n\nPlease run `llama stack build` to generate (and optionally edit) a run.yaml file"
-)
-
-if not config_file.is_file():
-self.parser.error(
-f"Config file must be a valid file path, '{config_file}' is not a file: type={type(config_file)}"
-)
-
-return config_file, distro_name
-
 def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
 import yaml
 
 from llama_stack.core.configure import parse_and_maybe_upgrade_config
-from llama_stack.core.utils.exec import formulate_run_args, run_command
+if args.image_type or args.image_name:
+self.parser.error(
+"The --image-type and --image-name flags are no longer supported.\n\n"
+"Please activate your virtual environment manually before running `llama stack run`.\n\n"
+"For example:\n"
+" source /path/to/venv/bin/activate\n"
+" llama stack run <config>\n"
+)
 
 if args.enable_ui:
 self._start_ui_development_server(args.port)
-image_type, image_name = args.image_type, args.image_name
 
 if args.config:
 try:
@@ -121,10 +96,6 @@ class StackRun(Subcommand):
 else:
 config_file = None
 
-# Check if config is required based on image type
-if image_type == ImageType.VENV.value and not config_file:
-self.parser.error("Config file is required for venv environment")
 
 if config_file:
 logger.info(f"Using run configuration: {config_file}")
 
@@ -139,50 +110,67 @@ class StackRun(Subcommand):
 os.makedirs(str(config.external_providers_dir), exist_ok=True)
 except AttributeError as e:
 self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
 
+self._uvicorn_run(config_file, args)
+
+def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
+if not config_file:
+self.parser.error("Config file is required")
+
+config_file = resolve_config_or_distro(str(config_file), Mode.RUN)
+with open(config_file) as fp:
+config_contents = yaml.safe_load(fp)
+if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
+logger_config = LoggingConfig(**cfg)
+else:
+logger_config = None
+config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
+
+port = args.port or config.server.port
+host = config.server.host or ["::", "0.0.0.0"]
+
+# Set the config file in environment so create_app can find it
+os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
+
+uvicorn_config = {
+"factory": True,
+"host": host,
+"port": port,
+"lifespan": "on",
+"log_level": logger.getEffectiveLevel(),
+"log_config": logger_config,
+}
+
+keyfile = config.server.tls_keyfile
+certfile = config.server.tls_certfile
+if keyfile and certfile:
+uvicorn_config["ssl_keyfile"] = config.server.tls_keyfile
+uvicorn_config["ssl_certfile"] = config.server.tls_certfile
+if config.server.tls_cafile:
+uvicorn_config["ssl_ca_certs"] = config.server.tls_cafile
+uvicorn_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
+
+logger.info(
+f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}"
+)
 else:
-config = None
+logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
 
-# If neither image type nor image name is provided, assume the server should be run directly
-# using the current environment packages.
-if not image_type and not image_name:
-logger.info("No image type or image name provided. Assuming environment packages.")
-from llama_stack.core.server.server import main as server_main
+logger.info(f"Listening on {host}:{port}")
 
-# Build the server args from the current args passed to the CLI
-server_args = argparse.Namespace()
-for arg in vars(args):
-# If this is a function, avoid passing it
-# "args" contains:
-# func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
-if callable(getattr(args, arg)):
-continue
-if arg == "config":
-server_args.config = str(config_file)
-else:
-setattr(server_args, arg, getattr(args, arg))
-
-# Run the server
-server_main(server_args)
-else:
-run_args = formulate_run_args(image_type, image_name)
-
-run_args.extend([str(args.port)])
-
-if config_file:
-run_args.extend(["--config", str(config_file)])
-
-if args.env:
-for env_var in args.env:
-if "=" not in env_var:
-self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format")
-return
-key, value = env_var.split("=", 1) # split on first = only
-if not key:
-self.parser.error(f"Environment variable '{env_var}' has empty key")
-return
-run_args.extend(["--env", f"{key}={value}"])
-
-run_command(run_args)
+# We need to catch KeyboardInterrupt because uvicorn's signal handling
+# re-raises SIGINT signals using signal.raise_signal(), which Python
+# converts to KeyboardInterrupt. Without this catch, we'd get a confusing
+# stack trace when using Ctrl+C or kill -2 (SIGINT).
+# SIGTERM (kill -15) works fine without this because Python doesn't
+# have a default handler for it.
+#
+# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
+# signal handling but this is quite intrusive and not worth the effort.
+try:
+uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)
+except (KeyboardInterrupt, SystemExit):
+logger.info("Received interrupt signal, shutting down gracefully...")
 
 def _start_ui_development_server(self, stack_server_port: int):
 logger.info("Attempting to start UI development server...")
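The hunk above replaces the old exec/subprocess path with a direct uvicorn launch of the `create_app` factory. As a rough illustration of the same flow outside the CLI (a sketch only: the config path and port below are placeholders, not values taken from this commit, and TLS options are omitted):

```python
import os
import uvicorn

# create_app() reads this variable to locate the run configuration.
os.environ["LLAMA_STACK_CONFIG"] = "/path/to/run.yaml"  # placeholder path

# Mirrors the uvicorn_config dictionary built in _uvicorn_run above.
uvicorn.run(
    "llama_stack.core.server.server:create_app",
    factory=True,      # create_app is a factory callable, not an ASGI app instance
    host="0.0.0.0",
    port=8321,         # default port mentioned elsewhere in this diff
    lifespan="on",
)
```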
@@ -324,14 +324,14 @@ fi
 RUN pip uninstall -y uv
 EOF
 
-# If a run config is provided, we use the --config flag
+# If a run config is provided, we use the llama stack CLI
 if [[ -n "$run_config" ]]; then
 add_to_container << EOF
-ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$RUN_CONFIG_PATH"]
+ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"]
 EOF
 elif [[ "$distro_or_config" != *.yaml ]]; then
 add_to_container << EOF
-ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$distro_or_config"]
+ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"]
 EOF
 fi
@@ -32,7 +32,7 @@ from llama_stack.providers.utils.sqlstore.sqlstore import (
 sqlstore_impl,
 )
 
-logger = get_logger(name=__name__, category="openai::conversations")
+logger = get_logger(name=__name__, category="openai_conversations")
 
 
 class ConversationServiceConfig(BaseModel):
@@ -243,6 +243,7 @@ def get_external_providers_from_module(
 spec = module.get_provider_spec()
 else:
 # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon build and run
+# in the case we are building we CANNOT import this module of course because it has not been installed.
 spec = ProviderSpec(
 api=Api(provider_api),
 provider_type=provider.provider_type,
@@ -251,9 +252,20 @@ def get_external_providers_from_module(
 config_class="",
 )
 provider_type = provider.provider_type
-# in the case we are building we CANNOT import this module of course because it has not been installed.
-# return a partially filled out spec that the build script will populate.
-registry[Api(provider_api)][provider_type] = spec
+if isinstance(spec, list):
+# optionally allow people to pass inline and remote provider specs as a returned list.
+# with the old method, users could pass in directories of specs using overlapping code
+# we want to ensure we preserve that flexibility in this method.
+logger.info(
+f"Detected a list of external provider specs from {provider.module} adding all to the registry"
+)
+for provider_spec in spec:
+if provider_spec.provider_type != provider.provider_type:
+continue
+logger.info(f"Adding {provider.provider_type} to registry")
+registry[Api(provider_api)][provider.provider_type] = provider_spec
+else:
+registry[Api(provider_api)][provider_type] = spec
 except ModuleNotFoundError as exc:
 raise ValueError(
 "get_provider_spec not found. If specifying an external provider via `module` in the Provider spec, the Provider must have the `provider.get_provider_spec` module available"
@@ -611,7 +611,7 @@ class InferenceRouter(Inference):
 completion_text += "".join(choice_data["content_parts"])
 
 # Add metrics to the chunk
-if self.telemetry and chunk.usage:
+if self.telemetry and hasattr(chunk, "usage") and chunk.usage:
 metrics = self._construct_metrics(
 prompt_tokens=chunk.usage.prompt_tokens,
 completion_tokens=chunk.usage.completion_tokens,
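The added `hasattr` guard avoids an AttributeError when a streamed chunk type does not define `usage` at all. A minimal standalone sketch of the same check, using illustrative stand-in classes rather than the project's chunk types:

```python
class ChunkWithoutUsage:
    """Stand-in for a streamed chunk type that has no `usage` attribute."""


class ChunkWithUsage:
    """Stand-in for a chunk that carries token usage."""

    def __init__(self, prompt_tokens: int, completion_tokens: int):
        self.usage = type("Usage", (), {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
        })()


def should_record_metrics(chunk, telemetry_enabled: bool = True) -> bool:
    # Same shape as the updated condition: hasattr() short-circuits before
    # touching chunk.usage, so chunks without the attribute are skipped quietly.
    return bool(telemetry_enabled and hasattr(chunk, "usage") and chunk.usage)


print(should_record_metrics(ChunkWithoutUsage()))    # False, and no AttributeError
print(should_record_metrics(ChunkWithUsage(10, 5)))  # True
```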
@@ -33,7 +33,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
 try:
 models = await provider.list_models()
 except Exception as e:
-logger.warning(f"Model refresh failed for provider {provider_id}: {e}")
+logger.debug(f"Model refresh failed for provider {provider_id}: {e}")
 continue
 
 self.listed_providers.add(provider_id)
@@ -67,6 +67,19 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
 raise ValueError(f"Provider {model.provider_id} not found in the routing table")
 return self.impls_by_provider_id[model.provider_id]
 
+async def has_model(self, model_id: str) -> bool:
+"""
+Check if a model exists in the routing table.
+
+:param model_id: The model identifier to check
+:return: True if the model exists, False otherwise
+"""
+try:
+await lookup_model(self, model_id)
+return True
+except ModelNotFoundError:
+return False
+
 async def register_model(
 self,
 model_id: str,
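The new `has_model` helper simply converts a lookup failure into a boolean. A self-contained sketch of that pattern with placeholder data (the registry contents and lookup function below are invented for illustration, not part of the change):

```python
import asyncio


class ModelNotFoundError(Exception):
    """Stand-in for the stack's ModelNotFoundError."""


_KNOWN_MODELS = {"llama3.2:3b"}  # placeholder registry contents


async def lookup_model(model_id: str) -> str:
    if model_id not in _KNOWN_MODELS:
        raise ModelNotFoundError(model_id)
    return model_id


async def has_model(model_id: str) -> bool:
    # Same shape as the new routing-table method: swallow the lookup
    # exception and report existence as True/False instead.
    try:
        await lookup_model(model_id)
        return True
    except ModelNotFoundError:
        return False


print(asyncio.run(has_model("llama3.2:3b")))     # True
print(asyncio.run(has_model("not-registered")))  # False
```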
@@ -245,3 +245,65 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
 vector_store_id=vector_store_id,
 file_id=file_id,
 )
+
+async def openai_create_vector_store_file_batch(
+self,
+vector_store_id: str,
+file_ids: list[str],
+attributes: dict[str, Any] | None = None,
+chunking_strategy: Any | None = None,
+):
+await self.assert_action_allowed("update", "vector_db", vector_store_id)
+provider = await self.get_provider_impl(vector_store_id)
+return await provider.openai_create_vector_store_file_batch(
+vector_store_id=vector_store_id,
+file_ids=file_ids,
+attributes=attributes,
+chunking_strategy=chunking_strategy,
+)
+
+async def openai_retrieve_vector_store_file_batch(
+self,
+batch_id: str,
+vector_store_id: str,
+):
+await self.assert_action_allowed("read", "vector_db", vector_store_id)
+provider = await self.get_provider_impl(vector_store_id)
+return await provider.openai_retrieve_vector_store_file_batch(
+batch_id=batch_id,
+vector_store_id=vector_store_id,
+)
+
+async def openai_list_files_in_vector_store_file_batch(
+self,
+batch_id: str,
+vector_store_id: str,
+after: str | None = None,
+before: str | None = None,
+filter: str | None = None,
+limit: int | None = 20,
+order: str | None = "desc",
+):
+await self.assert_action_allowed("read", "vector_db", vector_store_id)
+provider = await self.get_provider_impl(vector_store_id)
+return await provider.openai_list_files_in_vector_store_file_batch(
+batch_id=batch_id,
+vector_store_id=vector_store_id,
+after=after,
+before=before,
+filter=filter,
+limit=limit,
+order=order,
+)
+
+async def openai_cancel_vector_store_file_batch(
+self,
+batch_id: str,
+vector_store_id: str,
+):
+await self.assert_action_allowed("update", "vector_db", vector_store_id)
+provider = await self.get_provider_impl(vector_store_id)
+return await provider.openai_cancel_vector_store_file_batch(
+batch_id=batch_id,
+vector_store_id=vector_store_id,
+)
@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import argparse
 import asyncio
 import concurrent.futures
 import functools
@@ -12,7 +11,6 @@ import inspect
 import json
 import logging  # allow-direct-logging
 import os
-import ssl
 import sys
 import traceback
 import warnings
@@ -35,7 +33,6 @@ from pydantic import BaseModel, ValidationError
 
 from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
 from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.cli.utils import add_config_distro_args, get_config_from_args
 from llama_stack.core.access_control.access_control import AccessDeniedError
 from llama_stack.core.datatypes import (
 AuthenticationRequiredError,
@@ -55,7 +52,6 @@ from llama_stack.core.stack import (
 Stack,
 cast_image_name_to_string,
 replace_env_vars,
-validate_env_pair,
 )
 from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
@@ -333,23 +329,18 @@ class ClientVersionMiddleware:
 return await self.app(scope, receive, send)
 
 
-def create_app(
-config_file: str | None = None,
-env_vars: list[str] | None = None,
-) -> StackApp:
+def create_app() -> StackApp:
 """Create and configure the FastAPI application.
 
-Args:
-config_file: Path to config file. If None, uses LLAMA_STACK_CONFIG env var or default resolution.
-env_vars: List of environment variables in KEY=value format.
-disable_version_check: Whether to disable version checking. If None, uses LLAMA_STACK_DISABLE_VERSION_CHECK env var.
+This factory function reads configuration from environment variables:
+- LLAMA_STACK_CONFIG: Path to config file (required)
 
 Returns:
 Configured StackApp instance.
 """
-config_file = config_file or os.getenv("LLAMA_STACK_CONFIG")
+config_file = os.getenv("LLAMA_STACK_CONFIG")
 if config_file is None:
-raise ValueError("No config file provided and LLAMA_STACK_CONFIG env var is not set")
+raise ValueError("LLAMA_STACK_CONFIG environment variable is required")
 
 config_file = resolve_config_or_distro(config_file, Mode.RUN)
 
@@ -361,16 +352,6 @@ def create_app(
 logger_config = LoggingConfig(**cfg)
 logger = get_logger(name=__name__, category="core::server", config=logger_config)
 
-if env_vars:
-for env_pair in env_vars:
-try:
-key, value = validate_env_pair(env_pair)
-logger.info(f"Setting environment variable {key} => {value}")
-os.environ[key] = value
-except ValueError as e:
-logger.error(f"Error: {str(e)}")
-raise ValueError(f"Invalid environment variable format: {env_pair}") from e
 
 config = replace_env_vars(config_contents)
 config = StackRunConfig(**cast_image_name_to_string(config))
 
@@ -494,101 +475,6 @@ def create_app(
 return app
 
 
-def main(args: argparse.Namespace | None = None):
-"""Start the LlamaStack server."""
-parser = argparse.ArgumentParser(description="Start the LlamaStack server.")
-
-add_config_distro_args(parser)
-parser.add_argument(
-"--port",
-type=int,
-default=int(os.getenv("LLAMA_STACK_PORT", 8321)),
-help="Port to listen on",
-)
-parser.add_argument(
-"--env",
-action="append",
-help="Environment variables in KEY=value format. Can be specified multiple times.",
-)
-
-# Determine whether the server args are being passed by the "run" command, if this is the case
-# the args will be passed as a Namespace object to the main function, otherwise they will be
-# parsed from the command line
-if args is None:
-args = parser.parse_args()
-
-config_or_distro = get_config_from_args(args)
-
-try:
-app = create_app(
-config_file=config_or_distro,
-env_vars=args.env,
-)
-except Exception as e:
-logger.error(f"Error creating app: {str(e)}")
-sys.exit(1)
-
-config_file = resolve_config_or_distro(config_or_distro, Mode.RUN)
-with open(config_file) as fp:
-config_contents = yaml.safe_load(fp)
-if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
-logger_config = LoggingConfig(**cfg)
-else:
-logger_config = None
-config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
-
-import uvicorn
-
-# Configure SSL if certificates are provided
-port = args.port or config.server.port
-
-ssl_config = None
-keyfile = config.server.tls_keyfile
-certfile = config.server.tls_certfile
-
-if keyfile and certfile:
-ssl_config = {
-"ssl_keyfile": keyfile,
-"ssl_certfile": certfile,
-}
-if config.server.tls_cafile:
-ssl_config["ssl_ca_certs"] = config.server.tls_cafile
-ssl_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
-logger.info(
-f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}"
-)
-else:
-logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
-
-listen_host = config.server.host or ["::", "0.0.0.0"]
-logger.info(f"Listening on {listen_host}:{port}")
-
-uvicorn_config = {
-"app": app,
-"host": listen_host,
-"port": port,
-"lifespan": "on",
-"log_level": logger.getEffectiveLevel(),
-"log_config": logger_config,
-}
-if ssl_config:
-uvicorn_config.update(ssl_config)
-
-# We need to catch KeyboardInterrupt because uvicorn's signal handling
-# re-raises SIGINT signals using signal.raise_signal(), which Python
-# converts to KeyboardInterrupt. Without this catch, we'd get a confusing
-# stack trace when using Ctrl+C or kill -2 (SIGINT).
-# SIGTERM (kill -15) works fine without this because Python doesn't
-# have a default handler for it.
-#
-# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
-# signal handling but this is quite intrusive and not worth the effort.
-try:
-asyncio.run(uvicorn.Server(uvicorn.Config(**uvicorn_config)).serve())
-except (KeyboardInterrupt, SystemExit):
-logger.info("Received interrupt signal, shutting down gracefully...")
-
-
 def _log_run_config(run_config: StackRunConfig):
 """Logs the run config with redacted fields and disabled providers removed."""
 logger.info("Run configuration:")
@@ -615,7 +501,3 @@ def remove_disabled_providers(obj):
 return [item for item in (remove_disabled_providers(i) for i in obj) if item is not None]
 else:
 return obj
 
 
-if __name__ == "__main__":
-main()
@@ -274,22 +274,6 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
 return config_dict
 
 
-def validate_env_pair(env_pair: str) -> tuple[str, str]:
-"""Validate and split an environment variable key-value pair."""
-try:
-key, value = env_pair.split("=", 1)
-key = key.strip()
-if not key:
-raise ValueError(f"Empty key in environment variable pair: {env_pair}")
-if not all(c.isalnum() or c == "_" for c in key):
-raise ValueError(f"Key must contain only alphanumeric characters and underscores: {key}")
-return key, value
-except ValueError as e:
-raise ValueError(
-f"Invalid environment variable format '{env_pair}': {str(e)}. Expected format: KEY=value"
-) from e
-
-
 def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig) -> None:
 """Add internal implementations (inspect and providers) to the implementations dictionary.
 
@@ -25,7 +25,7 @@ error_handler() {
 trap 'error_handler ${LINENO}' ERR
 
 if [ $# -lt 3 ]; then
-  echo "Usage: $0 <env_type> <env_path_or_name> <port> [--config <yaml_config>] [--env KEY=VALUE]..."
+  echo "Usage: $0 <env_type> <env_path_or_name> <port> [--config <yaml_config>]"
   exit 1
 fi
 
@@ -43,7 +43,6 @@ SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
 
 # Initialize variables
 yaml_config=""
-env_vars=""
 other_args=""
 
 # Process remaining arguments
@@ -58,15 +57,6 @@ while [[ $# -gt 0 ]]; do
         exit 1
       fi
       ;;
-    --env)
-      if [[ -n "$2" ]]; then
-        env_vars="$env_vars --env $2"
-        shift 2
-      else
-        echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2
-        exit 1
-      fi
-      ;;
     *)
       other_args="$other_args $1"
       shift
@@ -116,10 +106,9 @@ if [[ "$env_type" == "venv" ]]; then
     yaml_config_arg=""
 fi
 
-$PYTHON_BINARY -m llama_stack.core.server.server \
+llama stack run \
     $yaml_config_arg \
     --port "$port" \
-    $env_vars \
     $other_args
 elif [[ "$env_type" == "container" ]]; then
     echo -e "${RED}Warning: Llama Stack no longer supports running Containers via the 'llama stack run' command.${NC}"
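With the `--env` plumbing removed from the launcher script above, settings reach the server as ordinary process environment variables. A small hedged sketch of driving the CLI that way from Python (the variable name and model id are illustrative assumptions):

```python
# Sketch: pass configuration via the process environment instead of --env flags.
import os
import subprocess

env = os.environ.copy()
env["INFERENCE_MODEL"] = "meta-llama/Llama-3.2-3B-Instruct"  # assumed example value

# Equivalent to: INFERENCE_MODEL=... llama stack run run.yaml --port 8321
subprocess.run(
    ["llama", "stack", "run", "run.yaml", "--port", "8321"],
    env=env,
    check=True,
)
```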
@@ -98,7 +98,10 @@ class DiskDistributionRegistry(DistributionRegistry):
         existing_obj = await self.get(obj.type, obj.identifier)
         # dont register if the object's providerid already exists
         if existing_obj and existing_obj.provider_id == obj.provider_id:
-            return False
+            raise ValueError(
+                f"Provider '{obj.provider_id}' is already registered."
+                f"Unregister the existing provider first before registering it again."
+            )
 
         await self.kvstore.set(
             KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),
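The change above turns a silent `return False` into a hard failure. A minimal sketch of handling the new behavior on the caller side, assuming a registry instance and object are already in hand (names are illustrative, not a documented API):

```python
async def register_or_report(registry, obj) -> bool:
    """Try to register `obj`; report duplicate provider registrations instead of crashing."""
    try:
        await registry.register(obj)
        return True
    except ValueError as exc:
        # New behavior: duplicates raise, e.g. "Provider '...' is already registered."
        print(f"registration rejected: {exc}")
        return False
```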
@@ -117,11 +117,11 @@ docker run -it \
   # NOTE: mount the llama-stack directory if testing local changes else not needed
   -v $HOME/git/llama-stack:/app/llama-stack-source \
   # localhost/distribution-dell:dev if building / testing locally
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e CHROMA_URL=$CHROMA_URL \
   llamastack/distribution-{{ name }}\
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env CHROMA_URL=$CHROMA_URL
+  --port $LLAMA_STACK_PORT
 
 ```

@@ -142,14 +142,14 @@ docker run \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
   -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e SAFETY_MODEL=$SAFETY_MODEL \
+  -e DEH_SAFETY_URL=$DEH_SAFETY_URL \
+  -e CHROMA_URL=$CHROMA_URL \
   llamastack/distribution-{{ name }} \
   --config /root/my-run.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env SAFETY_MODEL=$SAFETY_MODEL \
-  --env DEH_SAFETY_URL=$DEH_SAFETY_URL \
-  --env CHROMA_URL=$CHROMA_URL
+  --port $LLAMA_STACK_PORT
 ```
 
 ### Via Conda

@@ -158,21 +158,21 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a
 
 ```bash
 llama stack build --distro {{ name }} --image-type conda
-llama stack run {{ name }}
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env CHROMA_URL=$CHROMA_URL
+INFERENCE_MODEL=$INFERENCE_MODEL \
+DEH_URL=$DEH_URL \
+CHROMA_URL=$CHROMA_URL \
+llama stack run {{ name }} \
+  --port $LLAMA_STACK_PORT
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+INFERENCE_MODEL=$INFERENCE_MODEL \
+DEH_URL=$DEH_URL \
+SAFETY_MODEL=$SAFETY_MODEL \
+DEH_SAFETY_URL=$DEH_SAFETY_URL \
+CHROMA_URL=$CHROMA_URL \
 llama stack run ./run-with-safety.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env SAFETY_MODEL=$SAFETY_MODEL \
-  --env DEH_SAFETY_URL=$DEH_SAFETY_URL \
-  --env CHROMA_URL=$CHROMA_URL
+  --port $LLAMA_STACK_PORT
 ```
@@ -72,9 +72,9 @@ docker run \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
+  -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
   llamastack/distribution-{{ name }} \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --port $LLAMA_STACK_PORT
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:

@@ -86,10 +86,10 @@ docker run \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
+  -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+  -e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
   llamastack/distribution-{{ name }} \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --port $LLAMA_STACK_PORT
 ```
 
 ### Via venv

@@ -98,16 +98,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL
 
 ```bash
 llama stack build --distro {{ name }} --image-type venv
+INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
 llama stack run distributions/{{ name }}/run.yaml \
-  --port 8321 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --port 8321
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
 llama stack run distributions/{{ name }}/run-with-safety.yaml \
-  --port 8321 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --port 8321
 ```
@@ -118,10 +118,10 @@ docker run \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
   llamastack/distribution-{{ name }} \
   --config /root/my-run.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
+  --port $LLAMA_STACK_PORT
 ```
 
 ### Via venv

@@ -131,10 +131,10 @@ If you've set up your local development environment, you can also build the imag
 ```bash
 INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
 llama stack build --distro nvidia --image-type venv
+NVIDIA_API_KEY=$NVIDIA_API_KEY \
+INFERENCE_MODEL=$INFERENCE_MODEL \
 llama stack run ./run.yaml \
-  --port 8321 \
-  --env NVIDIA_API_KEY=$NVIDIA_API_KEY \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL
+  --port 8321
 ```
 
 ## Example Notebooks

@@ -3,3 +3,5 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+
+from .watsonx import get_distribution_template  # noqa: F401
@@ -3,44 +3,33 @@ distribution_spec:
   description: Use watsonx for running LLM inference
   providers:
     inference:
-    - provider_id: watsonx
-      provider_type: remote::watsonx
-    - provider_id: sentence-transformers
-      provider_type: inline::sentence-transformers
+    - provider_type: remote::watsonx
+    - provider_type: inline::sentence-transformers
     vector_io:
-    - provider_id: faiss
-      provider_type: inline::faiss
+    - provider_type: inline::faiss
     safety:
-    - provider_id: llama-guard
-      provider_type: inline::llama-guard
+    - provider_type: inline::llama-guard
     agents:
-    - provider_id: meta-reference
-      provider_type: inline::meta-reference
+    - provider_type: inline::meta-reference
     telemetry:
-    - provider_id: meta-reference
-      provider_type: inline::meta-reference
+    - provider_type: inline::meta-reference
     eval:
-    - provider_id: meta-reference
-      provider_type: inline::meta-reference
+    - provider_type: inline::meta-reference
     datasetio:
-    - provider_id: huggingface
-      provider_type: remote::huggingface
-    - provider_id: localfs
-      provider_type: inline::localfs
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
     scoring:
-    - provider_id: basic
-      provider_type: inline::basic
-    - provider_id: llm-as-judge
-      provider_type: inline::llm-as-judge
-    - provider_id: braintrust
-      provider_type: inline::braintrust
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
     tool_runtime:
     - provider_type: remote::brave-search
     - provider_type: remote::tavily-search
    - provider_type: inline::rag-runtime
     - provider_type: remote::model-context-protocol
+    files:
+    - provider_type: inline::localfs
 image_type: venv
 additional_pip_packages:
+- aiosqlite
 - sqlalchemy[asyncio]
-- aiosqlite
-- aiosqlite
@@ -4,13 +4,13 @@ apis:
 - agents
 - datasetio
 - eval
+- files
 - inference
 - safety
 - scoring
 - telemetry
 - tool_runtime
 - vector_io
-- files
 providers:
   inference:
   - provider_id: watsonx

@@ -19,8 +19,6 @@ providers:
       url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
       api_key: ${env.WATSONX_API_KEY:=}
       project_id: ${env.WATSONX_PROJECT_ID:=}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss

@@ -48,7 +46,7 @@ providers:
     provider_type: inline::meta-reference
     config:
       service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
-      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
+      sinks: ${env.TELEMETRY_SINKS:=sqlite}
       sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db
       otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
   eval:

@@ -109,102 +107,7 @@ metadata_store:
 inference_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db
-models:
-- metadata: {}
-  model_id: meta-llama/llama-3-3-70b-instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-3-70b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.3-70B-Instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-3-70b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/llama-2-13b-chat
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-2-13b-chat
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-2-13b
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-2-13b-chat
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/llama-3-1-70b-instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-1-70b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-1-70b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/llama-3-1-8b-instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-1-8b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-1-8b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/llama-3-2-11b-vision-instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/llama-3-2-1b-instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-2-1b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-2-1b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/llama-3-2-3b-instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-2-3b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.2-3B-Instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-2-3b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/llama-3-2-90b-vision-instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/llama-guard-3-11b-vision
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-guard-3-11b-vision
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-Guard-3-11B-Vision
-  provider_id: watsonx
-  provider_model_id: meta-llama/llama-guard-3-11b-vision
-  model_type: llm
-- metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
-  provider_id: sentence-transformers
-  model_type: embedding
+models: []
 shields: []
 vector_dbs: []
 datasets: []
@@ -4,17 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from pathlib import Path
-
-from llama_stack.apis.models import ModelType
-from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput
-from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
+from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput
+from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
 from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
-from llama_stack.providers.inline.inference.sentence_transformers import (
-    SentenceTransformersInferenceConfig,
-)
 from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
-from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES
 
 
 def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:

@@ -52,15 +46,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
         config=WatsonXConfig.sample_run_config(),
     )
 
-    embedding_provider = Provider(
-        provider_id="sentence-transformers",
-        provider_type="inline::sentence-transformers",
-        config=SentenceTransformersInferenceConfig.sample_run_config(),
-    )
-
-    available_models = {
-        "watsonx": MODEL_ENTRIES,
-    }
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",

@@ -72,36 +57,25 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
         ),
     ]
 
-    embedding_model = ModelInput(
-        model_id="all-MiniLM-L6-v2",
-        provider_id="sentence-transformers",
-        model_type=ModelType.embedding,
-        metadata={
-            "embedding_dimension": 384,
-        },
-    )
-
     files_provider = Provider(
         provider_id="meta-reference-files",
         provider_type="inline::localfs",
         config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
     )
-    default_models, _ = get_model_registry(available_models)
     return DistributionTemplate(
         name=name,
         distro_type="remote_hosted",
         description="Use watsonx for running LLM inference",
         container_image=None,
-        template_path=Path(__file__).parent / "doc_template.md",
+        template_path=None,
         providers=providers,
-        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
-                    "inference": [inference_provider, embedding_provider],
+                    "inference": [inference_provider],
                     "files": [files_provider],
                 },
-                default_models=default_models + [embedding_model],
+                default_models=[],
                 default_tool_groups=default_tool_groups,
             ),
         },
@@ -31,12 +31,17 @@ CATEGORIES = [
     "client",
     "telemetry",
     "openai_responses",
+    "openai_conversations",
     "testing",
     "providers",
     "models",
     "files",
     "vector_io",
     "tool_runtime",
+    "cli",
+    "post_training",
+    "scoring",
+    "tests",
 ]
 UNCATEGORIZED = "uncategorized"

@@ -128,7 +133,10 @@ def strip_rich_markup(text):
 
 class CustomRichHandler(RichHandler):
     def __init__(self, *args, **kwargs):
-        kwargs["console"] = Console()
+        # Set a reasonable default width for console output, especially when redirected to files
+        console_width = int(os.environ.get("LLAMA_STACK_LOG_WIDTH", "120"))
+        # Don't force terminal codes to avoid ANSI escape codes in log files
+        kwargs["console"] = Console(width=console_width)
         super().__init__(*args, **kwargs)
 
     def emit(self, record):

@@ -261,11 +269,12 @@ def get_logger(
     if root_category in _category_levels:
         log_level = _category_levels[root_category]
     else:
-        log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
         if category != UNCATEGORIZED:
-            logging.warning(
-                f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}"
+            raise ValueError(
+                f"Unknown logging category: {category}. To resolve, choose a valid category from the CATEGORIES list "
+                f"or add it to the CATEGORIES list. Available categories: {CATEGORIES}"
             )
+        log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
     logger.setLevel(log_level)
     return logging.LoggerAdapter(logger, {"category": category})
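A short sketch of how the logging changes above look from a caller's perspective: the line width comes from `LLAMA_STACK_LOG_WIDTH` (default 120), and an unknown category now raises instead of warning. The module path and category follow the diff; the specific width and message are example values.

```python
# Sketch: configure log line width and request a logger for a valid category.
import os

# Wrap long log lines at 200 columns instead of the 120-column default.
os.environ["LLAMA_STACK_LOG_WIDTH"] = "200"

from llama_stack.log import get_logger

# "models" is one of the valid CATEGORIES; an unknown category now raises ValueError.
logger = get_logger(__name__, category="models")
logger.info("tokenizer loaded")
```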
@@ -11,19 +11,13 @@
 # top-level folder for each specific model found within the models/ directory at
 # the top-level of this source tree.
 
-import json
 import textwrap
-from pathlib import Path
 
 from pydantic import BaseModel, Field
 
 from llama_stack.models.llama.datatypes import (
     RawContent,
-    RawMediaItem,
     RawMessage,
-    RawTextItem,
-    StopReason,
-    ToolCall,
     ToolPromptFormat,
 )
 from llama_stack.models.llama.llama4.tokenizer import Tokenizer

@@ -175,25 +169,6 @@ def llama3_1_builtin_code_interpreter_dialog(tool_prompt_format=ToolPromptFormat
     return messages
 
 
-def llama3_1_builtin_tool_call_with_image_dialog(
-    tool_prompt_format=ToolPromptFormat.json,
-):
-    this_dir = Path(__file__).parent
-    with open(this_dir / "llama3/dog.jpg", "rb") as f:
-        img = f.read()
-
-    interface = LLama31Interface(tool_prompt_format)
-
-    messages = interface.system_messages(**system_message_builtin_tools_only())
-    messages += interface.user_message(content=[RawMediaItem(data=img), RawTextItem(text="What is this dog breed?")])
-    messages += interface.assistant_response_messages(
-        "Based on the description of the dog in the image, it appears to be a small breed dog, possibly a terrier mix",
-        StopReason.end_of_turn,
-    )
-    messages += interface.user_message("Search the web for some food recommendations for the indentified breed")
-    return messages
-
-
 def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
     interface = LLama31Interface(tool_prompt_format)

@@ -202,35 +177,6 @@ def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
     return messages
 
 
-def llama3_1_e2e_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
-    tool_response = json.dumps(["great song1", "awesome song2", "cool song3"])
-    interface = LLama31Interface(tool_prompt_format)
-
-    messages = interface.system_messages(**system_message_custom_tools_only())
-    messages += interface.user_message(content="Use tools to get latest trending songs")
-    messages.append(
-        RawMessage(
-            role="assistant",
-            content="",
-            stop_reason=StopReason.end_of_message,
-            tool_calls=[
-                ToolCall(
-                    call_id="call_id",
-                    tool_name="trending_songs",
-                    arguments={"n": "10", "genre": "latest"},
-                )
-            ],
-        ),
-    )
-    messages.append(
-        RawMessage(
-            role="assistant",
-            content=tool_response,
-        )
-    )
-    return messages
-
-
 def llama3_2_user_assistant_conversation():
     return UseCase(
         title="User and assistant conversation",
@@ -9,7 +9,7 @@ from pathlib import Path
 
 from llama_stack.log import get_logger
 
-logger = get_logger(__name__, "tokenizer_utils")
+logger = get_logger(__name__, "models")
 
 
 def load_bpe_file(model_path: Path) -> dict[bytes, int]:
@@ -22,6 +22,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap
         deps[Api.tool_runtime],
         deps[Api.tool_groups],
         policy,
+        Api.telemetry in deps,
     )
     await impl.initialize()
     return impl
@@ -7,8 +7,6 @@
 import copy
 import json
 import re
-import secrets
-import string
 import uuid
 import warnings
 from collections.abc import AsyncGenerator

@@ -84,11 +82,6 @@ from llama_stack.providers.utils.telemetry import tracing
 from .persistence import AgentPersistence
 from .safety import SafetyException, ShieldRunnerMixin
 
-
-def make_random_string(length: int = 8):
-    return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))
-
-
 TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})")
 MEMORY_QUERY_TOOL = "knowledge_search"
 WEB_SEARCH_TOOL = "web_search"

@@ -110,6 +103,7 @@ class ChatAgent(ShieldRunnerMixin):
         persistence_store: KVStore,
         created_at: str,
         policy: list[AccessRule],
+        telemetry_enabled: bool = False,
     ):
         self.agent_id = agent_id
         self.agent_config = agent_config

@@ -120,6 +114,7 @@ class ChatAgent(ShieldRunnerMixin):
         self.tool_runtime_api = tool_runtime_api
         self.tool_groups_api = tool_groups_api
         self.created_at = created_at
+        self.telemetry_enabled = telemetry_enabled
 
         ShieldRunnerMixin.__init__(
             self,

@@ -188,28 +183,30 @@ class ChatAgent(ShieldRunnerMixin):
 
     async def create_and_execute_turn(self, request: AgentTurnCreateRequest) -> AsyncGenerator:
         turn_id = str(uuid.uuid4())
-        span = tracing.get_current_span()
-        if span:
-            span.set_attribute("session_id", request.session_id)
-            span.set_attribute("agent_id", self.agent_id)
-            span.set_attribute("request", request.model_dump_json())
-            span.set_attribute("turn_id", turn_id)
-            if self.agent_config.name:
-                span.set_attribute("agent_name", self.agent_config.name)
+        if self.telemetry_enabled:
+            span = tracing.get_current_span()
+            if span is not None:
+                span.set_attribute("session_id", request.session_id)
+                span.set_attribute("agent_id", self.agent_id)
+                span.set_attribute("request", request.model_dump_json())
+                span.set_attribute("turn_id", turn_id)
+                if self.agent_config.name:
+                    span.set_attribute("agent_name", self.agent_config.name)
 
         await self._initialize_tools(request.toolgroups)
         async for chunk in self._run_turn(request, turn_id):
             yield chunk
 
     async def resume_turn(self, request: AgentTurnResumeRequest) -> AsyncGenerator:
-        span = tracing.get_current_span()
-        if span:
-            span.set_attribute("agent_id", self.agent_id)
-            span.set_attribute("session_id", request.session_id)
-            span.set_attribute("request", request.model_dump_json())
-            span.set_attribute("turn_id", request.turn_id)
-            if self.agent_config.name:
-                span.set_attribute("agent_name", self.agent_config.name)
+        if self.telemetry_enabled:
+            span = tracing.get_current_span()
+            if span is not None:
+                span.set_attribute("agent_id", self.agent_id)
+                span.set_attribute("session_id", request.session_id)
+                span.set_attribute("request", request.model_dump_json())
+                span.set_attribute("turn_id", request.turn_id)
+                if self.agent_config.name:
+                    span.set_attribute("agent_name", self.agent_config.name)
 
         await self._initialize_tools()
         async for chunk in self._run_turn(request):

@@ -395,9 +392,12 @@ class ChatAgent(ShieldRunnerMixin):
         touchpoint: str,
     ) -> AsyncGenerator:
         async with tracing.span("run_shields") as span:
-            span.set_attribute("input", [m.model_dump_json() for m in messages])
+            if self.telemetry_enabled and span is not None:
+                span.set_attribute("input", [m.model_dump_json() for m in messages])
+                if len(shields) == 0:
+                    span.set_attribute("output", "no shields")
+
             if len(shields) == 0:
-                span.set_attribute("output", "no shields")
                 return
 
             step_id = str(uuid.uuid4())

@@ -430,7 +430,8 @@ class ChatAgent(ShieldRunnerMixin):
                         )
                     )
                 )
-                span.set_attribute("output", e.violation.model_dump_json())
+                if self.telemetry_enabled and span is not None:
+                    span.set_attribute("output", e.violation.model_dump_json())
 
                 yield CompletionMessage(
                     content=str(e),

@@ -453,7 +454,8 @@ class ChatAgent(ShieldRunnerMixin):
                     )
                 )
             )
-            span.set_attribute("output", "no violations")
+            if self.telemetry_enabled and span is not None:
+                span.set_attribute("output", "no violations")
 
     async def _run(
         self,

@@ -518,8 +520,9 @@ class ChatAgent(ShieldRunnerMixin):
         stop_reason: StopReason | None = None
 
         async with tracing.span("inference") as span:
-            if self.agent_config.name:
-                span.set_attribute("agent_name", self.agent_config.name)
+            if self.telemetry_enabled and span is not None:
+                if self.agent_config.name:
+                    span.set_attribute("agent_name", self.agent_config.name)
 
             def _serialize_nested(value):
                 """Recursively serialize nested Pydantic models to dicts."""

@@ -637,18 +640,19 @@ class ChatAgent(ShieldRunnerMixin):
                 else:
                     raise ValueError(f"Unexpected delta type {type(delta)}")
 
-            span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn)
-            span.set_attribute(
-                "input",
-                json.dumps([json.loads(m.model_dump_json()) for m in input_messages]),
-            )
-            output_attr = json.dumps(
-                {
-                    "content": content,
-                    "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
-                }
-            )
-            span.set_attribute("output", output_attr)
+            if self.telemetry_enabled and span is not None:
+                span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn)
+                span.set_attribute(
+                    "input",
+                    json.dumps([json.loads(m.model_dump_json()) for m in input_messages]),
+                )
+                output_attr = json.dumps(
+                    {
+                        "content": content,
+                        "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
+                    }
+                )
+                span.set_attribute("output", output_attr)
 
             n_iter += 1
             await self.storage.set_num_infer_iters_in_turn(session_id, turn_id, n_iter)

@@ -756,7 +760,9 @@ class ChatAgent(ShieldRunnerMixin):
                 {
                     "tool_name": tool_call.tool_name,
                     "input": message.model_dump_json(),
-                },
+                }
+                if self.telemetry_enabled
+                else {},
             ) as span:
                 tool_execution_start_time = datetime.now(UTC).isoformat()
                 tool_result = await self.execute_tool_call_maybe(

@@ -771,7 +777,8 @@ class ChatAgent(ShieldRunnerMixin):
                     call_id=tool_call.call_id,
                     content=tool_result.content,
                 )
-                span.set_attribute("output", result_message.model_dump_json())
+                if self.telemetry_enabled and span is not None:
+                    span.set_attribute("output", result_message.model_dump_json())
 
                 # Store tool execution step
                 tool_execution_step = ToolExecutionStep(
@@ -64,6 +64,7 @@ class MetaReferenceAgentsImpl(Agents):
         tool_runtime_api: ToolRuntime,
         tool_groups_api: ToolGroups,
         policy: list[AccessRule],
+        telemetry_enabled: bool = False,
     ):
         self.config = config
         self.inference_api = inference_api

@@ -71,6 +72,7 @@ class MetaReferenceAgentsImpl(Agents):
         self.safety_api = safety_api
         self.tool_runtime_api = tool_runtime_api
         self.tool_groups_api = tool_groups_api
+        self.telemetry_enabled = telemetry_enabled
 
         self.in_memory_store = InmemoryKVStoreImpl()
         self.openai_responses_impl: OpenAIResponsesImpl | None = None

@@ -135,6 +137,7 @@ class MetaReferenceAgentsImpl(Agents):
             ),
             created_at=agent_info.created_at,
             policy=self.policy,
+            telemetry_enabled=self.telemetry_enabled,
         )
 
     async def create_agent_session(

@@ -269,7 +269,7 @@ class OpenAIResponsesImpl:
             response_tools=tools,
             temperature=temperature,
             response_format=response_format,
-            inputs=input,
+            inputs=all_input,
         )
 
         # Create orchestrator and delegate streaming logic
@@ -97,6 +97,8 @@ class StreamingResponseOrchestrator:
         self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = {}
         # Track final messages after all tool executions
         self.final_messages: list[OpenAIMessageParam] = []
+        # mapping for annotations
+        self.citation_files: dict[str, str] = {}
 
     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
         # Initialize output messages

@@ -126,6 +128,7 @@ class StreamingResponseOrchestrator:
             # Text is the default response format for chat completion so don't need to pass it
             # (some providers don't support non-empty response_format when tools are present)
             response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
+            logger.debug(f"calling openai_chat_completion with tools: {self.ctx.chat_tools}")
             completion_result = await self.inference_api.openai_chat_completion(
                 model=self.ctx.model,
                 messages=messages,

@@ -160,7 +163,7 @@ class StreamingResponseOrchestrator:
             # Handle choices with no tool calls
             for choice in current_response.choices:
                 if not (choice.message.tool_calls and self.ctx.response_tools):
-                    output_messages.append(await convert_chat_choice_to_response_message(choice))
+                    output_messages.append(await convert_chat_choice_to_response_message(choice, self.citation_files))
 
             # Execute tool calls and coordinate results
             async for stream_event in self._coordinate_tool_execution(

@@ -172,6 +175,8 @@ class StreamingResponseOrchestrator:
             ):
                 yield stream_event
 
+            messages = next_turn_messages
+
             if not function_tool_calls and not non_function_tool_calls:
                 break
 

@@ -184,9 +189,7 @@ class StreamingResponseOrchestrator:
                 logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}")
                 break
 
-            messages = next_turn_messages
-
-        self.final_messages = messages.copy() + [current_response.choices[0].message]
+        self.final_messages = messages.copy()
 
         # Create final response
         final_response = OpenAIResponseObject(

@@ -211,6 +214,8 @@ class StreamingResponseOrchestrator:
 
         for choice in current_response.choices:
             next_turn_messages.append(choice.message)
+            logger.debug(f"Choice message content: {choice.message.content}")
+            logger.debug(f"Choice message tool_calls: {choice.message.tool_calls}")
 
             if choice.message.tool_calls and self.ctx.response_tools:
                 for tool_call in choice.message.tool_calls:

@@ -227,9 +232,11 @@ class StreamingResponseOrchestrator:
                             non_function_tool_calls.append(tool_call)
                         else:
                             logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}")
+                            next_turn_messages.pop()
                     else:
                         logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}")
                         approvals.append(tool_call)
+                        next_turn_messages.pop()
                 else:
                     non_function_tool_calls.append(tool_call)
 

@@ -470,6 +477,8 @@ class StreamingResponseOrchestrator:
             tool_call_log = result.final_output_message
             tool_response_message = result.final_input_message
             self.sequence_number = result.sequence_number
+            if result.citation_files:
+                self.citation_files.update(result.citation_files)
 
             if tool_call_log:
                 output_messages.append(tool_call_log)
@ -94,7 +94,10 @@ class ToolExecutor:
|
||||||
|
|
||||||
# Yield the final result
|
# Yield the final result
|
||||||
yield ToolExecutionResult(
|
yield ToolExecutionResult(
|
||||||
sequence_number=sequence_number, final_output_message=output_message, final_input_message=input_message
|
sequence_number=sequence_number,
|
||||||
|
final_output_message=output_message,
|
||||||
|
final_input_message=input_message,
|
||||||
|
citation_files=result.metadata.get("citation_files") if result and result.metadata else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _execute_knowledge_search_via_vector_store(
|
async def _execute_knowledge_search_via_vector_store(
|
||||||
|
|
@ -129,8 +132,6 @@ class ToolExecutor:
|
||||||
for results in all_results:
|
for results in all_results:
|
||||||
search_results.extend(results)
|
search_results.extend(results)
|
||||||
|
|
||||||
# Convert search results to tool result format matching memory.py
|
|
||||||
# Format the results as interleaved content similar to memory.py
|
|
||||||
content_items = []
|
content_items = []
|
||||||
content_items.append(
|
content_items.append(
|
||||||
TextContentItem(
|
TextContentItem(
|
||||||
|
|
@ -138,27 +139,58 @@ class ToolExecutor:
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
unique_files = set()
|
||||||
for i, result_item in enumerate(search_results):
|
for i, result_item in enumerate(search_results):
|
||||||
chunk_text = result_item.content[0].text if result_item.content else ""
|
chunk_text = result_item.content[0].text if result_item.content else ""
|
||||||
metadata_text = f"document_id: {result_item.file_id}, score: {result_item.score}"
|
# Get file_id from attributes if result_item.file_id is empty
|
||||||
|
file_id = result_item.file_id or (
|
||||||
|
result_item.attributes.get("document_id") if result_item.attributes else None
|
||||||
|
)
|
||||||
|
metadata_text = f"document_id: {file_id}, score: {result_item.score}"
|
||||||
if result_item.attributes:
|
if result_item.attributes:
|
||||||
metadata_text += f", attributes: {result_item.attributes}"
|
metadata_text += f", attributes: {result_item.attributes}"
|
||||||
text_content = f"[{i + 1}] {metadata_text}\n{chunk_text}\n"
|
|
||||||
|
text_content = f"[{i + 1}] {metadata_text} (cite as <|{file_id}|>)\n{chunk_text}\n"
|
||||||
content_items.append(TextContentItem(text=text_content))
|
content_items.append(TextContentItem(text=text_content))
|
||||||
|
unique_files.add(file_id)
|
||||||
|
|
||||||
content_items.append(TextContentItem(text="END of knowledge_search tool results.\n"))
|
content_items.append(TextContentItem(text="END of knowledge_search tool results.\n"))
|
||||||
|
|
||||||
|
citation_instruction = ""
|
||||||
|
if unique_files:
|
||||||
|
citation_instruction = (
|
||||||
|
" Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). "
|
||||||
|
"Do not add extra punctuation. Use only the file IDs provided (do not invent new ones)."
|
||||||
|
)
|
||||||
|
|
||||||
content_items.append(
|
content_items.append(
|
||||||
TextContentItem(
|
TextContentItem(
|
||||||
text=f'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.\n',
|
text=f'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{citation_instruction}\n',
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# handling missing attributes for old versions
|
||||||
|
citation_files = {}
|
||||||
|
for result in search_results:
|
||||||
|
file_id = result.file_id
|
||||||
|
if not file_id and result.attributes:
|
||||||
|
file_id = result.attributes.get("document_id")
|
||||||
|
|
||||||
|
filename = result.filename
|
||||||
|
if not filename and result.attributes:
|
||||||
|
filename = result.attributes.get("filename")
|
||||||
|
if not filename:
|
||||||
|
filename = "unknown"
|
||||||
|
|
||||||
|
citation_files[file_id] = filename
|
||||||
|
|
||||||
return ToolInvocationResult(
|
return ToolInvocationResult(
|
||||||
content=content_items,
|
content=content_items,
|
||||||
metadata={
|
metadata={
|
||||||
"document_ids": [r.file_id for r in search_results],
|
"document_ids": [r.file_id for r in search_results],
|
||||||
"chunks": [r.content[0].text if r.content else "" for r in search_results],
|
"chunks": [r.content[0].text if r.content else "" for r in search_results],
|
||||||
"scores": [r.score for r in search_results],
|
"scores": [r.score for r in search_results],
|
||||||
|
"citation_files": citation_files,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ class ToolExecutionResult(BaseModel):
|
||||||
sequence_number: int
|
sequence_number: int
|
||||||
final_output_message: OpenAIResponseOutput | None = None
|
final_output_message: OpenAIResponseOutput | None = None
|
||||||
final_input_message: OpenAIMessageParam | None = None
|
final_input_message: OpenAIMessageParam | None = None
|
||||||
|
citation_files: dict[str, str] | None = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
|
||||||
|
|
@ -4,9 +4,11 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
import re
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from llama_stack.apis.agents.openai_responses import (
|
from llama_stack.apis.agents.openai_responses import (
|
||||||
|
OpenAIResponseAnnotationFileCitation,
|
||||||
OpenAIResponseInput,
|
OpenAIResponseInput,
|
||||||
OpenAIResponseInputFunctionToolCallOutput,
|
OpenAIResponseInputFunctionToolCallOutput,
|
||||||
OpenAIResponseInputMessageContent,
|
OpenAIResponseInputMessageContent,
|
||||||
|
|
@ -45,7 +47,9 @@ from llama_stack.apis.inference import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenAIResponseMessage:
|
async def convert_chat_choice_to_response_message(
|
||||||
|
choice: OpenAIChoice, citation_files: dict[str, str] | None = None
|
||||||
|
) -> OpenAIResponseMessage:
|
||||||
"""Convert an OpenAI Chat Completion choice into an OpenAI Response output message."""
|
"""Convert an OpenAI Chat Completion choice into an OpenAI Response output message."""
|
||||||
output_content = ""
|
output_content = ""
|
||||||
if isinstance(choice.message.content, str):
|
if isinstance(choice.message.content, str):
|
||||||
|
|
@ -57,9 +61,11 @@ async def convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenA
|
||||||
f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}"
|
f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
annotations, clean_text = _extract_citations_from_text(output_content, citation_files or {})
|
||||||
|
|
||||||
return OpenAIResponseMessage(
|
return OpenAIResponseMessage(
|
||||||
id=f"msg_{uuid.uuid4()}",
|
id=f"msg_{uuid.uuid4()}",
|
||||||
content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
|
content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=annotations)],
|
||||||
status="completed",
|
status="completed",
|
||||||
role="assistant",
|
role="assistant",
|
||||||
)
|
)
|
||||||
|
|
@@ -200,6 +206,53 @@ async def get_message_type_by_role(role: str):
     return role_to_type.get(role)


+def _extract_citations_from_text(
+    text: str, citation_files: dict[str, str]
+) -> tuple[list[OpenAIResponseAnnotationFileCitation], str]:
+    """Extract citation markers from text and create annotations.
+
+    Args:
+        text: The text containing citation markers like <|file-Cn3MSNn72ENTiiq11Qda4A|>
+        citation_files: Dictionary mapping file_id to filename
+
+    Returns:
+        Tuple of (annotations_list, clean_text_without_markers)
+    """
+    file_id_regex = re.compile(r"<\|(?P<file_id>file-[A-Za-z0-9_-]+)\|>")
+
+    annotations = []
+    parts = []
+    total_len = 0
+    last_end = 0
+
+    for m in file_id_regex.finditer(text):
+        # segment before the marker
+        prefix = text[last_end : m.start()]
+
+        # drop one trailing space if it exists (the marker sits at the end of a sentence)
+        if prefix.endswith(" "):
+            prefix = prefix[:-1]
+
+        parts.append(prefix)
+        total_len += len(prefix)
+
+        fid = m.group(1)
+        if fid in citation_files:
+            annotations.append(
+                OpenAIResponseAnnotationFileCitation(
+                    file_id=fid,
+                    filename=citation_files[fid],
+                    index=total_len,  # index points at the sentence-ending punctuation
+                )
+            )
+
+        last_end = m.end()
+
+    parts.append(text[last_end:])
+    cleaned_text = "".join(parts)
+    return annotations, cleaned_text
+
+
 def is_function_tool_call(
     tool_call: OpenAIChatCompletionToolCall,
     tools: list[OpenAIResponseInputTool],
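As an aside, a minimal usage sketch (not part of the diff) of the helper added above. It assumes `_extract_citations_from_text` is importable from the patched module; the file id, filename, and sentences are placeholders.

# Input text as the model would emit it, with inline citation markers.
text = "Llamas are vegetarian. <|file-AbC123xyz|> They live in the Andes. <|file-AbC123xyz|>"
citation_files = {"file-AbC123xyz": "llama_facts.txt"}

annotations, clean_text = _extract_citations_from_text(text, citation_files)

# clean_text == "Llamas are vegetarian. They live in the Andes."
# annotations[0].filename == "llama_facts.txt"
# annotations[0].index == 22, i.e. the offset in clean_text just past the first sentence's period.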
@@ -8,8 +8,6 @@ import asyncio
 import base64
 import io
 import mimetypes
-import secrets
-import string
 from typing import Any

 import httpx
@@ -52,10 +50,6 @@ from .context_retriever import generate_rag_query
 log = get_logger(name=__name__, category="tool_runtime")


-def make_random_string(length: int = 8):
-    return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))
-
-
 async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
     """Get raw binary data and mime type from a RAGDocument for file upload."""
     if isinstance(doc.content, URL):
@@ -331,5 +325,8 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):

         return ToolInvocationResult(
             content=result.content or [],
-            metadata=result.metadata,
+            metadata={
+                **(result.metadata or {}),
+                "citation_files": getattr(result, "citation_files", None),
+            },
         )
@@ -200,12 +200,10 @@ class FaissIndex(EmbeddingIndex):

 class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
     def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
+        super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
-        self.files_api = files_api
         self.cache: dict[str, VectorDBWithIndex] = {}
-        self.kvstore: KVStore | None = None
-        self.openai_vector_stores: dict[str, dict[str, Any]] = {}

     async def initialize(self) -> None:
         self.kvstore = await kvstore_impl(self.config.kvstore)
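The removed assignments suggest that `OpenAIVectorStoreMixin.__init__` now owns this state; a rough sketch of that assumption (not the mixin's actual source) is:

class OpenAIVectorStoreMixin:
    # Assumed shape only: the mixin is presumed to initialize the file/KV plumbing
    # that FaissVectorIOAdapter.__init__ no longer sets up itself.
    def __init__(self, files_api=None, kvstore=None) -> None:
        self.files_api = files_api
        self.kvstore = kvstore
        self.openai_vector_stores: dict[str, dict] = {}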
@@ -227,8 +225,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
         await self.initialize_openai_vector_stores()

     async def shutdown(self) -> None:
-        # Cleanup if needed
-        pass
+        # Clean up mixin resources (file batch tasks)
+        await super().shutdown()

     async def health(self) -> HealthResponse:
         """
Some files were not shown because too many files have changed in this diff.