Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 18:00:36 +00:00)

Merge branch 'llamastack:main' into llama_stack_how_to_documentation
Commit c83343de84: 1030 changed files with 249861 additions and 104196 deletions
.github/CODEOWNERS (vendored, 2 changes)

@@ -2,4 +2,4 @@
 # These owners will be the default owners for everything in
 # the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1
+* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
.github/TRIAGERS.md (vendored, 1 change)

@@ -1,2 +1 @@
 # This file documents Triage members in the Llama Stack community
-@franciscojavierarceo
.github/workflows/integration-auth-tests.yml (vendored, 4 changes)

@@ -85,8 +85,8 @@ jobs:
 cat $run_dir/run.yaml

 # avoid line breaks in the server log, especially because we grep it below.
-export COLUMNS=1984
-nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 &
+export LLAMA_STACK_LOG_WIDTH=200
+nohup uv run llama stack run $run_dir/run.yaml > server.log 2>&1 &

 - name: Wait for Llama Stack server to be ready
 run: |
.github/workflows/integration-tests.yml (vendored, 25 changes)

@@ -42,18 +42,27 @@ jobs:

 run-replay-mode-tests:
 runs-on: ubuntu-latest
-name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.setup, matrix.python-version, matrix.client-version, matrix.suite) }}
+name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}

 strategy:
 fail-fast: false
 matrix:
 client-type: [library, server]
-# Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
-setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
 # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
 python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
 client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-suite: [base, vision]
+# Define (setup, suite) pairs - they are always matched and cannot be independent
+# Weekly schedule (Sun 1 AM): vllm+base
+# Input test-setup=ollama-vision: ollama-vision+vision
+# Default (including test-setup=ollama): both ollama+base and ollama-vision+vision
+config: >-
+  ${{
+    github.event.schedule == '1 0 * * 0'
+    && fromJSON('[{"setup": "vllm", "suite": "base"}]')
+    || github.event.inputs.test-setup == 'ollama-vision'
+    && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
+    || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
+  }}

 steps:
 - name: Checkout repository

@@ -64,14 +73,14 @@ jobs:
 with:
 python-version: ${{ matrix.python-version }}
 client-version: ${{ matrix.client-version }}
-setup: ${{ matrix.setup }}
-suite: ${{ matrix.suite }}
+setup: ${{ matrix.config.setup }}
+suite: ${{ matrix.config.suite }}
 inference-mode: 'replay'

 - name: Run tests
 uses: ./.github/actions/run-and-record-tests
 with:
 stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
-setup: ${{ matrix.setup }}
+setup: ${{ matrix.config.setup }}
 inference-mode: 'replay'
-suite: ${{ matrix.suite }}
+suite: ${{ matrix.config.suite }}
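For reference, in the default case (no weekly schedule and no `test-setup` input) the `config` expression above resolves to two matched pairs; a hedged sketch of the equivalent static matrix (illustrative only, not part of the change):

```yaml
# equivalent static form of the default branch of the `config` expression above
matrix:
  client-type: [library, server]
  config:
    - setup: ollama
      suite: base
    - setup: ollama-vision
      suite: vision
```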
.github/workflows/precommit-trigger.yml (vendored, 12 changes)

@@ -18,7 +18,7 @@ jobs:
 steps:
 - name: Check comment author and get PR details
 id: check_author
-uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
 with:
 github-token: ${{ secrets.GITHUB_TOKEN }}
 script: |

@@ -78,7 +78,7 @@ jobs:

 - name: React to comment
 if: steps.check_author.outputs.authorized == 'true'
-uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
 with:
 github-token: ${{ secrets.GITHUB_TOKEN }}
 script: |

@@ -91,7 +91,7 @@ jobs:

 - name: Comment starting
 if: steps.check_author.outputs.authorized == 'true'
-uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
 with:
 github-token: ${{ secrets.GITHUB_TOKEN }}
 script: |

@@ -189,7 +189,7 @@ jobs:

 - name: Comment success with changes
 if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
-uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
 with:
 github-token: ${{ secrets.GITHUB_TOKEN }}
 script: |

@@ -202,7 +202,7 @@ jobs:

 - name: Comment success without changes
 if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'false' && steps.precommit.outcome == 'success'
-uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
 with:
 github-token: ${{ secrets.GITHUB_TOKEN }}
 script: |

@@ -215,7 +215,7 @@ jobs:

 - name: Comment failure
 if: failure()
-uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
 with:
 github-token: ${{ secrets.GITHUB_TOKEN }}
 script: |
.github/workflows/providers-build.yml (vendored, 4 changes)

@@ -112,7 +112,7 @@ jobs:
 fi
 entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
 echo "Entrypoint: $entrypoint"
-if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
+if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
 echo "Entrypoint is not correct"
 exit 1
 fi

@@ -150,7 +150,7 @@ jobs:
 fi
 entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
 echo "Entrypoint: $entrypoint"
-if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
+if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
 echo "Entrypoint is not correct"
 exit 1
 fi
.github/workflows/python-build-test.yml (vendored, 2 changes)

@@ -24,7 +24,7 @@ jobs:
 uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

 - name: Install uv
-uses: astral-sh/setup-uv@b75a909f75acd358c2196fb9a5f1299a9a8868a4 # v6.7.0
+uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e # v6.8.0
 with:
 python-version: ${{ matrix.python-version }}
 activate-environment: true
.github/workflows/stale_bot.yml (vendored, 2 changes)

@@ -24,7 +24,7 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Stale Action
-uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0
+uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0
 with:
 stale-issue-label: 'stale'
 stale-issue-message: >
@@ -59,7 +59,7 @@ jobs:
 # Use the virtual environment created by the build step (name comes from build config)
 source ramalama-stack-test/bin/activate
 uv pip list
-nohup llama stack run tests/external/ramalama-stack/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+nohup llama stack run tests/external/ramalama-stack/run.yaml > server.log 2>&1 &

 - name: Wait for Llama Stack server to be ready
 run: |
.github/workflows/test-external.yml (vendored, 2 changes)

@@ -59,7 +59,7 @@ jobs:
 # Use the virtual environment created by the build step (name comes from build config)
 source ci-test/bin/activate
 uv pip list
-nohup llama stack run tests/external/run-byoa.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+nohup llama stack run tests/external/run-byoa.yaml > server.log 2>&1 &

 - name: Wait for Llama Stack server to be ready
 run: |
@@ -7,7 +7,7 @@
 [](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
 [](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)

-[**Quick Start**](https://llamastack.github.io/latest/getting_started/index.html) | [**Documentation**](https://llamastack.github.io/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
+[**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)

 ### ✨🎉 Llama 4 Support 🎉✨
@@ -52,7 +52,7 @@ You can access the HuggingFace trainer via the `starter` distribution:

 ```bash
 llama stack build --distro starter --image-type venv
-llama stack run --image-type venv ~/.llama/distributions/starter/starter-run.yaml
+llama stack run ~/.llama/distributions/starter/starter-run.yaml
 ```

 ### Usage Example
@@ -187,21 +187,21 @@ Configure telemetry behavior using environment variables:
 - **`OTEL_SERVICE_NAME`**: Service name for telemetry (default: empty string)
 - **`TELEMETRY_SINKS`**: Comma-separated list of sinks (default: `console,sqlite`)

-## Visualization with Jaeger
+### Quick Setup: Complete Telemetry Stack

 The `otel_trace` sink works with any service compatible with the OpenTelemetry collector. Traces and metrics use separate endpoints but can share the same collector.

-### Starting Jaeger
-
-Start a Jaeger instance with OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686:
+Use the automated setup script to launch the complete telemetry stack (Jaeger, OpenTelemetry Collector, Prometheus, and Grafana):

 ```bash
-docker run --pull always --rm --name jaeger \
-  -p 16686:16686 -p 4318:4318 \
-  jaegertracing/jaeger:2.1.0
+./scripts/telemetry/setup_telemetry.sh
 ```

-Once running, you can visualize traces by navigating to [http://localhost:16686/](http://localhost:16686/).
+This sets up:
+- **Jaeger UI**: http://localhost:16686 (traces visualization)
+- **Prometheus**: http://localhost:9090 (metrics)
+- **Grafana**: http://localhost:3000 (dashboards with auto-configured data sources)
+- **OTEL Collector**: http://localhost:4318 (OTLP endpoint)
+
+Once running, you can visualize traces by navigating to [Grafana](http://localhost:3000/) and login with login `admin` and password `admin`.

 ## Querying Metrics
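A hedged end-to-end sketch of the workflow described above; the run.yaml path and the exact sink list are assumptions, adjust them to your distribution:

```bash
# launch Jaeger, the OpenTelemetry Collector, Prometheus, and Grafana (script referenced above)
./scripts/telemetry/setup_telemetry.sh

# configure the stack via the environment variables documented above (values are illustrative)
export OTEL_SERVICE_NAME=llama-stack
export TELEMETRY_SINKS=console,sqlite,otel_trace
llama stack run ~/.llama/distributions/starter/starter-run.yaml

# then open Grafana at http://localhost:3000 (admin / admin) to view traces and dashboards
```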
@@ -219,13 +219,10 @@ group_tools = client.tools.list_tools(toolgroup_id="search_tools")
 <TabItem value="setup" label="Setup & Configuration">

 1. Start by registering a Tavily API key at [Tavily](https://tavily.com/).
-2. [Optional] Provide the API key directly to the Llama Stack server
+2. [Optional] Set the API key in your environment before starting the Llama Stack server
 ```bash
 export TAVILY_SEARCH_API_KEY="your key"
 ```
-```bash
---env TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY}
-```

 </TabItem>
 <TabItem value="implementation" label="Implementation">

@@ -273,9 +270,9 @@ for log in EventLogger().log(response):
 <TabItem value="setup" label="Setup & Configuration">

 1. Start by registering for a WolframAlpha API key at [WolframAlpha Developer Portal](https://developer.wolframalpha.com/access).
-2. Provide the API key either when starting the Llama Stack server:
+2. Provide the API key either by setting it in your environment before starting the Llama Stack server:
 ```bash
---env WOLFRAM_ALPHA_API_KEY=${WOLFRAM_ALPHA_API_KEY}
+export WOLFRAM_ALPHA_API_KEY="your key"
 ```
 or from the client side:
 ```python
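For completeness, a minimal client-side sketch that ties the setup steps to the listing call quoted in the hunk header above; it assumes a Llama Stack server is already running on localhost:8321:

```python
# hedged sketch: list the tools in the "search_tools" toolgroup from a running server
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")
group_tools = client.tools.list_tools(toolgroup_id="search_tools")  # same call as in the docs above
for tool in group_tools:
    print(tool)
```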
@@ -357,7 +357,7 @@ server:
 8. Run the server:

 ```bash
-python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
+llama stack run ~/.llama/run-byoa.yaml
 ```

 9. Test the API:
@@ -76,7 +76,7 @@ Integration tests are located in [tests/integration](https://github.com/meta-lla
 Consult [tests/integration/README.md](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more details on how to run the tests.

 Note that each provider's `sample_run_config()` method (in the configuration class for that provider)
-typically references some environment variables for specifying API keys and the like. You can set these in the environment or pass these via the `--env` flag to the test command.
+typically references some environment variables for specifying API keys and the like. You can set these in the environment before running the test command.

 ### 2. Unit Testing
@@ -170,7 +170,7 @@ spec:
 - name: llama-stack
 image: localhost/llama-stack-run-k8s:latest
 imagePullPolicy: IfNotPresent
-command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
+command: ["llama", "stack", "run", "/app/config.yaml"]
 ports:
 - containerPort: 5000
 volumeMounts:
@@ -289,10 +289,10 @@ After this step is successful, you should be able to find the built container im
 docker run -d \
 -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
 -v ~/.llama:/root/.llama \
+-e INFERENCE_MODEL=$INFERENCE_MODEL \
+-e OLLAMA_URL=http://host.docker.internal:11434 \
 localhost/distribution-ollama:dev \
---port $LLAMA_STACK_PORT \
---env INFERENCE_MODEL=$INFERENCE_MODEL \
---env OLLAMA_URL=http://host.docker.internal:11434
+--port $LLAMA_STACK_PORT
 ```

 Here are the docker flags and their uses:

@@ -305,12 +305,12 @@ Here are the docker flags and their uses:

 * `localhost/distribution-ollama:dev`: The name and tag of the container image to run

+* `-e INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the INFERENCE_MODEL environment variable in the container
+
+* `-e OLLAMA_URL=http://host.docker.internal:11434`: Sets the OLLAMA_URL environment variable in the container
+
 * `--port $LLAMA_STACK_PORT`: Port number for the server to listen on

-* `--env INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the model to use for inference
-
-* `--env OLLAMA_URL=http://host.docker.internal:11434`: Configures the URL for the Ollama service
-
 </TabItem>
 </Tabs>
@@ -320,23 +320,22 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con

 ```
 llama stack run -h
-usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE]
+usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME]
                        [--image-type {venv}] [--enable-ui]
-                       [config | template]
+                       [config | distro]

 Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.

 positional arguments:
-  config | template     Path to config file to use for the run or name of known template (`llama stack list` for a list). (default: None)
+  config | distro       Path to config file to use for the run or name of known distro (`llama stack list` for a list). (default: None)

 options:
   -h, --help            show this help message and exit
   --port PORT           Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
   --image-name IMAGE_NAME
-                        Name of the image to run. Defaults to the current environment (default: None)
-  --env KEY=VALUE       Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: None)
+                        [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
   --image-type {venv}
-                        Image Type used during the build. This should be venv. (default: None)
+                        [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
   --enable-ui           Start the UI server (default: False)
 ```

@@ -348,9 +347,6 @@ llama stack run tgi

 # Start using config file
 llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
-
-# Start using a venv
-llama stack run --image-type venv ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
 ```

 ```
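Because `--image-type` and `--image-name` are deprecated per the help text above, a typical invocation now activates the build's virtual environment first; a hedged sketch (the venv path is an assumption, use whatever environment `llama stack build` created for you):

```bash
# hypothetical venv location; substitute the environment created by `llama stack build`
source ~/.llama/venvs/my-local-stack/bin/activate
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml --port 8321
```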
@@ -101,7 +101,7 @@ A few things to note:
 - The id is a string you can choose freely.
 - You can instantiate any number of provider instances of the same type.
 - The configuration dictionary is provider-specific.
-- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value.
+- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server, you can set environment variables in your shell before running `llama stack run` to override the default values.

 ### Environment Variable Substitution

@@ -173,13 +173,10 @@ optional_token: ${env.OPTIONAL_TOKEN:+}

 #### Runtime Override

-You can override environment variables at runtime when starting the server:
+You can override environment variables at runtime by setting them in your shell before starting the server:

 ```bash
-# Override specific environment variables
-llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com
-
-# Or set them in your shell
+# Set environment variables in your shell
 export API_KEY=sk-123
 export BASE_URL=https://custom-api.com
 llama stack run --config run.yaml
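For reference, the same variables can be consumed inside `run.yaml` with defaults; a hedged fragment (field names are illustrative, only the `${env....}` forms follow the substitution rules described above):

```yaml
# illustrative run.yaml fragment; ${env.VAR:=default} falls back when VAR is unset,
# and ${env.VAR:+} includes the value only when VAR is set
config:
  api_key: ${env.API_KEY:=}
  base_url: ${env.BASE_URL:=https://custom-api.com}
  optional_token: ${env.OPTIONAL_TOKEN:+}
```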
@@ -52,7 +52,7 @@ spec:
 value: "${SAFETY_MODEL}"
 - name: TAVILY_SEARCH_API_KEY
 value: "${TAVILY_SEARCH_API_KEY}"
-command: ["python", "-m", "llama_stack.core.server.server", "/etc/config/stack_run_config.yaml", "--port", "8321"]
+command: ["llama", "stack", "run", "/etc/config/stack_run_config.yaml", "--port", "8321"]
 ports:
 - containerPort: 8321
 volumeMounts:
@@ -69,10 +69,10 @@ docker run \
 -it \
 -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
 -v ./run.yaml:/root/my-run.yaml \
+-e WATSONX_API_KEY=$WATSONX_API_KEY \
+-e WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
+-e WATSONX_BASE_URL=$WATSONX_BASE_URL \
 llamastack/distribution-watsonx \
 --config /root/my-run.yaml \
---port $LLAMA_STACK_PORT \
---env WATSONX_API_KEY=$WATSONX_API_KEY \
---env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
---env WATSONX_BASE_URL=$WATSONX_BASE_URL
+--port $LLAMA_STACK_PORT
 ```
@@ -129,11 +129,11 @@ docker run -it \
 # NOTE: mount the llama-stack / llama-model directories if testing local changes else not needed
 -v $HOME/git/llama-stack:/app/llama-stack-source -v $HOME/git/llama-models:/app/llama-models-source \
 # localhost/distribution-dell:dev if building / testing locally
+-e INFERENCE_MODEL=$INFERENCE_MODEL \
+-e DEH_URL=$DEH_URL \
+-e CHROMA_URL=$CHROMA_URL \
 llamastack/distribution-dell \
---port $LLAMA_STACK_PORT \
---env INFERENCE_MODEL=$INFERENCE_MODEL \
---env DEH_URL=$DEH_URL \
---env CHROMA_URL=$CHROMA_URL
+--port $LLAMA_STACK_PORT

 ```

@@ -154,14 +154,14 @@ docker run \
 -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
 -v $HOME/.llama:/root/.llama \
 -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
+-e INFERENCE_MODEL=$INFERENCE_MODEL \
+-e DEH_URL=$DEH_URL \
+-e SAFETY_MODEL=$SAFETY_MODEL \
+-e DEH_SAFETY_URL=$DEH_SAFETY_URL \
+-e CHROMA_URL=$CHROMA_URL \
 llamastack/distribution-dell \
 --config /root/my-run.yaml \
---port $LLAMA_STACK_PORT \
---env INFERENCE_MODEL=$INFERENCE_MODEL \
---env DEH_URL=$DEH_URL \
---env SAFETY_MODEL=$SAFETY_MODEL \
---env DEH_SAFETY_URL=$DEH_SAFETY_URL \
---env CHROMA_URL=$CHROMA_URL
+--port $LLAMA_STACK_PORT
 ```

 ### Via venv

@@ -170,21 +170,21 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a

 ```bash
 llama stack build --distro dell --image-type venv
-llama stack run dell
---port $LLAMA_STACK_PORT \
---env INFERENCE_MODEL=$INFERENCE_MODEL \
---env DEH_URL=$DEH_URL \
---env CHROMA_URL=$CHROMA_URL
+INFERENCE_MODEL=$INFERENCE_MODEL \
+DEH_URL=$DEH_URL \
+CHROMA_URL=$CHROMA_URL \
+llama stack run dell \
+--port $LLAMA_STACK_PORT
 ```

 If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
+INFERENCE_MODEL=$INFERENCE_MODEL \
+DEH_URL=$DEH_URL \
+SAFETY_MODEL=$SAFETY_MODEL \
+DEH_SAFETY_URL=$DEH_SAFETY_URL \
+CHROMA_URL=$CHROMA_URL \
 llama stack run ./run-with-safety.yaml \
---port $LLAMA_STACK_PORT \
---env INFERENCE_MODEL=$INFERENCE_MODEL \
---env DEH_URL=$DEH_URL \
---env SAFETY_MODEL=$SAFETY_MODEL \
---env DEH_SAFETY_URL=$DEH_SAFETY_URL \
---env CHROMA_URL=$CHROMA_URL
+--port $LLAMA_STACK_PORT
 ```
@@ -84,9 +84,9 @@ docker run \
 --gpu all \
 -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
 -v ~/.llama:/root/.llama \
+-e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
 llamastack/distribution-meta-reference-gpu \
---port $LLAMA_STACK_PORT \
---env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+--port $LLAMA_STACK_PORT
 ```

 If you are using Llama Stack Safety / Shield APIs, use:

@@ -98,10 +98,10 @@ docker run \
 --gpu all \
 -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
 -v ~/.llama:/root/.llama \
+-e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+-e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
 llamastack/distribution-meta-reference-gpu \
---port $LLAMA_STACK_PORT \
---env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
---env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+--port $LLAMA_STACK_PORT
 ```

 ### Via venv

@@ -110,16 +110,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL

 ```bash
 llama stack build --distro meta-reference-gpu --image-type venv
+INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
 llama stack run distributions/meta-reference-gpu/run.yaml \
---port 8321 \
---env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+--port 8321
 ```

 If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
+INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
 llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \
---port 8321 \
---env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
---env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+--port 8321
 ```
@@ -129,10 +129,10 @@ docker run \
 --pull always \
 -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
 -v ./run.yaml:/root/my-run.yaml \
+-e NVIDIA_API_KEY=$NVIDIA_API_KEY \
 llamastack/distribution-nvidia \
 --config /root/my-run.yaml \
---port $LLAMA_STACK_PORT \
---env NVIDIA_API_KEY=$NVIDIA_API_KEY
+--port $LLAMA_STACK_PORT
 ```

 ### Via venv

@@ -142,10 +142,10 @@ If you've set up your local development environment, you can also build the imag
 ```bash
 INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
 llama stack build --distro nvidia --image-type venv
+NVIDIA_API_KEY=$NVIDIA_API_KEY \
+INFERENCE_MODEL=$INFERENCE_MODEL \
 llama stack run ./run.yaml \
---port 8321 \
---env NVIDIA_API_KEY=$NVIDIA_API_KEY \
---env INFERENCE_MODEL=$INFERENCE_MODEL
+--port 8321
 ```

 ## Example Notebooks
@@ -86,9 +86,9 @@ docker run -it \
 --pull always \
 -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
 -v ~/.llama:/root/.llama \
+-e OLLAMA_URL=http://host.docker.internal:11434 \
 llamastack/distribution-starter \
---port $LLAMA_STACK_PORT \
---env OLLAMA_URL=http://host.docker.internal:11434
+--port $LLAMA_STACK_PORT
 ```
 Note to start the container with Podman, you can do the same but replace `docker` at the start of the command with
 `podman`. If you are using `podman` older than `4.7.0`, please also replace `host.docker.internal` in the `OLLAMA_URL`

@@ -106,9 +106,9 @@ docker run -it \
 -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
 -v ~/.llama:/root/.llama \
 --network=host \
+-e OLLAMA_URL=http://localhost:11434 \
 llamastack/distribution-starter \
---port $LLAMA_STACK_PORT \
---env OLLAMA_URL=http://localhost:11434
+--port $LLAMA_STACK_PORT
 ```
 :::
 You will see output like below:
@@ -1,4 +1,7 @@
 ---
+description: "Files
+
+  This API is used to upload documents that can be used with other Llama Stack APIs."
 sidebar_label: Files
 title: Files
 ---

@@ -7,4 +10,8 @@ title: Files

 ## Overview

+Files
+
+This API is used to upload documents that can be used with other Llama Stack APIs.
+
 This section contains documentation for all available providers for the **files** API.
@@ -1,5 +1,7 @@
 ---
-description: "Llama Stack Inference API for generating completions, chat completions, and embeddings.
+description: "Inference
+
+  Llama Stack Inference API for generating completions, chat completions, and embeddings.

 This API provides the raw interface to the underlying models. Two kinds of models are supported:
 - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.

@@ -12,6 +14,8 @@ title: Inference

 ## Overview

+Inference
+
 Llama Stack Inference API for generating completions, chat completions, and embeddings.

 This API provides the raw interface to the underlying models. Two kinds of models are supported:
@@ -15,6 +15,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `str \| None` | No | | API key for Anthropic models |

 ## Sample Configuration

@@ -22,6 +22,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Azure API key for Azure |
 | `api_base` | `<class 'pydantic.networks.HttpUrl'>` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
 | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |

@@ -15,6 +15,7 @@ AWS Bedrock inference provider for accessing various AI models through AWS's man
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
 | `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
 | `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |

@@ -15,6 +15,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
 | `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Cerebras API Key |
@@ -15,7 +15,8 @@ Databricks inference provider for running models on Databricks' unified analytic
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `url` | `<class 'str'>` | No | | The URL for the Databricks model serving endpoint |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
+| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
 | `api_token` | `<class 'pydantic.types.SecretStr'>` | No | | The Databricks API token |

 ## Sample Configuration

@@ -15,6 +15,7 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
 | `api_key` | `pydantic.types.SecretStr \| None` | No | | The Fireworks.ai API Key |

@@ -15,6 +15,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `str \| None` | No | | API key for Gemini models |

 ## Sample Configuration

@@ -15,6 +15,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `str \| None` | No | | The Groq API key |
 | `url` | `<class 'str'>` | No | https://api.groq.com | The URL for the Groq AI server |
@@ -15,6 +15,7 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `str \| None` | No | | The Llama API key |
 | `openai_compat_api_base` | `<class 'str'>` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |

@@ -15,6 +15,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
 | `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed of using the hosted service |
 | `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |

@@ -15,8 +15,8 @@ Ollama inference provider for running local models through the Ollama runtime.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `<class 'str'>` | No | http://localhost:11434 | |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically |

 ## Sample Configuration

@@ -15,6 +15,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `str \| None` | No | | API key for OpenAI models |
 | `base_url` | `<class 'str'>` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
@@ -15,6 +15,7 @@ Passthrough inference provider for connecting to any external inference service
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint |
 | `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint |

@@ -15,6 +15,7 @@ RunPod inference provider for running models on RunPod's cloud GPU platform.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
 | `api_token` | `str \| None` | No | | The API token |

@@ -15,6 +15,7 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
 | `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key |

@@ -15,6 +15,7 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `<class 'str'>` | No | | The URL for the TGI serving endpoint |

 ## Sample Configuration
@@ -15,6 +15,7 @@ Together AI inference provider for open-source models and collaborative AI devel
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
 | `api_key` | `pydantic.types.SecretStr \| None` | No | | The Together AI API Key |

@@ -54,6 +54,7 @@ Available Models:
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `project` | `<class 'str'>` | No | | Google Cloud project ID for Vertex AI |
 | `location` | `<class 'str'>` | No | us-central1 | Google Cloud location for Vertex AI |

@@ -15,11 +15,11 @@ Remote vLLM inference provider for connecting to vLLM servers.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
 | `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. |
 | `api_token` | `str \| None` | No | fake | The API token |
 | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically |

 ## Sample Configuration

@@ -15,9 +15,10 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
-| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key |
-| `project_id` | `str \| None` | No | | The Project ID key |
+| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx.ai API key |
+| `project_id` | `str \| None` | No | | The watsonx.ai project ID |
 | `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |

 ## Sample Configuration
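Taken together, the field tables above correspond to the `config` block of a provider entry in `run.yaml`; a hedged sketch for one provider (the `provider_type` string and env var name are assumptions, not taken from the tables):

```yaml
# illustrative inference provider entry using fields documented in the tables above
inference:
  - provider_id: ollama
    provider_type: remote::ollama          # assumed provider type string
    config:
      url: ${env.OLLAMA_URL:=http://localhost:11434}
      refresh_models: false                # field added across providers in this change
      allowed_models: null                 # null means all models are allowed
```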
@@ -1,4 +1,7 @@
 ---
+description: "Safety
+
+  OpenAI-compatible Moderations API."
 sidebar_label: Safety
 title: Safety
 ---

@@ -7,4 +10,8 @@ title: Safety

 ## Overview

+Safety
+
+OpenAI-compatible Moderations API.
+
 This section contains documentation for all available providers for the **safety** API.

@@ -15,6 +15,7 @@ AWS Bedrock safety provider for content moderation using AWS's safety services.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
 | `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
 | `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
 | `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
@@ -123,12 +123,12 @@
 "    del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
-"!uv run --with llama-stack llama stack build --distro together --image-type venv\n",
+"!uv run --with llama-stack llama stack build --distro together\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 "    log_file = open(\"llama_stack_server.log\", \"w\")\n",
 "    process = subprocess.Popen(\n",
-"        \"uv run --with llama-stack llama stack run together --image-type venv\",\n",
+"        \"uv run --with llama-stack llama stack run together\",\n",
 "        shell=True,\n",
 "        stdout=log_file,\n",
 "        stderr=log_file,\n",

@@ -233,12 +233,12 @@
 "    del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server\n",
-"!uv run --with llama-stack llama stack build --distro meta-reference-gpu --image-type venv\n",
+"!uv run --with llama-stack llama stack build --distro meta-reference-gpu\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 "    log_file = open(\"llama_stack_server.log\", \"w\")\n",
 "    process = subprocess.Popen(\n",
-"        f\"uv run --with llama-stack llama stack run meta-reference-gpu --image-type venv --env INFERENCE_MODEL={model_id}\",\n",
+"        f\"INFERENCE_MODEL={model_id} uv run --with llama-stack llama stack run meta-reference-gpu\",\n",
 "        shell=True,\n",
 "        stdout=log_file,\n",
 "        stderr=log_file,\n",

@@ -223,12 +223,12 @@
 "    del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server\n",
-"!uv run --with llama-stack llama stack build --distro llama_api --image-type venv\n",
+"!uv run --with llama-stack llama stack build --distro llama_api\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 "    log_file = open(\"llama_stack_server.log\", \"w\")\n",
 "    process = subprocess.Popen(\n",
-"        \"uv run --with llama-stack llama stack run llama_api --image-type venv\",\n",
+"        \"uv run --with llama-stack llama stack run llama_api\",\n",
 "        shell=True,\n",
 "        stdout=log_file,\n",
 "        stderr=log_file,\n",

@@ -145,12 +145,12 @@
 "    del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
-"!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
+"!uv run --with llama-stack llama stack build --distro starter\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 "    log_file = open(\"llama_stack_server.log\", \"w\")\n",
 "    process = subprocess.Popen(\n",
-"        f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter --image-type venv\n",
+"        f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\n",
 "        shell=True,\n",
 "        stdout=log_file,\n",
 "        stderr=log_file,\n",
74
docs/static/deprecated-llama-stack-spec.html
vendored
74
docs/static/deprecated-llama-stack-spec.html
vendored
|
|
@ -1443,8 +1443,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "List all chat completions.",
|
||||
"description": "List all chat completions.",
|
||||
"summary": "List chat completions.",
|
||||
"description": "List chat completions.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -1520,8 +1520,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"summary": "Create chat completions.",
|
||||
"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1565,8 +1565,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Describe a chat completion by its ID.",
|
||||
"description": "Describe a chat completion by its ID.",
|
||||
"summary": "Get chat completion.",
|
||||
"description": "Get chat completion.\nDescribe a chat completion by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "completion_id",
|
||||
|
|
@ -1610,8 +1610,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"summary": "Create completion.",
|
||||
"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1655,8 +1655,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"summary": "Create embeddings.",
|
||||
"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1700,8 +1700,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns a list of files that belong to the user's organization.",
|
||||
"description": "Returns a list of files that belong to the user's organization.",
|
||||
"summary": "List files.",
|
||||
"description": "List files.\nReturns a list of files that belong to the user's organization.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -1770,8 +1770,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Upload a file that can be used across various endpoints.",
|
||||
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||
"summary": "Upload file.",
|
||||
"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1831,8 +1831,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns information about a specific file.",
|
||||
"description": "Returns information about a specific file.",
|
||||
"summary": "Retrieve file.",
|
||||
"description": "Retrieve file.\nReturns information about a specific file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1874,8 +1874,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Delete a file.",
|
||||
"description": "Delete a file.",
|
||||
"summary": "Delete file.",
|
||||
"description": "Delete file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1919,8 +1919,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns the contents of the specified file.",
|
||||
"description": "Returns the contents of the specified file.",
|
||||
"summary": "Retrieve file content.",
|
||||
"description": "Retrieve file content.\nReturns the contents of the specified file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1999,8 +1999,8 @@
|
|||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"summary": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"description": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"summary": "Create moderation.",
|
||||
"description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -2044,8 +2044,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "List all OpenAI responses.",
|
||||
"description": "List all OpenAI responses.",
|
||||
"summary": "List all responses.",
|
||||
"description": "List all responses.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -2119,8 +2119,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Create a new OpenAI response.",
|
||||
"description": "Create a new OpenAI response.",
|
||||
"summary": "Create a model response.",
|
||||
"description": "Create a model response.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -2184,8 +2184,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Retrieve an OpenAI response by its ID.",
|
||||
"description": "Retrieve an OpenAI response by its ID.",
|
||||
"summary": "Get a model response.",
|
||||
"description": "Get a model response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -2227,8 +2227,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Delete an OpenAI response by its ID.",
|
||||
"description": "Delete an OpenAI response by its ID.",
|
||||
"summary": "Delete a response.",
|
||||
"description": "Delete a response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -2272,8 +2272,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "List input items for a given OpenAI response.",
|
||||
"description": "List input items for a given OpenAI response.",
|
||||
"summary": "List input items.",
|
||||
"description": "List input items.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -13366,12 +13366,13 @@
|
|||
},
|
||||
{
|
||||
"name": "Files",
|
||||
"description": ""
|
||||
"description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
|
||||
"x-displayName": "Files"
|
||||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
|
||||
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"x-displayName": "Inference"
|
||||
},
|
||||
{
|
||||
"name": "Models",
|
||||
|
|
@ -13383,7 +13384,8 @@
|
|||
},
|
||||
{
|
||||
"name": "Safety",
|
||||
"description": ""
|
||||
"description": "OpenAI-compatible Moderations API.",
|
||||
"x-displayName": "Safety"
|
||||
},
|
||||
{
|
||||
"name": "Telemetry",

97 docs/static/deprecated-llama-stack-spec.yaml vendored

@@ -1033,8 +1033,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: List all chat completions.
|
||||
description: List all chat completions.
|
||||
summary: List chat completions.
|
||||
description: List chat completions.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
|
|
@ -1087,10 +1087,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
summary: Create chat completions.
|
||||
description: >-
|
||||
Create chat completions.
|
||||
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
parameters: []
|
||||
|
|
@ -1122,8 +1122,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: Describe a chat completion by its ID.
|
||||
description: Describe a chat completion by its ID.
|
||||
summary: Get chat completion.
|
||||
description: >-
|
||||
Get chat completion.
|
||||
|
||||
Describe a chat completion by its ID.
|
||||
parameters:
|
||||
- name: completion_id
|
||||
in: path
|
||||
|
|
@ -1153,10 +1156,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
summary: Create completion.
|
||||
description: >-
|
||||
Create completion.
|
||||
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
parameters: []
|
||||
|
|
@ -1189,10 +1192,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||
model.
|
||||
summary: Create embeddings.
|
||||
description: >-
|
||||
Create embeddings.
|
||||
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||
model.
|
||||
parameters: []
|
||||
|
|
@ -1225,9 +1228,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns a list of files that belong to the user's organization.
|
||||
summary: List files.
|
||||
description: >-
|
||||
List files.
|
||||
|
||||
Returns a list of files that belong to the user's organization.
|
||||
parameters:
|
||||
- name: after
|
||||
|
|
@ -1285,11 +1289,13 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Upload a file that can be used across various endpoints.
|
||||
summary: Upload file.
|
||||
description: >-
|
||||
Upload file.
|
||||
|
||||
Upload a file that can be used across various endpoints.
|
||||
|
||||
|
||||
The file upload should be a multipart form request with:
|
||||
|
||||
- file: The File object (not file name) to be uploaded.
|
||||
|
|
@ -1338,9 +1344,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns information about a specific file.
|
||||
summary: Retrieve file.
|
||||
description: >-
|
||||
Retrieve file.
|
||||
|
||||
Returns information about a specific file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
|
|
@ -1372,8 +1379,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: Delete a file.
|
||||
description: Delete a file.
|
||||
summary: Delete file.
|
||||
description: Delete file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
in: path
|
||||
|
|
@ -1405,9 +1412,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns the contents of the specified file.
|
||||
summary: Retrieve file content.
|
||||
description: >-
|
||||
Retrieve file content.
|
||||
|
||||
Returns the contents of the specified file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
|
|
@ -1464,9 +1472,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
summary: >-
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
summary: Create moderation.
|
||||
description: >-
|
||||
Create moderation.
|
||||
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
parameters: []
|
||||
requestBody:
|
||||
|
|
@ -1497,8 +1506,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: List all OpenAI responses.
|
||||
description: List all OpenAI responses.
|
||||
summary: List all responses.
|
||||
description: List all responses.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
|
|
@ -1549,8 +1558,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Create a new OpenAI response.
|
||||
description: Create a new OpenAI response.
|
||||
summary: Create a model response.
|
||||
description: Create a model response.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -1592,8 +1601,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Retrieve an OpenAI response by its ID.
|
||||
description: Retrieve an OpenAI response by its ID.
|
||||
summary: Get a model response.
|
||||
description: Get a model response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1623,8 +1632,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Delete an OpenAI response by its ID.
|
||||
description: Delete an OpenAI response by its ID.
|
||||
summary: Delete a response.
|
||||
description: Delete a response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1654,10 +1663,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: >-
|
||||
List input items for a given OpenAI response.
|
||||
description: >-
|
||||
List input items for a given OpenAI response.
|
||||
summary: List input items.
|
||||
description: List input items.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -10011,9 +10018,16 @@ tags:
|
|||
x-displayName: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
- name: Files
|
||||
description: ''
|
||||
description: >-
|
||||
This API is used to upload documents that can be used with other Llama Stack
|
||||
APIs.
|
||||
x-displayName: Files
|
||||
- name: Inference
|
||||
description: >-
|
||||
Llama Stack Inference API for generating completions, chat completions, and
|
||||
embeddings.
|
||||
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
are supported:
|
||||
|
||||
|
|
@ -10021,15 +10035,14 @@ tags:
|
|||
|
||||
- Embedding models: these models generate embeddings to be used for semantic
|
||||
search.
|
||||
x-displayName: >-
|
||||
Llama Stack Inference API for generating completions, chat completions, and
|
||||
embeddings.
|
||||
x-displayName: Inference
|
||||
- name: Models
|
||||
description: ''
|
||||
- name: PostTraining (Coming Soon)
|
||||
description: ''
|
||||
- name: Safety
|
||||
description: ''
|
||||
description: OpenAI-compatible Moderations API.
|
||||
x-displayName: Safety
|
||||
- name: Telemetry
|
||||
description: ''
|
||||
- name: VectorIO
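
The Files hunks above retitle the upload operation to "Upload file." while keeping the documented contract: a multipart form request carrying file, purpose, and an optional expires_after. Below is a minimal sketch of calling that operation; it assumes a local Llama Stack server exposing the OpenAI-compatible routes at http://localhost:8321/v1 and uses the openai Python client, so the base URL, API key, and purpose value are illustrative assumptions rather than values taken from this diff.

# Sketch only: upload a file through the OpenAI-compatible Files API.
# Base URL, API key, and purpose are assumed placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Multipart form upload with `file` and `purpose`; `expires_after` is the
# optional expiration described in the spec text above.
with open("report.pdf", "rb") as f:
    uploaded = client.files.create(file=f, purpose="assistants")

print(uploaded.id)                                  # server-assigned file id
print(client.files.retrieve(uploaded.id).filename)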

145 docs/static/llama-stack-spec.html vendored

@@ -69,8 +69,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "List all chat completions.",
|
||||
"description": "List all chat completions.",
|
||||
"summary": "List chat completions.",
|
||||
"description": "List chat completions.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -146,8 +146,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"summary": "Create chat completions.",
|
||||
"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -191,8 +191,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Describe a chat completion by its ID.",
|
||||
"description": "Describe a chat completion by its ID.",
|
||||
"summary": "Get chat completion.",
|
||||
"description": "Get chat completion.\nDescribe a chat completion by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "completion_id",
|
||||
|
|
@ -236,8 +236,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"summary": "Create completion.",
|
||||
"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -758,8 +758,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"summary": "Create embeddings.",
|
||||
"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -803,8 +803,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns a list of files that belong to the user's organization.",
|
||||
"description": "Returns a list of files that belong to the user's organization.",
|
||||
"summary": "List files.",
|
||||
"description": "List files.\nReturns a list of files that belong to the user's organization.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -873,8 +873,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Upload a file that can be used across various endpoints.",
|
||||
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||
"summary": "Upload file.",
|
||||
"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -934,8 +934,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns information about a specific file.",
|
||||
"description": "Returns information about a specific file.",
|
||||
"summary": "Retrieve file.",
|
||||
"description": "Retrieve file.\nReturns information about a specific file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -977,8 +977,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Delete a file.",
|
||||
"description": "Delete a file.",
|
||||
"summary": "Delete file.",
|
||||
"description": "Delete file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1022,8 +1022,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns the contents of the specified file.",
|
||||
"description": "Returns the contents of the specified file.",
|
||||
"summary": "Retrieve file content.",
|
||||
"description": "Retrieve file content.\nReturns the contents of the specified file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1067,8 +1067,8 @@
|
|||
"tags": [
|
||||
"Inspect"
|
||||
],
|
||||
"summary": "Get the current health status of the service.",
|
||||
"description": "Get the current health status of the service.",
|
||||
"summary": "Get health status.",
|
||||
"description": "Get health status.\nGet the current health status of the service.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -1102,8 +1102,8 @@
|
|||
"tags": [
|
||||
"Inspect"
|
||||
],
|
||||
"summary": "List all available API routes with their methods and implementing providers.",
|
||||
"description": "List all available API routes with their methods and implementing providers.",
|
||||
"summary": "List routes.",
|
||||
"description": "List routes.\nList all available API routes with their methods and implementing providers.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -1170,8 +1170,8 @@
|
|||
"tags": [
|
||||
"Models"
|
||||
],
|
||||
"summary": "Register a model.",
|
||||
"description": "Register a model.",
|
||||
"summary": "Register model.",
|
||||
"description": "Register model.\nRegister a model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1215,8 +1215,8 @@
|
|||
"tags": [
|
||||
"Models"
|
||||
],
|
||||
"summary": "Get a model by its identifier.",
|
||||
"description": "Get a model by its identifier.",
|
||||
"summary": "Get model.",
|
||||
"description": "Get model.\nGet a model by its identifier.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "model_id",
|
||||
|
|
@ -1251,8 +1251,8 @@
|
|||
"tags": [
|
||||
"Models"
|
||||
],
|
||||
"summary": "Unregister a model.",
|
||||
"description": "Unregister a model.",
|
||||
"summary": "Unregister model.",
|
||||
"description": "Unregister model.\nUnregister a model.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "model_id",
|
||||
|
|
@ -1296,8 +1296,8 @@
|
|||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"summary": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"description": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"summary": "Create moderation.",
|
||||
"description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1374,8 +1374,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Create a new prompt.",
|
||||
"description": "Create a new prompt.",
|
||||
"summary": "Create prompt.",
|
||||
"description": "Create prompt.\nCreate a new prompt.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1419,8 +1419,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Get a prompt by its identifier and optional version.",
|
||||
"description": "Get a prompt by its identifier and optional version.",
|
||||
"summary": "Get prompt.",
|
||||
"description": "Get prompt.\nGet a prompt by its identifier and optional version.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1471,8 +1471,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Update an existing prompt (increments version).",
|
||||
"description": "Update an existing prompt (increments version).",
|
||||
"summary": "Update prompt.",
|
||||
"description": "Update prompt.\nUpdate an existing prompt (increments version).",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1517,8 +1517,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Delete a prompt.",
|
||||
"description": "Delete a prompt.",
|
||||
"summary": "Delete prompt.",
|
||||
"description": "Delete prompt.\nDelete a prompt.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1562,8 +1562,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Set which version of a prompt should be the default in get_prompt (latest).",
|
||||
"description": "Set which version of a prompt should be the default in get_prompt (latest).",
|
||||
"summary": "Set prompt version.",
|
||||
"description": "Set prompt version.\nSet which version of a prompt should be the default in get_prompt (latest).",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1617,8 +1617,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "List all versions of a specific prompt.",
|
||||
"description": "List all versions of a specific prompt.",
|
||||
"summary": "List prompt versions.",
|
||||
"description": "List prompt versions.\nList all versions of a specific prompt.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1662,8 +1662,8 @@
|
|||
"tags": [
|
||||
"Providers"
|
||||
],
|
||||
"summary": "List all available providers.",
|
||||
"description": "List all available providers.",
|
||||
"summary": "List providers.",
|
||||
"description": "List providers.\nList all available providers.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -1697,8 +1697,8 @@
|
|||
"tags": [
|
||||
"Providers"
|
||||
],
|
||||
"summary": "Get detailed information about a specific provider.",
|
||||
"description": "Get detailed information about a specific provider.",
|
||||
"summary": "Get provider.",
|
||||
"description": "Get provider.\nGet detailed information about a specific provider.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "provider_id",
|
||||
|
|
@ -1742,8 +1742,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "List all OpenAI responses.",
|
||||
"description": "List all OpenAI responses.",
|
||||
"summary": "List all responses.",
|
||||
"description": "List all responses.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -1817,8 +1817,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Create a new OpenAI response.",
|
||||
"description": "Create a new OpenAI response.",
|
||||
"summary": "Create a model response.",
|
||||
"description": "Create a model response.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1882,8 +1882,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Retrieve an OpenAI response by its ID.",
|
||||
"description": "Retrieve an OpenAI response by its ID.",
|
||||
"summary": "Get a model response.",
|
||||
"description": "Get a model response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -1925,8 +1925,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Delete an OpenAI response by its ID.",
|
||||
"description": "Delete an OpenAI response by its ID.",
|
||||
"summary": "Delete a response.",
|
||||
"description": "Delete a response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -1970,8 +1970,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "List input items for a given OpenAI response.",
|
||||
"description": "List input items for a given OpenAI response.",
|
||||
"summary": "List input items.",
|
||||
"description": "List input items.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -2063,8 +2063,8 @@
|
|||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"summary": "Run a shield.",
|
||||
"description": "Run a shield.",
|
||||
"summary": "Run shield.",
|
||||
"description": "Run shield.\nRun a shield.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -4196,8 +4196,8 @@
|
|||
"tags": [
|
||||
"Inspect"
|
||||
],
|
||||
"summary": "Get the version of the service.",
|
||||
"description": "Get the version of the service.",
|
||||
"summary": "Get version.",
|
||||
"description": "Get version.\nGet the version of the service.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -12914,16 +12914,18 @@
|
|||
},
|
||||
{
|
||||
"name": "Files",
|
||||
"description": ""
|
||||
"description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
|
||||
"x-displayName": "Files"
|
||||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
|
||||
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"x-displayName": "Inference"
|
||||
},
|
||||
{
|
||||
"name": "Inspect",
|
||||
"description": ""
|
||||
"description": "APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.",
|
||||
"x-displayName": "Inspect"
|
||||
},
|
||||
{
|
||||
"name": "Models",
|
||||
|
|
@ -12931,17 +12933,18 @@
|
|||
},
|
||||
{
|
||||
"name": "Prompts",
|
||||
"description": "",
|
||||
"x-displayName": "Protocol for prompt management operations."
|
||||
"description": "Protocol for prompt management operations.",
|
||||
"x-displayName": "Prompts"
|
||||
},
|
||||
{
|
||||
"name": "Providers",
|
||||
"description": "",
|
||||
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
|
||||
"description": "Providers API for inspecting, listing, and modifying providers and their configurations.",
|
||||
"x-displayName": "Providers"
|
||||
},
|
||||
{
|
||||
"name": "Safety",
|
||||
"description": ""
|
||||
"description": "OpenAI-compatible Moderations API.",
|
||||
"x-displayName": "Safety"
|
||||
},
|
||||
{
|
||||
"name": "Scoring",

203 docs/static/llama-stack-spec.yaml vendored

@@ -33,8 +33,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: List all chat completions.
|
||||
description: List all chat completions.
|
||||
summary: List chat completions.
|
||||
description: List chat completions.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
|
|
@ -87,10 +87,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
summary: Create chat completions.
|
||||
description: >-
|
||||
Create chat completions.
|
||||
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
parameters: []
|
||||
|
|
@ -122,8 +122,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: Describe a chat completion by its ID.
|
||||
description: Describe a chat completion by its ID.
|
||||
summary: Get chat completion.
|
||||
description: >-
|
||||
Get chat completion.
|
||||
|
||||
Describe a chat completion by its ID.
|
||||
parameters:
|
||||
- name: completion_id
|
||||
in: path
|
||||
|
|
@ -153,10 +156,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
summary: Create completion.
|
||||
description: >-
|
||||
Create completion.
|
||||
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
parameters: []
|
||||
|
|
@ -603,10 +606,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||
model.
|
||||
summary: Create embeddings.
|
||||
description: >-
|
||||
Create embeddings.
|
||||
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||
model.
|
||||
parameters: []
|
||||
|
|
@ -639,9 +642,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns a list of files that belong to the user's organization.
|
||||
summary: List files.
|
||||
description: >-
|
||||
List files.
|
||||
|
||||
Returns a list of files that belong to the user's organization.
|
||||
parameters:
|
||||
- name: after
|
||||
|
|
@ -699,11 +703,13 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Upload a file that can be used across various endpoints.
|
||||
summary: Upload file.
|
||||
description: >-
|
||||
Upload file.
|
||||
|
||||
Upload a file that can be used across various endpoints.
|
||||
|
||||
|
||||
The file upload should be a multipart form request with:
|
||||
|
||||
- file: The File object (not file name) to be uploaded.
|
||||
|
|
@ -752,9 +758,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns information about a specific file.
|
||||
summary: Retrieve file.
|
||||
description: >-
|
||||
Retrieve file.
|
||||
|
||||
Returns information about a specific file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
|
|
@ -786,8 +793,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: Delete a file.
|
||||
description: Delete a file.
|
||||
summary: Delete file.
|
||||
description: Delete file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
in: path
|
||||
|
|
@ -819,9 +826,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns the contents of the specified file.
|
||||
summary: Retrieve file content.
|
||||
description: >-
|
||||
Retrieve file content.
|
||||
|
||||
Returns the contents of the specified file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
|
|
@ -854,9 +862,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
summary: >-
|
||||
Get the current health status of the service.
|
||||
summary: Get health status.
|
||||
description: >-
|
||||
Get health status.
|
||||
|
||||
Get the current health status of the service.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
|
|
@ -882,9 +891,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
summary: >-
|
||||
List all available API routes with their methods and implementing providers.
|
||||
summary: List routes.
|
||||
description: >-
|
||||
List routes.
|
||||
|
||||
List all available API routes with their methods and implementing providers.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
|
|
@ -933,8 +943,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: Register a model.
|
||||
description: Register a model.
|
||||
summary: Register model.
|
||||
description: >-
|
||||
Register model.
|
||||
|
||||
Register a model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -964,8 +977,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: Get a model by its identifier.
|
||||
description: Get a model by its identifier.
|
||||
summary: Get model.
|
||||
description: >-
|
||||
Get model.
|
||||
|
||||
Get a model by its identifier.
|
||||
parameters:
|
||||
- name: model_id
|
||||
in: path
|
||||
|
|
@ -990,8 +1006,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: Unregister a model.
|
||||
description: Unregister a model.
|
||||
summary: Unregister model.
|
||||
description: >-
|
||||
Unregister model.
|
||||
|
||||
Unregister a model.
|
||||
parameters:
|
||||
- name: model_id
|
||||
in: path
|
||||
|
|
@ -1022,9 +1041,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
summary: >-
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
summary: Create moderation.
|
||||
description: >-
|
||||
Create moderation.
|
||||
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
parameters: []
|
||||
requestBody:
|
||||
|
|
@ -1080,8 +1100,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: Create a new prompt.
|
||||
description: Create a new prompt.
|
||||
summary: Create prompt.
|
||||
description: >-
|
||||
Create prompt.
|
||||
|
||||
Create a new prompt.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -1111,9 +1134,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: >-
|
||||
Get a prompt by its identifier and optional version.
|
||||
summary: Get prompt.
|
||||
description: >-
|
||||
Get prompt.
|
||||
|
||||
Get a prompt by its identifier and optional version.
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
|
|
@ -1151,9 +1175,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: >-
|
||||
Update an existing prompt (increments version).
|
||||
summary: Update prompt.
|
||||
description: >-
|
||||
Update prompt.
|
||||
|
||||
Update an existing prompt (increments version).
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
|
|
@ -1185,8 +1210,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: Delete a prompt.
|
||||
description: Delete a prompt.
|
||||
summary: Delete prompt.
|
||||
description: >-
|
||||
Delete prompt.
|
||||
|
||||
Delete a prompt.
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
in: path
|
||||
|
|
@ -1217,9 +1245,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: >-
|
||||
Set which version of a prompt should be the default in get_prompt (latest).
|
||||
summary: Set prompt version.
|
||||
description: >-
|
||||
Set prompt version.
|
||||
|
||||
Set which version of a prompt should be the default in get_prompt (latest).
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
|
|
@ -1257,8 +1286,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: List all versions of a specific prompt.
|
||||
description: List all versions of a specific prompt.
|
||||
summary: List prompt versions.
|
||||
description: >-
|
||||
List prompt versions.
|
||||
|
||||
List all versions of a specific prompt.
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
in: path
|
||||
|
|
@ -1290,8 +1322,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Providers
|
||||
summary: List all available providers.
|
||||
description: List all available providers.
|
||||
summary: List providers.
|
||||
description: >-
|
||||
List providers.
|
||||
|
||||
List all available providers.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
/v1/providers/{provider_id}:
|
||||
|
|
@ -1316,9 +1351,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Providers
|
||||
summary: >-
|
||||
Get detailed information about a specific provider.
|
||||
summary: Get provider.
|
||||
description: >-
|
||||
Get provider.
|
||||
|
||||
Get detailed information about a specific provider.
|
||||
parameters:
|
||||
- name: provider_id
|
||||
|
|
@ -1349,8 +1385,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: List all OpenAI responses.
|
||||
description: List all OpenAI responses.
|
||||
summary: List all responses.
|
||||
description: List all responses.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
|
|
@ -1401,8 +1437,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Create a new OpenAI response.
|
||||
description: Create a new OpenAI response.
|
||||
summary: Create a model response.
|
||||
description: Create a model response.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -1444,8 +1480,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Retrieve an OpenAI response by its ID.
|
||||
description: Retrieve an OpenAI response by its ID.
|
||||
summary: Get a model response.
|
||||
description: Get a model response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1475,8 +1511,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Delete an OpenAI response by its ID.
|
||||
description: Delete an OpenAI response by its ID.
|
||||
summary: Delete a response.
|
||||
description: Delete a response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1506,10 +1542,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: >-
|
||||
List input items for a given OpenAI response.
|
||||
description: >-
|
||||
List input items for a given OpenAI response.
|
||||
summary: List input items.
|
||||
description: List input items.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1578,8 +1612,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
summary: Run a shield.
|
||||
description: Run a shield.
|
||||
summary: Run shield.
|
||||
description: >-
|
||||
Run shield.
|
||||
|
||||
Run a shield.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -3135,8 +3172,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
summary: Get the version of the service.
|
||||
description: Get the version of the service.
|
||||
summary: Get version.
|
||||
description: >-
|
||||
Get version.
|
||||
|
||||
Get the version of the service.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
jsonSchemaDialect: >-
|
||||
|
|
@ -9749,9 +9789,16 @@ tags:
|
|||
x-displayName: >-
|
||||
Protocol for conversation management operations.
|
||||
- name: Files
|
||||
description: ''
|
||||
description: >-
|
||||
This API is used to upload documents that can be used with other Llama Stack
|
||||
APIs.
|
||||
x-displayName: Files
|
||||
- name: Inference
|
||||
description: >-
|
||||
Llama Stack Inference API for generating completions, chat completions, and
|
||||
embeddings.
|
||||
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
are supported:
|
||||
|
||||
|
|
@ -9759,23 +9806,25 @@ tags:
|
|||
|
||||
- Embedding models: these models generate embeddings to be used for semantic
|
||||
search.
|
||||
x-displayName: >-
|
||||
Llama Stack Inference API for generating completions, chat completions, and
|
||||
embeddings.
|
||||
x-displayName: Inference
|
||||
- name: Inspect
|
||||
description: ''
|
||||
description: >-
|
||||
APIs for inspecting the Llama Stack service, including health status, available
|
||||
API routes with methods and implementing providers.
|
||||
x-displayName: Inspect
|
||||
- name: Models
|
||||
description: ''
|
||||
- name: Prompts
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Protocol for prompt management operations.
|
||||
x-displayName: Prompts
|
||||
- name: Providers
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||
x-displayName: Providers
|
||||
- name: Safety
|
||||
description: ''
|
||||
description: OpenAI-compatible Moderations API.
|
||||
x-displayName: Safety
|
||||
- name: Scoring
|
||||
description: ''
|
||||
- name: ScoringFunctions
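
The Safety hunks above retitle the moderation operation to "Create moderation."; per the description it still classifies whether text and/or image inputs are potentially harmful. A hedged sketch of a call, assuming a local server and a placeholder safety model identifier:

# Sketch only: OpenAI-compatible moderation request.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

result = client.moderations.create(
    model="llama-guard",   # placeholder safety model id, not from this diff
    input="How do I build a birdhouse?",
)
print(result.results[0].flagged)   # True when the input is classified harmful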

145 docs/static/stainless-llama-stack-spec.html vendored

@@ -69,8 +69,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "List all chat completions.",
|
||||
"description": "List all chat completions.",
|
||||
"summary": "List chat completions.",
|
||||
"description": "List chat completions.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -146,8 +146,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"summary": "Create chat completions.",
|
||||
"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -191,8 +191,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Describe a chat completion by its ID.",
|
||||
"description": "Describe a chat completion by its ID.",
|
||||
"summary": "Get chat completion.",
|
||||
"description": "Get chat completion.\nDescribe a chat completion by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "completion_id",
|
||||
|
|
@ -236,8 +236,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"summary": "Create completion.",
|
||||
"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -758,8 +758,8 @@
|
|||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"summary": "Create embeddings.",
|
||||
"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -803,8 +803,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns a list of files that belong to the user's organization.",
|
||||
"description": "Returns a list of files that belong to the user's organization.",
|
||||
"summary": "List files.",
|
||||
"description": "List files.\nReturns a list of files that belong to the user's organization.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -873,8 +873,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Upload a file that can be used across various endpoints.",
|
||||
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||
"summary": "Upload file.",
|
||||
"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -934,8 +934,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns information about a specific file.",
|
||||
"description": "Returns information about a specific file.",
|
||||
"summary": "Retrieve file.",
|
||||
"description": "Retrieve file.\nReturns information about a specific file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -977,8 +977,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Delete a file.",
|
||||
"description": "Delete a file.",
|
||||
"summary": "Delete file.",
|
||||
"description": "Delete file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1022,8 +1022,8 @@
|
|||
"tags": [
|
||||
"Files"
|
||||
],
|
||||
"summary": "Returns the contents of the specified file.",
|
||||
"description": "Returns the contents of the specified file.",
|
||||
"summary": "Retrieve file content.",
|
||||
"description": "Retrieve file content.\nReturns the contents of the specified file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "file_id",
|
||||
|
|
@ -1067,8 +1067,8 @@
|
|||
"tags": [
|
||||
"Inspect"
|
||||
],
|
||||
"summary": "Get the current health status of the service.",
|
||||
"description": "Get the current health status of the service.",
|
||||
"summary": "Get health status.",
|
||||
"description": "Get health status.\nGet the current health status of the service.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -1102,8 +1102,8 @@
|
|||
"tags": [
|
||||
"Inspect"
|
||||
],
|
||||
"summary": "List all available API routes with their methods and implementing providers.",
|
||||
"description": "List all available API routes with their methods and implementing providers.",
|
||||
"summary": "List routes.",
|
||||
"description": "List routes.\nList all available API routes with their methods and implementing providers.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -1170,8 +1170,8 @@
|
|||
"tags": [
|
||||
"Models"
|
||||
],
|
||||
"summary": "Register a model.",
|
||||
"description": "Register a model.",
|
||||
"summary": "Register model.",
|
||||
"description": "Register model.\nRegister a model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1215,8 +1215,8 @@
|
|||
"tags": [
|
||||
"Models"
|
||||
],
|
||||
"summary": "Get a model by its identifier.",
|
||||
"description": "Get a model by its identifier.",
|
||||
"summary": "Get model.",
|
||||
"description": "Get model.\nGet a model by its identifier.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "model_id",
|
||||
|
|
@ -1251,8 +1251,8 @@
|
|||
"tags": [
|
||||
"Models"
|
||||
],
|
||||
"summary": "Unregister a model.",
|
||||
"description": "Unregister a model.",
|
||||
"summary": "Unregister model.",
|
||||
"description": "Unregister model.\nUnregister a model.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "model_id",
|
||||
|
|
@ -1296,8 +1296,8 @@
|
|||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"summary": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"description": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"summary": "Create moderation.",
|
||||
"description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1374,8 +1374,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Create a new prompt.",
|
||||
"description": "Create a new prompt.",
|
||||
"summary": "Create prompt.",
|
||||
"description": "Create prompt.\nCreate a new prompt.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1419,8 +1419,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Get a prompt by its identifier and optional version.",
|
||||
"description": "Get a prompt by its identifier and optional version.",
|
||||
"summary": "Get prompt.",
|
||||
"description": "Get prompt.\nGet a prompt by its identifier and optional version.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1471,8 +1471,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Update an existing prompt (increments version).",
|
||||
"description": "Update an existing prompt (increments version).",
|
||||
"summary": "Update prompt.",
|
||||
"description": "Update prompt.\nUpdate an existing prompt (increments version).",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1517,8 +1517,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Delete a prompt.",
|
||||
"description": "Delete a prompt.",
|
||||
"summary": "Delete prompt.",
|
||||
"description": "Delete prompt.\nDelete a prompt.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1562,8 +1562,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "Set which version of a prompt should be the default in get_prompt (latest).",
|
||||
"description": "Set which version of a prompt should be the default in get_prompt (latest).",
|
||||
"summary": "Set prompt version.",
|
||||
"description": "Set prompt version.\nSet which version of a prompt should be the default in get_prompt (latest).",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1617,8 +1617,8 @@
|
|||
"tags": [
|
||||
"Prompts"
|
||||
],
|
||||
"summary": "List all versions of a specific prompt.",
|
||||
"description": "List all versions of a specific prompt.",
|
||||
"summary": "List prompt versions.",
|
||||
"description": "List prompt versions.\nList all versions of a specific prompt.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "prompt_id",
|
||||
|
|
@ -1662,8 +1662,8 @@
|
|||
"tags": [
|
||||
"Providers"
|
||||
],
|
||||
"summary": "List all available providers.",
|
||||
"description": "List all available providers.",
|
||||
"summary": "List providers.",
|
||||
"description": "List providers.\nList all available providers.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -1697,8 +1697,8 @@
|
|||
"tags": [
|
||||
"Providers"
|
||||
],
|
||||
"summary": "Get detailed information about a specific provider.",
|
||||
"description": "Get detailed information about a specific provider.",
|
||||
"summary": "Get provider.",
|
||||
"description": "Get provider.\nGet detailed information about a specific provider.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "provider_id",
|
||||
|
|
@ -1742,8 +1742,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "List all OpenAI responses.",
|
||||
"description": "List all OpenAI responses.",
|
||||
"summary": "List all responses.",
|
||||
"description": "List all responses.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
|
|
@ -1817,8 +1817,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Create a new OpenAI response.",
|
||||
"description": "Create a new OpenAI response.",
|
||||
"summary": "Create a model response.",
|
||||
"description": "Create a model response.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -1882,8 +1882,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Retrieve an OpenAI response by its ID.",
|
||||
"description": "Retrieve an OpenAI response by its ID.",
|
||||
"summary": "Get a model response.",
|
||||
"description": "Get a model response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -1925,8 +1925,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "Delete an OpenAI response by its ID.",
|
||||
"description": "Delete an OpenAI response by its ID.",
|
||||
"summary": "Delete a response.",
|
||||
"description": "Delete a response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -1970,8 +1970,8 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"summary": "List input items for a given OpenAI response.",
|
||||
"description": "List input items for a given OpenAI response.",
|
||||
"summary": "List input items.",
|
||||
"description": "List input items.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
|
|
@ -2063,8 +2063,8 @@
|
|||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"summary": "Run a shield.",
|
||||
"description": "Run a shield.",
|
||||
"summary": "Run shield.",
|
||||
"description": "Run shield.\nRun a shield.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -4196,8 +4196,8 @@
|
|||
"tags": [
|
||||
"Inspect"
|
||||
],
|
||||
"summary": "Get the version of the service.",
|
||||
"description": "Get the version of the service.",
|
||||
"summary": "Get version.",
|
||||
"description": "Get version.\nGet the version of the service.",
|
||||
"parameters": [],
|
||||
"deprecated": false
|
||||
}
|
||||
|
|
@ -18487,16 +18487,18 @@
|
|||
},
|
||||
{
|
||||
"name": "Files",
|
||||
"description": ""
|
||||
"description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
|
||||
"x-displayName": "Files"
|
||||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
|
||||
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
"x-displayName": "Inference"
|
||||
},
|
||||
{
|
||||
"name": "Inspect",
|
||||
"description": ""
|
||||
"description": "APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.",
|
||||
"x-displayName": "Inspect"
|
||||
},
|
||||
{
|
||||
"name": "Models",
|
||||
|
|
@ -18508,17 +18510,18 @@
|
|||
},
|
||||
{
|
||||
"name": "Prompts",
|
||||
"description": "",
|
||||
"x-displayName": "Protocol for prompt management operations."
|
||||
"description": "Protocol for prompt management operations.",
|
||||
"x-displayName": "Prompts"
|
||||
},
|
||||
{
|
||||
"name": "Providers",
|
||||
"description": "",
|
||||
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
|
||||
"description": "Providers API for inspecting, listing, and modifying providers and their configurations.",
|
||||
"x-displayName": "Providers"
|
||||
},
|
||||
{
|
||||
"name": "Safety",
|
||||
"description": ""
|
||||
"description": "OpenAI-compatible Moderations API.",
|
||||
"x-displayName": "Safety"
|
||||
},
|
||||
{
|
||||
"name": "Scoring",

203 docs/static/stainless-llama-stack-spec.yaml vendored

@@ -36,8 +36,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: List all chat completions.
|
||||
description: List all chat completions.
|
||||
summary: List chat completions.
|
||||
description: List chat completions.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
|
|
@ -90,10 +90,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
summary: Create chat completions.
|
||||
description: >-
|
||||
Create chat completions.
|
||||
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
parameters: []
|
||||
|
|
@ -125,8 +125,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: Describe a chat completion by its ID.
|
||||
description: Describe a chat completion by its ID.
|
||||
summary: Get chat completion.
|
||||
description: >-
|
||||
Get chat completion.
|
||||
|
||||
Describe a chat completion by its ID.
|
||||
parameters:
|
||||
- name: completion_id
|
||||
in: path
|
||||
|
|
@ -156,10 +159,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
summary: Create completion.
|
||||
description: >-
|
||||
Create completion.
|
||||
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
parameters: []
|
||||
|
|
@ -606,10 +609,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
summary: >-
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||
model.
|
||||
summary: Create embeddings.
|
||||
description: >-
|
||||
Create embeddings.
|
||||
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified
|
||||
model.
|
||||
parameters: []
|
||||
|
|
@ -642,9 +645,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns a list of files that belong to the user's organization.
|
||||
summary: List files.
|
||||
description: >-
|
||||
List files.
|
||||
|
||||
Returns a list of files that belong to the user's organization.
|
||||
parameters:
|
||||
- name: after
|
||||
|
|
@ -702,11 +706,13 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Upload a file that can be used across various endpoints.
|
||||
summary: Upload file.
|
||||
description: >-
|
||||
Upload file.
|
||||
|
||||
Upload a file that can be used across various endpoints.
|
||||
|
||||
|
||||
The file upload should be a multipart form request with:
|
||||
|
||||
- file: The File object (not file name) to be uploaded.
|
||||
|
|
@ -755,9 +761,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns information about a specific file.
|
||||
summary: Retrieve file.
|
||||
description: >-
|
||||
Retrieve file.
|
||||
|
||||
Returns information about a specific file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
|
|
@ -789,8 +796,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: Delete a file.
|
||||
description: Delete a file.
|
||||
summary: Delete file.
|
||||
description: Delete file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
in: path
|
||||
|
|
@ -822,9 +829,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files
|
||||
summary: >-
|
||||
Returns the contents of the specified file.
|
||||
summary: Retrieve file content.
|
||||
description: >-
|
||||
Retrieve file content.
|
||||
|
||||
Returns the contents of the specified file.
|
||||
parameters:
|
||||
- name: file_id
|
||||
|
|
@ -857,9 +865,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
summary: >-
|
||||
Get the current health status of the service.
|
||||
summary: Get health status.
|
||||
description: >-
|
||||
Get health status.
|
||||
|
||||
Get the current health status of the service.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
|
|
@ -885,9 +894,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
summary: >-
|
||||
List all available API routes with their methods and implementing providers.
|
||||
summary: List routes.
|
||||
description: >-
|
||||
List routes.
|
||||
|
||||
List all available API routes with their methods and implementing providers.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
|
|
@ -936,8 +946,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: Register a model.
|
||||
description: Register a model.
|
||||
summary: Register model.
|
||||
description: >-
|
||||
Register model.
|
||||
|
||||
Register a model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -967,8 +980,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: Get a model by its identifier.
|
||||
description: Get a model by its identifier.
|
||||
summary: Get model.
|
||||
description: >-
|
||||
Get model.
|
||||
|
||||
Get a model by its identifier.
|
||||
parameters:
|
||||
- name: model_id
|
||||
in: path
|
||||
|
|
@ -993,8 +1009,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
summary: Unregister a model.
|
||||
description: Unregister a model.
|
||||
summary: Unregister model.
|
||||
description: >-
|
||||
Unregister model.
|
||||
|
||||
Unregister a model.
|
||||
parameters:
|
||||
- name: model_id
|
||||
in: path
|
||||
|
|
@ -1025,9 +1044,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
summary: >-
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
summary: Create moderation.
|
||||
description: >-
|
||||
Create moderation.
|
||||
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
parameters: []
|
||||
requestBody:
|
||||
|
|
@ -1083,8 +1103,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: Create a new prompt.
|
||||
description: Create a new prompt.
|
||||
summary: Create prompt.
|
||||
description: >-
|
||||
Create prompt.
|
||||
|
||||
Create a new prompt.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -1114,9 +1137,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: >-
|
||||
Get a prompt by its identifier and optional version.
|
||||
summary: Get prompt.
|
||||
description: >-
|
||||
Get prompt.
|
||||
|
||||
Get a prompt by its identifier and optional version.
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
|
|
@ -1154,9 +1178,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: >-
|
||||
Update an existing prompt (increments version).
|
||||
summary: Update prompt.
|
||||
description: >-
|
||||
Update prompt.
|
||||
|
||||
Update an existing prompt (increments version).
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
|
|
@ -1188,8 +1213,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: Delete a prompt.
|
||||
description: Delete a prompt.
|
||||
summary: Delete prompt.
|
||||
description: >-
|
||||
Delete prompt.
|
||||
|
||||
Delete a prompt.
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
in: path
|
||||
|
|
@ -1220,9 +1248,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: >-
|
||||
Set which version of a prompt should be the default in get_prompt (latest).
|
||||
summary: Set prompt version.
|
||||
description: >-
|
||||
Set prompt version.
|
||||
|
||||
Set which version of a prompt should be the default in get_prompt (latest).
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
|
|
@ -1260,8 +1289,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Prompts
|
||||
summary: List all versions of a specific prompt.
|
||||
description: List all versions of a specific prompt.
|
||||
summary: List prompt versions.
|
||||
description: >-
|
||||
List prompt versions.
|
||||
|
||||
List all versions of a specific prompt.
|
||||
parameters:
|
||||
- name: prompt_id
|
||||
in: path
|
||||
|
|
@ -1293,8 +1325,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Providers
|
||||
summary: List all available providers.
|
||||
description: List all available providers.
|
||||
summary: List providers.
|
||||
description: >-
|
||||
List providers.
|
||||
|
||||
List all available providers.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
/v1/providers/{provider_id}:
|
||||
|
|
@ -1319,9 +1354,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Providers
|
||||
summary: >-
|
||||
Get detailed information about a specific provider.
|
||||
summary: Get provider.
|
||||
description: >-
|
||||
Get provider.
|
||||
|
||||
Get detailed information about a specific provider.
|
||||
parameters:
|
||||
- name: provider_id
|
||||
|
|
@ -1352,8 +1388,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: List all OpenAI responses.
|
||||
description: List all OpenAI responses.
|
||||
summary: List all responses.
|
||||
description: List all responses.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
|
|
@ -1404,8 +1440,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Create a new OpenAI response.
|
||||
description: Create a new OpenAI response.
|
||||
summary: Create a model response.
|
||||
description: Create a model response.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
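To make the renamed operation concrete, a sketch of creating a model response via the OpenAI Python client pointed at the stack; the base URL and model id are assumptions:

```python
from openai import OpenAI

# Assumed local server address and model id.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

resp = client.responses.create(
    model="llama3.2:3b",
    input="Give me three facts about llamas.",
)
print(resp.output_text)
```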
|
@ -1447,8 +1483,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Retrieve an OpenAI response by its ID.
|
||||
description: Retrieve an OpenAI response by its ID.
|
||||
summary: Get a model response.
|
||||
description: Get a model response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1478,8 +1514,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: Delete an OpenAI response by its ID.
|
||||
description: Delete an OpenAI response by its ID.
|
||||
summary: Delete a response.
|
||||
description: Delete a response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1509,10 +1545,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
summary: >-
|
||||
List input items for a given OpenAI response.
|
||||
description: >-
|
||||
List input items for a given OpenAI response.
|
||||
summary: List input items.
|
||||
description: List input items.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
|
|
@ -1581,8 +1615,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
summary: Run a shield.
|
||||
description: Run a shield.
|
||||
summary: Run shield.
|
||||
description: >-
|
||||
Run shield.
|
||||
|
||||
Run a shield.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -3138,8 +3175,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
summary: Get the version of the service.
|
||||
description: Get the version of the service.
|
||||
summary: Get version.
|
||||
description: >-
|
||||
Get version.
|
||||
|
||||
Get the version of the service.
|
||||
parameters: []
|
||||
deprecated: false
|
||||
/v1beta/datasetio/append-rows/{dataset_id}:
|
||||
|
|
@ -13795,9 +13835,16 @@ tags:
|
|||
x-displayName: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
- name: Files
|
||||
description: ''
|
||||
description: >-
|
||||
This API is used to upload documents that can be used with other Llama Stack
|
||||
APIs.
|
||||
x-displayName: Files
|
||||
- name: Inference
|
||||
description: >-
|
||||
Llama Stack Inference API for generating completions, chat completions, and
|
||||
embeddings.
|
||||
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
are supported:
|
||||
|
||||
|
|
@ -13805,25 +13852,27 @@ tags:
|
|||
|
||||
- Embedding models: these models generate embeddings to be used for semantic
|
||||
search.
|
||||
x-displayName: >-
|
||||
Llama Stack Inference API for generating completions, chat completions, and
|
||||
embeddings.
|
||||
x-displayName: Inference
|
||||
- name: Inspect
|
||||
description: ''
|
||||
description: >-
|
||||
APIs for inspecting the Llama Stack service, including health status, available
|
||||
API routes with methods and implementing providers.
|
||||
x-displayName: Inspect
|
||||
- name: Models
|
||||
description: ''
|
||||
- name: PostTraining (Coming Soon)
|
||||
description: ''
|
||||
- name: Prompts
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Protocol for prompt management operations.
|
||||
x-displayName: Prompts
|
||||
- name: Providers
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||
x-displayName: Providers
|
||||
- name: Safety
|
||||
description: ''
|
||||
description: OpenAI-compatible Moderations API.
|
||||
x-displayName: Safety
|
||||
- name: Scoring
|
||||
description: ''
|
||||
- name: ScoringFunctions
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
|
|||
...
|
||||
Build Successful!
|
||||
You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml
|
||||
You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter --image-type venv
|
||||
You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter
|
||||
```
|
||||
|
||||
3. **Set the ENV variables by exporting them to the terminal**:
|
||||
|
|
@ -102,12 +102,11 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
|
|||
3. **Run the Llama Stack**:
|
||||
Run the stack using uv:
|
||||
```bash
|
||||
INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
SAFETY_MODEL=$SAFETY_MODEL \
|
||||
OLLAMA_URL=$OLLAMA_URL \
|
||||
uv run --with llama-stack llama stack run starter \
|
||||
--image-type venv \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
--env SAFETY_MODEL=$SAFETY_MODEL \
|
||||
--env OLLAMA_URL=$OLLAMA_URL
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model.
|
||||
|
||||
|
|
|
|||
|
|
@ -797,7 +797,7 @@ class Agents(Protocol):
|
|||
self,
|
||||
response_id: str,
|
||||
) -> OpenAIResponseObject:
|
||||
"""Retrieve an OpenAI response by its ID.
|
||||
"""Get a model response.
|
||||
|
||||
:param response_id: The ID of the OpenAI response to retrieve.
|
||||
:returns: An OpenAIResponseObject.
|
||||
|
|
@ -826,7 +826,7 @@ class Agents(Protocol):
|
|||
),
|
||||
] = None,
|
||||
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
|
||||
"""Create a new OpenAI response.
|
||||
"""Create a model response.
|
||||
|
||||
:param input: Input message(s) to create the response.
|
||||
:param model: The underlying LLM used for completions.
|
||||
|
|
@ -846,7 +846,7 @@ class Agents(Protocol):
|
|||
model: str | None = None,
|
||||
order: Order | None = Order.desc,
|
||||
) -> ListOpenAIResponseObject:
|
||||
"""List all OpenAI responses.
|
||||
"""List all responses.
|
||||
|
||||
:param after: The ID of the last response to return.
|
||||
:param limit: The number of responses to return.
|
||||
|
|
@ -869,7 +869,7 @@ class Agents(Protocol):
|
|||
limit: int | None = 20,
|
||||
order: Order | None = Order.desc,
|
||||
) -> ListOpenAIResponseInputItem:
|
||||
"""List input items for a given OpenAI response.
|
||||
"""List input items.
|
||||
|
||||
:param response_id: The ID of the response to retrieve input items for.
|
||||
:param after: An item ID to list items after, used for pagination.
|
||||
|
|
@ -884,7 +884,7 @@ class Agents(Protocol):
|
|||
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
|
||||
"""Delete an OpenAI response by its ID.
|
||||
"""Delete a response.
|
||||
|
||||
:param response_id: The ID of the OpenAI response to delete.
|
||||
:returns: An OpenAIDeleteResponseObject
|
||||
|
|
|
|||
|
|
@ -104,6 +104,11 @@ class OpenAIFileDeleteResponse(BaseModel):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Files(Protocol):
|
||||
"""Files
|
||||
|
||||
This API is used to upload documents that can be used with other Llama Stack APIs.
|
||||
"""
|
||||
|
||||
# OpenAI Files API Endpoints
|
||||
@webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
|
||||
|
|
@ -113,7 +118,8 @@ class Files(Protocol):
|
|||
purpose: Annotated[OpenAIFilePurpose, Form()],
|
||||
expires_after: Annotated[ExpiresAfter | None, Form()] = None,
|
||||
) -> OpenAIFileObject:
|
||||
"""
|
||||
"""Upload file.
|
||||
|
||||
Upload a file that can be used across various endpoints.
|
||||
|
||||
The file upload should be a multipart form request with:
|
||||
|
|
@ -137,7 +143,8 @@ class Files(Protocol):
|
|||
order: Order | None = Order.desc,
|
||||
purpose: OpenAIFilePurpose | None = None,
|
||||
) -> ListOpenAIFileResponse:
|
||||
"""
|
||||
"""List files.
|
||||
|
||||
Returns a list of files that belong to the user's organization.
|
||||
|
||||
:param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
|
||||
|
|
@ -154,7 +161,8 @@ class Files(Protocol):
|
|||
self,
|
||||
file_id: str,
|
||||
) -> OpenAIFileObject:
|
||||
"""
|
||||
"""Retrieve file.
|
||||
|
||||
Returns information about a specific file.
|
||||
|
||||
:param file_id: The ID of the file to use for this request.
|
||||
|
|
@ -168,8 +176,7 @@ class Files(Protocol):
|
|||
self,
|
||||
file_id: str,
|
||||
) -> OpenAIFileDeleteResponse:
|
||||
"""
|
||||
Delete a file.
|
||||
"""Delete file.
|
||||
|
||||
:param file_id: The ID of the file to use for this request.
|
||||
:returns: An OpenAIFileDeleteResponse indicating successful deletion.
|
||||
|
|
@ -182,7 +189,8 @@ class Files(Protocol):
|
|||
self,
|
||||
file_id: str,
|
||||
) -> Response:
|
||||
"""
|
||||
"""Retrieve file content.
|
||||
|
||||
Returns the contents of the specified file.
|
||||
|
||||
:param file_id: The ID of the file to use for this request.
|
||||
|
|
|
|||
|
|
@ -1053,7 +1053,9 @@ class InferenceProvider(Protocol):
|
|||
# for fill-in-the-middle type completion
|
||||
suffix: str | None = None,
|
||||
) -> OpenAICompletion:
|
||||
"""Generate an OpenAI-compatible completion for the given prompt using the specified model.
|
||||
"""Create completion.
|
||||
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified model.
|
||||
|
||||
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
|
||||
:param prompt: The prompt to generate a completion for.
|
||||
|
|
@ -1105,7 +1107,9 @@ class InferenceProvider(Protocol):
|
|||
top_p: float | None = None,
|
||||
user: str | None = None,
|
||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||
"""Generate an OpenAI-compatible chat completion for the given messages using the specified model.
|
||||
"""Create chat completions.
|
||||
|
||||
Generate an OpenAI-compatible chat completion for the given messages using the specified model.
|
||||
|
||||
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
|
||||
:param messages: List of messages in the conversation.
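A minimal sketch of calling this endpoint through the OpenAI Python client; the base URL and model id are assumptions, and any model registered with the stack works:

```python
from openai import OpenAI

# Assumed local server address; any model listed by /v1/models can be used.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

chat = client.chat.completions.create(
    model="llama3.2:3b",  # assumed model id registered with the stack
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(chat.choices[0].message.content)
```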
|
||||
|
|
@ -1144,7 +1148,9 @@ class InferenceProvider(Protocol):
|
|||
dimensions: int | None = None,
|
||||
user: str | None = None,
|
||||
) -> OpenAIEmbeddingsResponse:
|
||||
"""Generate OpenAI-compatible embeddings for the given input using the specified model.
|
||||
"""Create embeddings.
|
||||
|
||||
Generate OpenAI-compatible embeddings for the given input using the specified model.
|
||||
|
||||
:param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
|
||||
:param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings.
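A matching sketch for the embeddings path, again with an assumed base URL and embedding model id:

```python
from openai import OpenAI

# Assumed local server address and embedding model id.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

emb = client.embeddings.create(
    model="all-MiniLM-L6-v2",  # assumed embedding model id
    input=["first sentence to embed", "second sentence to embed"],
)
print(len(emb.data), len(emb.data[0].embedding))
```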
|
||||
|
|
@ -1157,7 +1163,9 @@ class InferenceProvider(Protocol):
|
|||
|
||||
|
||||
class Inference(InferenceProvider):
|
||||
"""Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
||||
"""Inference
|
||||
|
||||
Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
||||
|
||||
This API provides the raw interface to the underlying models. Two kinds of models are supported:
|
||||
- LLM models: these models generate "raw" and "chat" (conversational) completions.
|
||||
|
|
@ -1173,7 +1181,7 @@ class Inference(InferenceProvider):
|
|||
model: str | None = None,
|
||||
order: Order | None = Order.desc,
|
||||
) -> ListOpenAIChatCompletionResponse:
|
||||
"""List all chat completions.
|
||||
"""List chat completions.
|
||||
|
||||
:param after: The ID of the last chat completion to return.
|
||||
:param limit: The maximum number of chat completions to return.
|
||||
|
|
@ -1188,7 +1196,9 @@ class Inference(InferenceProvider):
|
|||
)
|
||||
@webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
|
||||
"""Describe a chat completion by its ID.
|
||||
"""Get chat completion.
|
||||
|
||||
Describe a chat completion by its ID.
|
||||
|
||||
:param completion_id: ID of the chat completion.
|
||||
:returns: A OpenAICompletionWithInputMessages.
|
||||
|
|
|
|||
|
|
@ -58,9 +58,16 @@ class ListRoutesResponse(BaseModel):
|
|||
|
||||
@runtime_checkable
|
||||
class Inspect(Protocol):
|
||||
"""Inspect
|
||||
|
||||
APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.
|
||||
"""
|
||||
|
||||
@webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_routes(self) -> ListRoutesResponse:
|
||||
"""List all available API routes with their methods and implementing providers.
|
||||
"""List routes.
|
||||
|
||||
List all available API routes with their methods and implementing providers.
|
||||
|
||||
:returns: Response containing information about all available routes.
|
||||
"""
|
||||
|
|
@ -68,7 +75,9 @@ class Inspect(Protocol):
|
|||
|
||||
@webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def health(self) -> HealthInfo:
|
||||
"""Get the current health status of the service.
|
||||
"""Get health status.
|
||||
|
||||
Get the current health status of the service.
|
||||
|
||||
:returns: Health information indicating if the service is operational.
|
||||
"""
|
||||
|
|
@ -76,7 +85,9 @@ class Inspect(Protocol):
|
|||
|
||||
@webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def version(self) -> VersionInfo:
|
||||
"""Get the version of the service.
|
||||
"""Get version.
|
||||
|
||||
Get the version of the service.
|
||||
|
||||
:returns: Version information containing the service version number.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -124,7 +124,9 @@ class Models(Protocol):
|
|||
self,
|
||||
model_id: str,
|
||||
) -> Model:
|
||||
"""Get a model by its identifier.
|
||||
"""Get model.
|
||||
|
||||
Get a model by its identifier.
|
||||
|
||||
:param model_id: The identifier of the model to get.
|
||||
:returns: A Model.
|
||||
|
|
@ -140,7 +142,9 @@ class Models(Protocol):
|
|||
metadata: dict[str, Any] | None = None,
|
||||
model_type: ModelType | None = None,
|
||||
) -> Model:
|
||||
"""Register a model.
|
||||
"""Register model.
|
||||
|
||||
Register a model.
|
||||
|
||||
:param model_id: The identifier of the model to register.
|
||||
:param provider_model_id: The identifier of the model in the provider.
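A hedged sketch of registering and then fetching a model over HTTP, assuming the stack serves these operations at /v1/models; the ids and provider name are placeholders:

```python
import requests

BASE = "http://localhost:8321/v1"  # assumed local server address

# Register a model, then read it back; body keys mirror the parameters above.
requests.post(
    f"{BASE}/models",
    json={
        "model_id": "llama3.2:3b",
        "provider_id": "ollama",
        "provider_model_id": "llama3.2:3b",
        "model_type": "llm",
    },
)
print(requests.get(f"{BASE}/models/llama3.2:3b").json())
```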
|
||||
|
|
@ -156,7 +160,9 @@ class Models(Protocol):
|
|||
self,
|
||||
model_id: str,
|
||||
) -> None:
|
||||
"""Unregister a model.
|
||||
"""Unregister model.
|
||||
|
||||
Unregister a model.
|
||||
|
||||
:param model_id: The identifier of the model to unregister.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -94,7 +94,9 @@ class ListPromptsResponse(BaseModel):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Prompts(Protocol):
|
||||
"""Protocol for prompt management operations."""
|
||||
"""Prompts
|
||||
|
||||
Protocol for prompt management operations."""
|
||||
|
||||
@webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_prompts(self) -> ListPromptsResponse:
|
||||
|
|
@ -109,7 +111,9 @@ class Prompts(Protocol):
|
|||
self,
|
||||
prompt_id: str,
|
||||
) -> ListPromptsResponse:
|
||||
"""List all versions of a specific prompt.
|
||||
"""List prompt versions.
|
||||
|
||||
List all versions of a specific prompt.
|
||||
|
||||
:param prompt_id: The identifier of the prompt to list versions for.
|
||||
:returns: A ListPromptsResponse containing all versions of the prompt.
|
||||
|
|
@ -122,7 +126,9 @@ class Prompts(Protocol):
|
|||
prompt_id: str,
|
||||
version: int | None = None,
|
||||
) -> Prompt:
|
||||
"""Get a prompt by its identifier and optional version.
|
||||
"""Get prompt.
|
||||
|
||||
Get a prompt by its identifier and optional version.
|
||||
|
||||
:param prompt_id: The identifier of the prompt to get.
|
||||
:param version: The version of the prompt to get (defaults to latest).
|
||||
|
|
@ -136,7 +142,9 @@ class Prompts(Protocol):
|
|||
prompt: str,
|
||||
variables: list[str] | None = None,
|
||||
) -> Prompt:
|
||||
"""Create a new prompt.
|
||||
"""Create prompt.
|
||||
|
||||
Create a new prompt.
|
||||
|
||||
:param prompt: The prompt text content with variable placeholders.
|
||||
:param variables: List of variable names that can be used in the prompt template.
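As a rough sketch of the prompt lifecycle, assuming the create operation is served at /v1/prompts and that the request body keys mirror the parameter names above; the template syntax and response field names are illustrative:

```python
import requests

BASE = "http://localhost:8321/v1"  # assumed local server address

created = requests.post(
    f"{BASE}/prompts",
    json={
        "prompt": "Summarize the following text in a {{ style }} tone: {{ text }}",
        "variables": ["style", "text"],
    },
).json()

# Read the prompt back by its identifier (field name assumed).
print(requests.get(f"{BASE}/prompts/{created['prompt_id']}").json())
```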
|
||||
|
|
@ -153,7 +161,9 @@ class Prompts(Protocol):
|
|||
variables: list[str] | None = None,
|
||||
set_as_default: bool = True,
|
||||
) -> Prompt:
|
||||
"""Update an existing prompt (increments version).
|
||||
"""Update prompt.
|
||||
|
||||
Update an existing prompt (increments version).
|
||||
|
||||
:param prompt_id: The identifier of the prompt to update.
|
||||
:param prompt: The updated prompt text content.
|
||||
|
|
@ -169,7 +179,9 @@ class Prompts(Protocol):
|
|||
self,
|
||||
prompt_id: str,
|
||||
) -> None:
|
||||
"""Delete a prompt.
|
||||
"""Delete prompt.
|
||||
|
||||
Delete a prompt.
|
||||
|
||||
:param prompt_id: The identifier of the prompt to delete.
|
||||
"""
|
||||
|
|
@ -181,7 +193,9 @@ class Prompts(Protocol):
|
|||
prompt_id: str,
|
||||
version: int,
|
||||
) -> Prompt:
|
||||
"""Set which version of a prompt should be the default in get_prompt (latest).
|
||||
"""Set prompt version.
|
||||
|
||||
Set which version of a prompt should be the default in get_prompt (latest).
|
||||
|
||||
:param prompt_id: The identifier of the prompt.
|
||||
:param version: The version to set as default.
|
||||
|
|
|
|||
|
|
@ -42,13 +42,16 @@ class ListProvidersResponse(BaseModel):
|
|||
|
||||
@runtime_checkable
|
||||
class Providers(Protocol):
|
||||
"""
|
||||
"""Providers
|
||||
|
||||
Providers API for inspecting, listing, and modifying providers and their configurations.
|
||||
"""
|
||||
|
||||
@webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def list_providers(self) -> ListProvidersResponse:
|
||||
"""List all available providers.
|
||||
"""List providers.
|
||||
|
||||
List all available providers.
|
||||
|
||||
:returns: A ListProvidersResponse containing information about all providers.
|
||||
"""
|
||||
|
|
@ -56,7 +59,9 @@ class Providers(Protocol):
|
|||
|
||||
@webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def inspect_provider(self, provider_id: str) -> ProviderInfo:
|
||||
"""Get detailed information about a specific provider.
|
||||
"""Get provider.
|
||||
|
||||
Get detailed information about a specific provider.
|
||||
|
||||
:param provider_id: The ID of the provider to inspect.
|
||||
:returns: A ProviderInfo object containing the provider's details.
|
||||
|
|
|
|||
|
|
@ -96,6 +96,11 @@ class ShieldStore(Protocol):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Safety(Protocol):
|
||||
"""Safety
|
||||
|
||||
OpenAI-compatible Moderations API.
|
||||
"""
|
||||
|
||||
shield_store: ShieldStore
|
||||
|
||||
@webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
|
||||
|
|
@ -105,7 +110,9 @@ class Safety(Protocol):
|
|||
messages: list[Message],
|
||||
params: dict[str, Any],
|
||||
) -> RunShieldResponse:
|
||||
"""Run a shield.
|
||||
"""Run shield.
|
||||
|
||||
Run a shield.
|
||||
|
||||
:param shield_id: The identifier of the shield to run.
|
||||
:param messages: The messages to run the shield on.
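A small sketch of invoking the shield route shown above over HTTP; the shield id, message content, and the response field read at the end are placeholders:

```python
import requests

BASE = "http://localhost:8321/v1"  # assumed local server address

result = requests.post(
    f"{BASE}/safety/run-shield",
    json={
        "shield_id": "llama-guard",
        "messages": [{"role": "user", "content": "How do I pick a lock?"}],
        "params": {},
    },
).json()
print(result.get("violation"))
```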
|
||||
|
|
@ -117,7 +124,9 @@ class Safety(Protocol):
|
|||
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
|
||||
"""Classifies if text and/or image inputs are potentially harmful.
|
||||
"""Create moderation.
|
||||
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
:param input: Input (or inputs) to classify.
|
||||
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
|
||||
:param model: The content moderation model you would like to use.
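Since this route is OpenAI-compatible, the standard moderations client call applies; the base URL and moderation model id are assumptions:

```python
from openai import OpenAI

# Assumed local server address and moderation model id.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

mod = client.moderations.create(
    model="llama-guard3:1b",  # assumed moderation model id
    input="How do I pick a lock?",
)
print(mod.results[0].flagged)
```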
|
||||
|
|
|
|||
|
|
@ -444,9 +444,21 @@ def _run_stack_build_command_from_build_config(
|
|||
|
||||
cprint("Build Successful!", color="green", file=sys.stderr)
|
||||
cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr)
|
||||
if build_config.image_type == LlamaStackImageType.VENV:
|
||||
cprint(
|
||||
"You can run the new Llama Stack distro via: "
|
||||
+ colored(f"llama stack run {run_config_file} --image-type {build_config.image_type}", "blue"),
|
||||
"You can run the new Llama Stack distro (after activating "
|
||||
+ colored(image_name, "cyan")
|
||||
+ ") via: "
|
||||
+ colored(f"llama stack run {run_config_file}", "blue"),
|
||||
color="green",
|
||||
file=sys.stderr,
|
||||
)
|
||||
elif build_config.image_type == LlamaStackImageType.CONTAINER:
|
||||
cprint(
|
||||
"You can run the container with: "
|
||||
+ colored(
|
||||
f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue"
|
||||
),
|
||||
color="green",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -6,11 +6,18 @@
|
|||
|
||||
import argparse
|
||||
import os
|
||||
import ssl
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import uvicorn
|
||||
import yaml
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
from llama_stack.core.datatypes import LoggingConfig, StackRunConfig
|
||||
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
REPO_ROOT = Path(__file__).parent.parent.parent.parent
|
||||
|
|
@ -48,18 +55,12 @@ class StackRun(Subcommand):
|
|||
"--image-name",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Name of the image to run. Defaults to the current environment",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--env",
|
||||
action="append",
|
||||
help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.",
|
||||
metavar="KEY=VALUE",
|
||||
help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--image-type",
|
||||
type=str,
|
||||
help="Image Type used during the build. This can be only venv.",
|
||||
help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
|
||||
choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value],
|
||||
)
|
||||
self.parser.add_argument(
|
||||
|
|
@ -68,48 +69,22 @@ class StackRun(Subcommand):
|
|||
help="Start the UI server",
|
||||
)
|
||||
|
||||
def _resolve_config_and_distro(self, args: argparse.Namespace) -> tuple[Path | None, str | None]:
|
||||
"""Resolve config file path and distribution name from args.config"""
|
||||
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
|
||||
|
||||
if not args.config:
|
||||
return None, None
|
||||
|
||||
config_file = Path(args.config)
|
||||
has_yaml_suffix = args.config.endswith(".yaml")
|
||||
distro_name = None
|
||||
|
||||
if not config_file.exists() and not has_yaml_suffix:
|
||||
# check if this is a distribution
|
||||
config_file = Path(REPO_ROOT) / "llama_stack" / "distributions" / args.config / "run.yaml"
|
||||
if config_file.exists():
|
||||
distro_name = args.config
|
||||
|
||||
if not config_file.exists() and not has_yaml_suffix:
|
||||
# check if it's a build config saved to ~/.llama dir
|
||||
config_file = Path(DISTRIBS_BASE_DIR / f"llamastack-{args.config}" / f"{args.config}-run.yaml")
|
||||
|
||||
if not config_file.exists():
|
||||
self.parser.error(
|
||||
f"File {str(config_file)} does not exist.\n\nPlease run `llama stack build` to generate (and optionally edit) a run.yaml file"
|
||||
)
|
||||
|
||||
if not config_file.is_file():
|
||||
self.parser.error(
|
||||
f"Config file must be a valid file path, '{config_file}' is not a file: type={type(config_file)}"
|
||||
)
|
||||
|
||||
return config_file, distro_name
|
||||
|
||||
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
|
||||
import yaml
|
||||
|
||||
from llama_stack.core.configure import parse_and_maybe_upgrade_config
|
||||
from llama_stack.core.utils.exec import formulate_run_args, run_command
|
||||
|
||||
if args.image_type or args.image_name:
|
||||
self.parser.error(
|
||||
"The --image-type and --image-name flags are no longer supported.\n\n"
|
||||
"Please activate your virtual environment manually before running `llama stack run`.\n\n"
|
||||
"For example:\n"
|
||||
" source /path/to/venv/bin/activate\n"
|
||||
" llama stack run <config>\n"
|
||||
)
|
||||
|
||||
if args.enable_ui:
|
||||
self._start_ui_development_server(args.port)
|
||||
image_type, image_name = args.image_type, args.image_name
|
||||
|
||||
if args.config:
|
||||
try:
|
||||
|
|
@ -121,10 +96,6 @@ class StackRun(Subcommand):
|
|||
else:
|
||||
config_file = None
|
||||
|
||||
# Check if config is required based on image type
|
||||
if image_type == ImageType.VENV.value and not config_file:
|
||||
self.parser.error("Config file is required for venv environment")
|
||||
|
||||
if config_file:
|
||||
logger.info(f"Using run configuration: {config_file}")
|
||||
|
||||
|
|
@ -139,50 +110,67 @@ class StackRun(Subcommand):
|
|||
os.makedirs(str(config.external_providers_dir), exist_ok=True)
|
||||
except AttributeError as e:
|
||||
self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
|
||||
|
||||
self._uvicorn_run(config_file, args)
|
||||
|
||||
def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
|
||||
if not config_file:
|
||||
self.parser.error("Config file is required")
|
||||
|
||||
config_file = resolve_config_or_distro(str(config_file), Mode.RUN)
|
||||
with open(config_file) as fp:
|
||||
config_contents = yaml.safe_load(fp)
|
||||
if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
|
||||
logger_config = LoggingConfig(**cfg)
|
||||
else:
|
||||
config = None
|
||||
logger_config = None
|
||||
config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
|
||||
|
||||
# If neither image type nor image name is provided, assume the server should be run directly
|
||||
# using the current environment packages.
|
||||
if not image_type and not image_name:
|
||||
logger.info("No image type or image name provided. Assuming environment packages.")
|
||||
from llama_stack.core.server.server import main as server_main
|
||||
port = args.port or config.server.port
|
||||
host = config.server.host or ["::", "0.0.0.0"]
|
||||
|
||||
# Build the server args from the current args passed to the CLI
|
||||
server_args = argparse.Namespace()
|
||||
for arg in vars(args):
|
||||
# If this is a function, avoid passing it
|
||||
# "args" contains:
|
||||
# func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
|
||||
if callable(getattr(args, arg)):
|
||||
continue
|
||||
if arg == "config":
|
||||
server_args.config = str(config_file)
|
||||
# Set the config file in environment so create_app can find it
|
||||
os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
|
||||
|
||||
uvicorn_config = {
|
||||
"factory": True,
|
||||
"host": host,
|
||||
"port": port,
|
||||
"lifespan": "on",
|
||||
"log_level": logger.getEffectiveLevel(),
|
||||
"log_config": logger_config,
|
||||
}
|
||||
|
||||
keyfile = config.server.tls_keyfile
|
||||
certfile = config.server.tls_certfile
|
||||
if keyfile and certfile:
|
||||
uvicorn_config["ssl_keyfile"] = config.server.tls_keyfile
|
||||
uvicorn_config["ssl_certfile"] = config.server.tls_certfile
|
||||
if config.server.tls_cafile:
|
||||
uvicorn_config["ssl_ca_certs"] = config.server.tls_cafile
|
||||
uvicorn_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
|
||||
|
||||
logger.info(
|
||||
f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}"
|
||||
)
|
||||
else:
|
||||
setattr(server_args, arg, getattr(args, arg))
|
||||
logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
|
||||
|
||||
# Run the server
|
||||
server_main(server_args)
|
||||
else:
|
||||
run_args = formulate_run_args(image_type, image_name)
|
||||
logger.info(f"Listening on {host}:{port}")
|
||||
|
||||
run_args.extend([str(args.port)])
|
||||
|
||||
if config_file:
|
||||
run_args.extend(["--config", str(config_file)])
|
||||
|
||||
if args.env:
|
||||
for env_var in args.env:
|
||||
if "=" not in env_var:
|
||||
self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format")
|
||||
return
|
||||
key, value = env_var.split("=", 1) # split on first = only
|
||||
if not key:
|
||||
self.parser.error(f"Environment variable '{env_var}' has empty key")
|
||||
return
|
||||
run_args.extend(["--env", f"{key}={value}"])
|
||||
|
||||
run_command(run_args)
|
||||
# We need to catch KeyboardInterrupt because uvicorn's signal handling
|
||||
# re-raises SIGINT signals using signal.raise_signal(), which Python
|
||||
# converts to KeyboardInterrupt. Without this catch, we'd get a confusing
|
||||
# stack trace when using Ctrl+C or kill -2 (SIGINT).
|
||||
# SIGTERM (kill -15) works fine without this because Python doesn't
|
||||
# have a default handler for it.
|
||||
#
|
||||
# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
|
||||
# signal handling but this is quite intrusive and not worth the effort.
|
||||
try:
|
||||
uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
logger.info("Received interrupt signal, shutting down gracefully...")
|
||||
|
||||
def _start_ui_development_server(self, stack_server_port: int):
|
||||
logger.info("Attempting to start UI development server...")
|
||||
|
|
|
|||
|
|
@ -324,14 +324,14 @@ fi
|
|||
RUN pip uninstall -y uv
|
||||
EOF
|
||||
|
||||
# If a run config is provided, we use the --config flag
|
||||
# If a run config is provided, we use the llama stack CLI
|
||||
if [[ -n "$run_config" ]]; then
|
||||
add_to_container << EOF
|
||||
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$RUN_CONFIG_PATH"]
|
||||
ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"]
|
||||
EOF
|
||||
elif [[ "$distro_or_config" != *.yaml ]]; then
|
||||
add_to_container << EOF
|
||||
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$distro_or_config"]
|
||||
ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"]
|
||||
EOF
|
||||
fi
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ from llama_stack.providers.utils.sqlstore.sqlstore import (
|
|||
sqlstore_impl,
|
||||
)
|
||||
|
||||
logger = get_logger(name=__name__, category="openai::conversations")
|
||||
logger = get_logger(name=__name__, category="openai_conversations")
|
||||
|
||||
|
||||
class ConversationServiceConfig(BaseModel):
|
||||
|
|
|
|||
|
|
@ -243,6 +243,7 @@ def get_external_providers_from_module(
|
|||
spec = module.get_provider_spec()
|
||||
else:
|
||||
# pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon build and run
|
||||
# in the case we are building we CANNOT import this module of course because it has not been installed.
|
||||
spec = ProviderSpec(
|
||||
api=Api(provider_api),
|
||||
provider_type=provider.provider_type,
|
||||
|
|
@ -251,8 +252,19 @@ def get_external_providers_from_module(
|
|||
config_class="",
|
||||
)
|
||||
provider_type = provider.provider_type
|
||||
# in the case we are building we CANNOT import this module of course because it has not been installed.
|
||||
# return a partially filled out spec that the build script will populate.
|
||||
if isinstance(spec, list):
|
||||
# optionally allow people to pass inline and remote provider specs as a returned list.
|
||||
# with the old method, users could pass in directories of specs using overlapping code
|
||||
# we want to ensure we preserve that flexibility in this method.
|
||||
logger.info(
|
||||
f"Detected a list of external provider specs from {provider.module} adding all to the registry"
|
||||
)
|
||||
for provider_spec in spec:
|
||||
if provider_spec.provider_type != provider.provider_type:
|
||||
continue
|
||||
logger.info(f"Adding {provider.provider_type} to registry")
|
||||
registry[Api(provider_api)][provider.provider_type] = provider_spec
|
||||
else:
|
||||
registry[Api(provider_api)][provider_type] = spec
|
||||
except ModuleNotFoundError as exc:
|
||||
raise ValueError(
|
||||
|
|
|
|||
|
|
@ -611,7 +611,7 @@ class InferenceRouter(Inference):
|
|||
completion_text += "".join(choice_data["content_parts"])
|
||||
|
||||
# Add metrics to the chunk
|
||||
if self.telemetry and chunk.usage:
|
||||
if self.telemetry and hasattr(chunk, "usage") and chunk.usage:
|
||||
metrics = self._construct_metrics(
|
||||
prompt_tokens=chunk.usage.prompt_tokens,
|
||||
completion_tokens=chunk.usage.completion_tokens,
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
|
|||
try:
|
||||
models = await provider.list_models()
|
||||
except Exception as e:
|
||||
logger.warning(f"Model refresh failed for provider {provider_id}: {e}")
|
||||
logger.debug(f"Model refresh failed for provider {provider_id}: {e}")
|
||||
continue
|
||||
|
||||
self.listed_providers.add(provider_id)
|
||||
|
|
@ -67,6 +67,19 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
|
|||
raise ValueError(f"Provider {model.provider_id} not found in the routing table")
|
||||
return self.impls_by_provider_id[model.provider_id]
|
||||
|
||||
async def has_model(self, model_id: str) -> bool:
|
||||
"""
|
||||
Check if a model exists in the routing table.
|
||||
|
||||
:param model_id: The model identifier to check
|
||||
:return: True if the model exists, False otherwise
|
||||
"""
|
||||
try:
|
||||
await lookup_model(self, model_id)
|
||||
return True
|
||||
except ModelNotFoundError:
|
||||
return False
|
||||
|
||||
async def register_model(
|
||||
self,
|
||||
model_id: str,
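A hypothetical caller of the new helper, shown only to illustrate the intended use; `table` stands for a ModelsRoutingTable instance:

```python
# Prefer an explicit existence check over catching ModelNotFoundError directly.
async def ensure_model(table, model_id: str) -> None:
    if not await table.has_model(model_id):
        raise RuntimeError(f"Model '{model_id}' is not registered with the stack")
```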
|
||||
|
|
|
|||
|
|
@ -245,3 +245,65 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
|
|||
vector_store_id=vector_store_id,
|
||||
file_id=file_id,
|
||||
)
|
||||
|
||||
async def openai_create_vector_store_file_batch(
|
||||
self,
|
||||
vector_store_id: str,
|
||||
file_ids: list[str],
|
||||
attributes: dict[str, Any] | None = None,
|
||||
chunking_strategy: Any | None = None,
|
||||
):
|
||||
await self.assert_action_allowed("update", "vector_db", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_create_vector_store_file_batch(
|
||||
vector_store_id=vector_store_id,
|
||||
file_ids=file_ids,
|
||||
attributes=attributes,
|
||||
chunking_strategy=chunking_strategy,
|
||||
)
|
||||
|
||||
async def openai_retrieve_vector_store_file_batch(
|
||||
self,
|
||||
batch_id: str,
|
||||
vector_store_id: str,
|
||||
):
|
||||
await self.assert_action_allowed("read", "vector_db", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_retrieve_vector_store_file_batch(
|
||||
batch_id=batch_id,
|
||||
vector_store_id=vector_store_id,
|
||||
)
|
||||
|
||||
async def openai_list_files_in_vector_store_file_batch(
|
||||
self,
|
||||
batch_id: str,
|
||||
vector_store_id: str,
|
||||
after: str | None = None,
|
||||
before: str | None = None,
|
||||
filter: str | None = None,
|
||||
limit: int | None = 20,
|
||||
order: str | None = "desc",
|
||||
):
|
||||
await self.assert_action_allowed("read", "vector_db", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_list_files_in_vector_store_file_batch(
|
||||
batch_id=batch_id,
|
||||
vector_store_id=vector_store_id,
|
||||
after=after,
|
||||
before=before,
|
||||
filter=filter,
|
||||
limit=limit,
|
||||
order=order,
|
||||
)
|
||||
|
||||
async def openai_cancel_vector_store_file_batch(
|
||||
self,
|
||||
batch_id: str,
|
||||
vector_store_id: str,
|
||||
):
|
||||
await self.assert_action_allowed("update", "vector_db", vector_store_id)
|
||||
provider = await self.get_provider_impl(vector_store_id)
|
||||
return await provider.openai_cancel_vector_store_file_batch(
|
||||
batch_id=batch_id,
|
||||
vector_store_id=vector_store_id,
|
||||
)
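An illustrative caller of the new routing-table methods; `table` stands for a VectorDBsRoutingTable instance, the ids are placeholders, and the returned batch object is assumed to expose an `id` attribute:

```python
async def index_files(table, vector_store_id: str, file_ids: list[str]):
    # Kick off a file batch, then poll its status once.
    batch = await table.openai_create_vector_store_file_batch(
        vector_store_id=vector_store_id,
        file_ids=file_ids,
    )
    return await table.openai_retrieve_vector_store_file_batch(
        batch_id=batch.id,
        vector_store_id=vector_store_id,
    )
```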
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import functools
|
||||
|
|
@ -12,7 +11,6 @@ import inspect
|
|||
import json
|
||||
import logging # allow-direct-logging
|
||||
import os
|
||||
import ssl
|
||||
import sys
|
||||
import traceback
|
||||
import warnings
|
||||
|
|
@ -35,7 +33,6 @@ from pydantic import BaseModel, ValidationError
|
|||
|
||||
from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
|
||||
from llama_stack.apis.common.responses import PaginatedResponse
|
||||
from llama_stack.cli.utils import add_config_distro_args, get_config_from_args
|
||||
from llama_stack.core.access_control.access_control import AccessDeniedError
|
||||
from llama_stack.core.datatypes import (
|
||||
AuthenticationRequiredError,
|
||||
|
|
@ -55,7 +52,6 @@ from llama_stack.core.stack import (
|
|||
Stack,
|
||||
cast_image_name_to_string,
|
||||
replace_env_vars,
|
||||
validate_env_pair,
|
||||
)
|
||||
from llama_stack.core.utils.config import redact_sensitive_fields
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
|
|
@ -333,23 +329,18 @@ class ClientVersionMiddleware:
|
|||
return await self.app(scope, receive, send)
|
||||
|
||||
|
||||
def create_app(
|
||||
config_file: str | None = None,
|
||||
env_vars: list[str] | None = None,
|
||||
) -> StackApp:
|
||||
def create_app() -> StackApp:
|
||||
"""Create and configure the FastAPI application.
|
||||
|
||||
Args:
|
||||
config_file: Path to config file. If None, uses LLAMA_STACK_CONFIG env var or default resolution.
|
||||
env_vars: List of environment variables in KEY=value format.
|
||||
disable_version_check: Whether to disable version checking. If None, uses LLAMA_STACK_DISABLE_VERSION_CHECK env var.
|
||||
This factory function reads configuration from environment variables:
|
||||
- LLAMA_STACK_CONFIG: Path to config file (required)
|
||||
|
||||
Returns:
|
||||
Configured StackApp instance.
|
||||
"""
|
||||
config_file = config_file or os.getenv("LLAMA_STACK_CONFIG")
|
||||
config_file = os.getenv("LLAMA_STACK_CONFIG")
|
||||
if config_file is None:
|
||||
raise ValueError("No config file provided and LLAMA_STACK_CONFIG env var is not set")
|
||||
raise ValueError("LLAMA_STACK_CONFIG environment variable is required")
|
||||
|
||||
config_file = resolve_config_or_distro(config_file, Mode.RUN)
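Because create_app is now a pure factory driven by LLAMA_STACK_CONFIG, the server can also be launched programmatically the same way the CLI does; the config path below is an assumption:

```python
import os
import uvicorn

# Any run.yaml produced by `llama stack build` should work here.
os.environ["LLAMA_STACK_CONFIG"] = "/home/user/.llama/distributions/starter/starter-run.yaml"

uvicorn.run(
    "llama_stack.core.server.server:create_app",
    factory=True,
    host="0.0.0.0",
    port=8321,
)
```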
|
||||
|
||||
|
|
@ -361,16 +352,6 @@ def create_app(
|
|||
logger_config = LoggingConfig(**cfg)
|
||||
logger = get_logger(name=__name__, category="core::server", config=logger_config)
|
||||
|
||||
if env_vars:
|
||||
for env_pair in env_vars:
|
||||
try:
|
||||
key, value = validate_env_pair(env_pair)
|
||||
logger.info(f"Setting environment variable {key} => {value}")
|
||||
os.environ[key] = value
|
||||
except ValueError as e:
|
||||
logger.error(f"Error: {str(e)}")
|
||||
raise ValueError(f"Invalid environment variable format: {env_pair}") from e
|
||||
|
||||
config = replace_env_vars(config_contents)
|
||||
config = StackRunConfig(**cast_image_name_to_string(config))
|
||||
|
||||
|
|
@ -494,101 +475,6 @@ def create_app(
|
|||
return app
|
||||
|
||||
|
||||
def main(args: argparse.Namespace | None = None):
|
||||
"""Start the LlamaStack server."""
|
||||
parser = argparse.ArgumentParser(description="Start the LlamaStack server.")
|
||||
|
||||
add_config_distro_args(parser)
|
||||
parser.add_argument(
|
||||
"--port",
|
||||
type=int,
|
||||
default=int(os.getenv("LLAMA_STACK_PORT", 8321)),
|
||||
help="Port to listen on",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--env",
|
||||
action="append",
|
||||
help="Environment variables in KEY=value format. Can be specified multiple times.",
|
||||
)
|
||||
|
||||
# Determine whether the server args are being passed by the "run" command, if this is the case
|
||||
# the args will be passed as a Namespace object to the main function, otherwise they will be
|
||||
# parsed from the command line
|
||||
if args is None:
|
||||
args = parser.parse_args()
|
||||
|
||||
config_or_distro = get_config_from_args(args)
|
||||
|
||||
try:
|
||||
app = create_app(
|
||||
config_file=config_or_distro,
|
||||
env_vars=args.env,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating app: {str(e)}")
|
||||
sys.exit(1)
|
||||
|
||||
config_file = resolve_config_or_distro(config_or_distro, Mode.RUN)
|
||||
with open(config_file) as fp:
|
||||
config_contents = yaml.safe_load(fp)
|
||||
if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
|
||||
logger_config = LoggingConfig(**cfg)
|
||||
else:
|
||||
logger_config = None
|
||||
config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
|
||||
|
||||
import uvicorn
|
||||
|
||||
# Configure SSL if certificates are provided
|
||||
port = args.port or config.server.port
|
||||
|
||||
ssl_config = None
|
||||
keyfile = config.server.tls_keyfile
|
||||
certfile = config.server.tls_certfile
|
||||
|
||||
if keyfile and certfile:
|
||||
ssl_config = {
|
||||
"ssl_keyfile": keyfile,
|
||||
"ssl_certfile": certfile,
|
||||
}
|
||||
if config.server.tls_cafile:
|
||||
ssl_config["ssl_ca_certs"] = config.server.tls_cafile
|
||||
ssl_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
|
||||
logger.info(
|
||||
f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}"
|
||||
)
|
||||
else:
|
||||
logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
|
||||
|
||||
listen_host = config.server.host or ["::", "0.0.0.0"]
|
||||
logger.info(f"Listening on {listen_host}:{port}")
|
||||
|
||||
uvicorn_config = {
|
||||
"app": app,
|
||||
"host": listen_host,
|
||||
"port": port,
|
||||
"lifespan": "on",
|
||||
"log_level": logger.getEffectiveLevel(),
|
||||
"log_config": logger_config,
|
||||
}
|
||||
if ssl_config:
|
||||
uvicorn_config.update(ssl_config)
|
||||
|
||||
# We need to catch KeyboardInterrupt because uvicorn's signal handling
|
||||
# re-raises SIGINT signals using signal.raise_signal(), which Python
|
||||
# converts to KeyboardInterrupt. Without this catch, we'd get a confusing
|
||||
# stack trace when using Ctrl+C or kill -2 (SIGINT).
|
||||
# SIGTERM (kill -15) works fine without this because Python doesn't
|
||||
# have a default handler for it.
|
||||
#
|
||||
# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
|
||||
# signal handling but this is quite intrusive and not worth the effort.
|
||||
try:
|
||||
asyncio.run(uvicorn.Server(uvicorn.Config(**uvicorn_config)).serve())
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
logger.info("Received interrupt signal, shutting down gracefully...")
|
||||
|
||||
|
||||
def _log_run_config(run_config: StackRunConfig):
|
||||
"""Logs the run config with redacted fields and disabled providers removed."""
|
||||
logger.info("Run configuration:")
|
||||
|
|
@ -615,7 +501,3 @@ def remove_disabled_providers(obj):
|
|||
return [item for item in (remove_disabled_providers(i) for i in obj) if item is not None]
|
||||
else:
|
||||
return obj
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
|
|
@ -274,22 +274,6 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
|
|||
return config_dict
|
||||
|
||||
|
||||
def validate_env_pair(env_pair: str) -> tuple[str, str]:
|
||||
"""Validate and split an environment variable key-value pair."""
|
||||
try:
|
||||
key, value = env_pair.split("=", 1)
|
||||
key = key.strip()
|
||||
if not key:
|
||||
raise ValueError(f"Empty key in environment variable pair: {env_pair}")
|
||||
if not all(c.isalnum() or c == "_" for c in key):
|
||||
raise ValueError(f"Key must contain only alphanumeric characters and underscores: {key}")
|
||||
return key, value
|
||||
except ValueError as e:
|
||||
raise ValueError(
|
||||
f"Invalid environment variable format '{env_pair}': {str(e)}. Expected format: KEY=value"
|
||||
) from e
|
||||
|
||||
|
||||
def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig) -> None:
|
||||
"""Add internal implementations (inspect and providers) to the implementations dictionary.
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ error_handler() {
|
|||
trap 'error_handler ${LINENO}' ERR
|
||||
|
||||
if [ $# -lt 3 ]; then
|
||||
echo "Usage: $0 <env_type> <env_path_or_name> <port> [--config <yaml_config>] [--env KEY=VALUE]..."
|
||||
echo "Usage: $0 <env_type> <env_path_or_name> <port> [--config <yaml_config>]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
|
@ -43,7 +43,6 @@ SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
|
|||
|
||||
# Initialize variables
|
||||
yaml_config=""
|
||||
env_vars=""
|
||||
other_args=""
|
||||
|
||||
# Process remaining arguments
|
||||
|
|
@ -58,15 +57,6 @@ while [[ $# -gt 0 ]]; do
|
|||
exit 1
|
||||
fi
|
||||
;;
|
||||
--env)
|
||||
if [[ -n "$2" ]]; then
|
||||
env_vars="$env_vars --env $2"
|
||||
shift 2
|
||||
else
|
||||
echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
other_args="$other_args $1"
|
||||
shift
|
||||
|
|
@ -116,10 +106,9 @@ if [[ "$env_type" == "venv" ]]; then
|
|||
yaml_config_arg=""
|
||||
fi
|
||||
|
||||
$PYTHON_BINARY -m llama_stack.core.server.server \
|
||||
llama stack run \
|
||||
$yaml_config_arg \
|
||||
--port "$port" \
|
||||
$env_vars \
|
||||
$other_args
|
||||
elif [[ "$env_type" == "container" ]]; then
|
||||
echo -e "${RED}Warning: Llama Stack no longer supports running Containers via the 'llama stack run' command.${NC}"
|
||||
|
|
|
|||
|
|
@ -98,7 +98,10 @@ class DiskDistributionRegistry(DistributionRegistry):
|
|||
existing_obj = await self.get(obj.type, obj.identifier)
|
||||
# dont register if the object's providerid already exists
|
||||
if existing_obj and existing_obj.provider_id == obj.provider_id:
|
||||
return False
|
||||
raise ValueError(
|
||||
f"Provider '{obj.provider_id}' is already registered."
|
||||
f"Unregister the existing provider first before registering it again."
|
||||
)
|
||||
|
||||
await self.kvstore.set(
|
||||
KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),
|
||||
|
|
|
|||
|
|
@ -117,11 +117,11 @@ docker run -it \
|
|||
# NOTE: mount the llama-stack directory if testing local changes else not needed
|
||||
-v $HOME/git/llama-stack:/app/llama-stack-source \
|
||||
# localhost/distribution-dell:dev if building / testing locally
|
||||
-e INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
-e DEH_URL=$DEH_URL \
|
||||
-e CHROMA_URL=$CHROMA_URL \
|
||||
llamastack/distribution-{{ name }}\
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
--env DEH_URL=$DEH_URL \
|
||||
--env CHROMA_URL=$CHROMA_URL
|
||||
--port $LLAMA_STACK_PORT
|
||||
|
||||
```
|
||||
|
||||
|
|
@ -142,14 +142,14 @@ docker run \
|
|||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v $HOME/.llama:/root/.llama \
|
||||
-v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
|
||||
-e INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
-e DEH_URL=$DEH_URL \
|
||||
-e SAFETY_MODEL=$SAFETY_MODEL \
|
||||
-e DEH_SAFETY_URL=$DEH_SAFETY_URL \
|
||||
-e CHROMA_URL=$CHROMA_URL \
|
||||
llamastack/distribution-{{ name }} \
|
||||
--config /root/my-run.yaml \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
--env DEH_URL=$DEH_URL \
|
||||
--env SAFETY_MODEL=$SAFETY_MODEL \
|
||||
--env DEH_SAFETY_URL=$DEH_SAFETY_URL \
|
||||
--env CHROMA_URL=$CHROMA_URL
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
### Via Conda
|
||||
|
|
@ -158,21 +158,21 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a
|
|||
|
||||
```bash
|
||||
llama stack build --distro {{ name }} --image-type conda
|
||||
llama stack run {{ name }}
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
--env DEH_URL=$DEH_URL \
|
||||
--env CHROMA_URL=$CHROMA_URL
|
||||
INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
DEH_URL=$DEH_URL \
|
||||
CHROMA_URL=$CHROMA_URL \
|
||||
llama stack run {{ name }} \
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
||||
```bash
|
||||
INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
DEH_URL=$DEH_URL \
|
||||
SAFETY_MODEL=$SAFETY_MODEL \
|
||||
DEH_SAFETY_URL=$DEH_SAFETY_URL \
|
||||
CHROMA_URL=$CHROMA_URL \
|
||||
llama stack run ./run-with-safety.yaml \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
--env DEH_URL=$DEH_URL \
|
||||
--env SAFETY_MODEL=$SAFETY_MODEL \
|
||||
--env DEH_SAFETY_URL=$DEH_SAFETY_URL \
|
||||
--env CHROMA_URL=$CHROMA_URL
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
|
|
|||
|
|
@@ -72,9 +72,9 @@ docker run \
--gpu all \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT \
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
--port $LLAMA_STACK_PORT
```

If you are using Llama Stack Safety / Shield APIs, use:

@@ -86,10 +86,10 @@ docker run \
--gpu all \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT \
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
--port $LLAMA_STACK_PORT
```

### Via venv

@@ -98,16 +98,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL

```bash
llama stack build --distro {{ name }} --image-type venv
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
llama stack run distributions/{{ name }}/run.yaml \
--port 8321 \
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
--port 8321
```

If you are using Llama Stack Safety / Shield APIs, use:

```bash
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
llama stack run distributions/{{ name }}/run-with-safety.yaml \
--port 8321 \
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
--port 8321
```
@@ -118,10 +118,10 @@ docker run \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \
-e NVIDIA_API_KEY=$NVIDIA_API_KEY \
llamastack/distribution-{{ name }} \
--config /root/my-run.yaml \
--port $LLAMA_STACK_PORT \
--env NVIDIA_API_KEY=$NVIDIA_API_KEY
--port $LLAMA_STACK_PORT
```

### Via venv

@@ -131,10 +131,10 @@ If you've set up your local development environment, you can also build the imag
```bash
INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
llama stack build --distro nvidia --image-type venv
NVIDIA_API_KEY=$NVIDIA_API_KEY \
INFERENCE_MODEL=$INFERENCE_MODEL \
llama stack run ./run.yaml \
--port 8321 \
--env NVIDIA_API_KEY=$NVIDIA_API_KEY \
--env INFERENCE_MODEL=$INFERENCE_MODEL
--port 8321
```

## Example Notebooks
@@ -3,3 +3,5 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .watsonx import get_distribution_template  # noqa: F401
@@ -3,44 +3,33 @@ distribution_spec:
description: Use watsonx for running LLM inference
providers:
inference:
- provider_id: watsonx
provider_type: remote::watsonx
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
- provider_type: remote::watsonx
- provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
- provider_type: inline::faiss
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
- provider_type: inline::llama-guard
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
- provider_type: inline::meta-reference
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
- provider_type: inline::meta-reference
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
- provider_type: inline::meta-reference
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
- provider_id: localfs
provider_type: inline::localfs
- provider_type: remote::huggingface
- provider_type: inline::localfs
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
- provider_type: inline::basic
- provider_type: inline::llm-as-judge
- provider_type: inline::braintrust
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol
files:
- provider_type: inline::localfs
image_type: venv
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]
- aiosqlite
- aiosqlite
@@ -4,13 +4,13 @@ apis:
- agents
- datasetio
- eval
- files
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
- files
providers:
inference:
- provider_id: watsonx

@@ -19,8 +19,6 @@ providers:
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
api_key: ${env.WATSONX_API_KEY:=}
project_id: ${env.WATSONX_PROJECT_ID:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss

@@ -48,7 +46,7 @@ providers:
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sinks: ${env.TELEMETRY_SINKS:=sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
eval:
@@ -109,102 +107,7 @@ metadata_store:
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db
models:
- metadata: {}
model_id: meta-llama/llama-3-3-70b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-3-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-2-13b-chat
provider_id: watsonx
provider_model_id: meta-llama/llama-2-13b-chat
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-2-13b
provider_id: watsonx
provider_model_id: meta-llama/llama-2-13b-chat
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-1-70b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-70b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-1-8b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-11b-vision-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-1b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-1b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-1b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-3b-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-3b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-3-2-90b-vision-instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: watsonx
provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/llama-guard-3-11b-vision
provider_id: watsonx
provider_model_id: meta-llama/llama-guard-3-11b-vision
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: watsonx
provider_model_id: meta-llama/llama-guard-3-11b-vision
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
models: []
shields: []
vector_dbs: []
datasets: []
@@ -4,17 +4,11 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from pathlib import Path

from llama_stack.apis.models import ModelType
from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES


def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:

@@ -52,15 +46,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
config=WatsonXConfig.sample_run_config(),
)

embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)

available_models = {
"watsonx": MODEL_ENTRIES,
}
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",

@@ -72,36 +57,25 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
),
]

embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id="sentence-transformers",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)

files_provider = Provider(
provider_id="meta-reference-files",
provider_type="inline::localfs",
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
default_models, _ = get_model_registry(available_models)
return DistributionTemplate(
name=name,
distro_type="remote_hosted",
description="Use watsonx for running LLM inference",
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
template_path=None,
providers=providers,
available_models_by_provider=available_models,
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider, embedding_provider],
"inference": [inference_provider],
"files": [files_provider],
},
default_models=default_models + [embedding_model],
default_models=[],
default_tool_groups=default_tool_groups,
),
},
@@ -31,12 +31,17 @@ CATEGORIES = [
"client",
"telemetry",
"openai_responses",
"openai_conversations",
"testing",
"providers",
"models",
"files",
"vector_io",
"tool_runtime",
"cli",
"post_training",
"scoring",
"tests",
]
UNCATEGORIZED = "uncategorized"

@@ -128,7 +133,10 @@ def strip_rich_markup(text):

class CustomRichHandler(RichHandler):
def __init__(self, *args, **kwargs):
kwargs["console"] = Console()
# Set a reasonable default width for console output, especially when redirected to files
console_width = int(os.environ.get("LLAMA_STACK_LOG_WIDTH", "120"))
# Don't force terminal codes to avoid ANSI escape codes in log files
kwargs["console"] = Console(width=console_width)
super().__init__(*args, **kwargs)

def emit(self, record):
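
A minimal standalone sketch (an illustration, not part of this diff) of how the `LLAMA_STACK_LOG_WIDTH` override shown above behaves with a `rich` console:

```python
import os

from rich.console import Console

# Assumed default of 120 columns, matching the handler above, unless the env var overrides it.
width = int(os.environ.get("LLAMA_STACK_LOG_WIDTH", "120"))
console = Console(width=width)
console.print("token " * 100)  # wraps at `width` columns even when stdout is redirected to a file
```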
@@ -261,11 +269,12 @@ def get_logger(
if root_category in _category_levels:
log_level = _category_levels[root_category]
else:
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
if category != UNCATEGORIZED:
logging.warning(
f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}"
raise ValueError(
f"Unknown logging category: {category}. To resolve, choose a valid category from the CATEGORIES list "
f"or add it to the CATEGORIES list. Available categories: {CATEGORIES}"
)
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
logger.setLevel(log_level)
return logging.LoggerAdapter(logger, {"category": category})
@@ -11,19 +11,13 @@
# top-level folder for each specific model found within the models/ directory at
# the top-level of this source tree.

import json
import textwrap
from pathlib import Path

from pydantic import BaseModel, Field

from llama_stack.models.llama.datatypes import (
RawContent,
RawMediaItem,
RawMessage,
RawTextItem,
StopReason,
ToolCall,
ToolPromptFormat,
)
from llama_stack.models.llama.llama4.tokenizer import Tokenizer

@@ -175,25 +169,6 @@ def llama3_1_builtin_code_interpreter_dialog(tool_prompt_format=ToolPromptFormat
return messages


def llama3_1_builtin_tool_call_with_image_dialog(
tool_prompt_format=ToolPromptFormat.json,
):
this_dir = Path(__file__).parent
with open(this_dir / "llama3/dog.jpg", "rb") as f:
img = f.read()

interface = LLama31Interface(tool_prompt_format)

messages = interface.system_messages(**system_message_builtin_tools_only())
messages += interface.user_message(content=[RawMediaItem(data=img), RawTextItem(text="What is this dog breed?")])
messages += interface.assistant_response_messages(
"Based on the description of the dog in the image, it appears to be a small breed dog, possibly a terrier mix",
StopReason.end_of_turn,
)
messages += interface.user_message("Search the web for some food recommendations for the indentified breed")
return messages


def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
interface = LLama31Interface(tool_prompt_format)

@@ -202,35 +177,6 @@ def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
return messages


def llama3_1_e2e_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json):
tool_response = json.dumps(["great song1", "awesome song2", "cool song3"])
interface = LLama31Interface(tool_prompt_format)

messages = interface.system_messages(**system_message_custom_tools_only())
messages += interface.user_message(content="Use tools to get latest trending songs")
messages.append(
RawMessage(
role="assistant",
content="",
stop_reason=StopReason.end_of_message,
tool_calls=[
ToolCall(
call_id="call_id",
tool_name="trending_songs",
arguments={"n": "10", "genre": "latest"},
)
],
),
)
messages.append(
RawMessage(
role="assistant",
content=tool_response,
)
)
return messages


def llama3_2_user_assistant_conversation():
return UseCase(
title="User and assistant conversation",
@@ -9,7 +9,7 @@ from pathlib import Path

from llama_stack.log import get_logger

logger = get_logger(__name__, "tokenizer_utils")
logger = get_logger(__name__, "models")


def load_bpe_file(model_path: Path) -> dict[bytes, int]:
@@ -22,6 +22,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap
deps[Api.tool_runtime],
deps[Api.tool_groups],
policy,
Api.telemetry in deps,
)
await impl.initialize()
return impl
@@ -7,8 +7,6 @@
import copy
import json
import re
import secrets
import string
import uuid
import warnings
from collections.abc import AsyncGenerator

@@ -84,11 +82,6 @@ from llama_stack.providers.utils.telemetry import tracing
from .persistence import AgentPersistence
from .safety import SafetyException, ShieldRunnerMixin


def make_random_string(length: int = 8):
return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))


TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})")
MEMORY_QUERY_TOOL = "knowledge_search"
WEB_SEARCH_TOOL = "web_search"

@@ -110,6 +103,7 @@ class ChatAgent(ShieldRunnerMixin):
persistence_store: KVStore,
created_at: str,
policy: list[AccessRule],
telemetry_enabled: bool = False,
):
self.agent_id = agent_id
self.agent_config = agent_config

@@ -120,6 +114,7 @@ class ChatAgent(ShieldRunnerMixin):
self.tool_runtime_api = tool_runtime_api
self.tool_groups_api = tool_groups_api
self.created_at = created_at
self.telemetry_enabled = telemetry_enabled

ShieldRunnerMixin.__init__(
self,

@@ -188,8 +183,9 @@ class ChatAgent(ShieldRunnerMixin):

async def create_and_execute_turn(self, request: AgentTurnCreateRequest) -> AsyncGenerator:
turn_id = str(uuid.uuid4())
if self.telemetry_enabled:
span = tracing.get_current_span()
if span:
if span is not None:
span.set_attribute("session_id", request.session_id)
span.set_attribute("agent_id", self.agent_id)
span.set_attribute("request", request.model_dump_json())

@@ -202,8 +198,9 @@ class ChatAgent(ShieldRunnerMixin):
yield chunk

async def resume_turn(self, request: AgentTurnResumeRequest) -> AsyncGenerator:
if self.telemetry_enabled:
span = tracing.get_current_span()
if span:
if span is not None:
span.set_attribute("agent_id", self.agent_id)
span.set_attribute("session_id", request.session_id)
span.set_attribute("request", request.model_dump_json())

@@ -395,9 +392,12 @@ class ChatAgent(ShieldRunnerMixin):
touchpoint: str,
) -> AsyncGenerator:
async with tracing.span("run_shields") as span:
if self.telemetry_enabled and span is not None:
span.set_attribute("input", [m.model_dump_json() for m in messages])
if len(shields) == 0:
span.set_attribute("output", "no shields")

if len(shields) == 0:
return

step_id = str(uuid.uuid4())

@@ -430,6 +430,7 @@ class ChatAgent(ShieldRunnerMixin):
)
)
)
if self.telemetry_enabled and span is not None:
span.set_attribute("output", e.violation.model_dump_json())

yield CompletionMessage(

@@ -453,6 +454,7 @@ class ChatAgent(ShieldRunnerMixin):
)
)
)
if self.telemetry_enabled and span is not None:
span.set_attribute("output", "no violations")

async def _run(

@@ -518,6 +520,7 @@ class ChatAgent(ShieldRunnerMixin):
stop_reason: StopReason | None = None

async with tracing.span("inference") as span:
if self.telemetry_enabled and span is not None:
if self.agent_config.name:
span.set_attribute("agent_name", self.agent_config.name)

@@ -637,6 +640,7 @@ class ChatAgent(ShieldRunnerMixin):
else:
raise ValueError(f"Unexpected delta type {type(delta)}")

if self.telemetry_enabled and span is not None:
span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn)
span.set_attribute(
"input",

@@ -756,7 +760,9 @@ class ChatAgent(ShieldRunnerMixin):
{
"tool_name": tool_call.tool_name,
"input": message.model_dump_json(),
},
}
if self.telemetry_enabled
else {},
) as span:
tool_execution_start_time = datetime.now(UTC).isoformat()
tool_result = await self.execute_tool_call_maybe(

@@ -771,6 +777,7 @@ class ChatAgent(ShieldRunnerMixin):
call_id=tool_call.call_id,
content=tool_result.content,
)
if self.telemetry_enabled and span is not None:
span.set_attribute("output", result_message.model_dump_json())

# Store tool execution step
@@ -64,6 +64,7 @@ class MetaReferenceAgentsImpl(Agents):
tool_runtime_api: ToolRuntime,
tool_groups_api: ToolGroups,
policy: list[AccessRule],
telemetry_enabled: bool = False,
):
self.config = config
self.inference_api = inference_api

@@ -71,6 +72,7 @@ class MetaReferenceAgentsImpl(Agents):
self.safety_api = safety_api
self.tool_runtime_api = tool_runtime_api
self.tool_groups_api = tool_groups_api
self.telemetry_enabled = telemetry_enabled

self.in_memory_store = InmemoryKVStoreImpl()
self.openai_responses_impl: OpenAIResponsesImpl | None = None

@@ -135,6 +137,7 @@ class MetaReferenceAgentsImpl(Agents):
),
created_at=agent_info.created_at,
policy=self.policy,
telemetry_enabled=self.telemetry_enabled,
)

async def create_agent_session(
@@ -269,7 +269,7 @@ class OpenAIResponsesImpl:
response_tools=tools,
temperature=temperature,
response_format=response_format,
inputs=input,
inputs=all_input,
)

# Create orchestrator and delegate streaming logic
@@ -97,6 +97,8 @@ class StreamingResponseOrchestrator:
self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = {}
# Track final messages after all tool executions
self.final_messages: list[OpenAIMessageParam] = []
# mapping for annotations
self.citation_files: dict[str, str] = {}

async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
# Initialize output messages

@@ -126,6 +128,7 @@ class StreamingResponseOrchestrator:
# Text is the default response format for chat completion so don't need to pass it
# (some providers don't support non-empty response_format when tools are present)
response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
logger.debug(f"calling openai_chat_completion with tools: {self.ctx.chat_tools}")
completion_result = await self.inference_api.openai_chat_completion(
model=self.ctx.model,
messages=messages,

@@ -160,7 +163,7 @@ class StreamingResponseOrchestrator:
# Handle choices with no tool calls
for choice in current_response.choices:
if not (choice.message.tool_calls and self.ctx.response_tools):
output_messages.append(await convert_chat_choice_to_response_message(choice))
output_messages.append(await convert_chat_choice_to_response_message(choice, self.citation_files))

# Execute tool calls and coordinate results
async for stream_event in self._coordinate_tool_execution(

@@ -172,6 +175,8 @@ class StreamingResponseOrchestrator:
):
yield stream_event

messages = next_turn_messages

if not function_tool_calls and not non_function_tool_calls:
break

@@ -184,9 +189,7 @@ class StreamingResponseOrchestrator:
logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}")
break

messages = next_turn_messages

self.final_messages = messages.copy() + [current_response.choices[0].message]
self.final_messages = messages.copy()

# Create final response
final_response = OpenAIResponseObject(

@@ -211,6 +214,8 @@ class StreamingResponseOrchestrator:

for choice in current_response.choices:
next_turn_messages.append(choice.message)
logger.debug(f"Choice message content: {choice.message.content}")
logger.debug(f"Choice message tool_calls: {choice.message.tool_calls}")

if choice.message.tool_calls and self.ctx.response_tools:
for tool_call in choice.message.tool_calls:

@@ -227,9 +232,11 @@ class StreamingResponseOrchestrator:
non_function_tool_calls.append(tool_call)
else:
logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}")
next_turn_messages.pop()
else:
logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}")
approvals.append(tool_call)
next_turn_messages.pop()
else:
non_function_tool_calls.append(tool_call)

@@ -470,6 +477,8 @@ class StreamingResponseOrchestrator:
tool_call_log = result.final_output_message
tool_response_message = result.final_input_message
self.sequence_number = result.sequence_number
if result.citation_files:
self.citation_files.update(result.citation_files)

if tool_call_log:
output_messages.append(tool_call_log)
@@ -94,7 +94,10 @@ class ToolExecutor:

# Yield the final result
yield ToolExecutionResult(
sequence_number=sequence_number, final_output_message=output_message, final_input_message=input_message
sequence_number=sequence_number,
final_output_message=output_message,
final_input_message=input_message,
citation_files=result.metadata.get("citation_files") if result and result.metadata else None,
)

async def _execute_knowledge_search_via_vector_store(

@@ -129,8 +132,6 @@ class ToolExecutor:
for results in all_results:
search_results.extend(results)

# Convert search results to tool result format matching memory.py
# Format the results as interleaved content similar to memory.py
content_items = []
content_items.append(
TextContentItem(

@@ -138,27 +139,58 @@ class ToolExecutor:
)
)

unique_files = set()
for i, result_item in enumerate(search_results):
chunk_text = result_item.content[0].text if result_item.content else ""
metadata_text = f"document_id: {result_item.file_id}, score: {result_item.score}"
# Get file_id from attributes if result_item.file_id is empty
file_id = result_item.file_id or (
result_item.attributes.get("document_id") if result_item.attributes else None
)
metadata_text = f"document_id: {file_id}, score: {result_item.score}"
if result_item.attributes:
metadata_text += f", attributes: {result_item.attributes}"
text_content = f"[{i + 1}] {metadata_text}\n{chunk_text}\n"

text_content = f"[{i + 1}] {metadata_text} (cite as <|{file_id}|>)\n{chunk_text}\n"
content_items.append(TextContentItem(text=text_content))
unique_files.add(file_id)

content_items.append(TextContentItem(text="END of knowledge_search tool results.\n"))

citation_instruction = ""
if unique_files:
citation_instruction = (
" Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). "
"Do not add extra punctuation. Use only the file IDs provided (do not invent new ones)."
)

content_items.append(
TextContentItem(
text=f'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.\n',
text=f'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{citation_instruction}\n',
)
)

# handling missing attributes for old versions
citation_files = {}
for result in search_results:
file_id = result.file_id
if not file_id and result.attributes:
file_id = result.attributes.get("document_id")

filename = result.filename
if not filename and result.attributes:
filename = result.attributes.get("filename")
if not filename:
filename = "unknown"

citation_files[file_id] = filename

return ToolInvocationResult(
content=content_items,
metadata={
"document_ids": [r.file_id for r in search_results],
"chunks": [r.content[0].text if r.content else "" for r in search_results],
"scores": [r.score for r in search_results],
"citation_files": citation_files,
},
)
@@ -27,6 +27,7 @@ class ToolExecutionResult(BaseModel):
sequence_number: int
final_output_message: OpenAIResponseOutput | None = None
final_input_message: OpenAIMessageParam | None = None
citation_files: dict[str, str] | None = None


@dataclass
@@ -4,9 +4,11 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import re
import uuid

from llama_stack.apis.agents.openai_responses import (
OpenAIResponseAnnotationFileCitation,
OpenAIResponseInput,
OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputMessageContent,

@@ -45,7 +47,9 @@ from llama_stack.apis.inference import (
)


async def convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenAIResponseMessage:
async def convert_chat_choice_to_response_message(
choice: OpenAIChoice, citation_files: dict[str, str] | None = None
) -> OpenAIResponseMessage:
"""Convert an OpenAI Chat Completion choice into an OpenAI Response output message."""
output_content = ""
if isinstance(choice.message.content, str):

@@ -57,9 +61,11 @@ async def convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenA
f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}"
)

annotations, clean_text = _extract_citations_from_text(output_content, citation_files or {})

return OpenAIResponseMessage(
id=f"msg_{uuid.uuid4()}",
content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=annotations)],
status="completed",
role="assistant",
)

@@ -200,6 +206,53 @@ async def get_message_type_by_role(role: str):
return role_to_type.get(role)


def _extract_citations_from_text(
text: str, citation_files: dict[str, str]
) -> tuple[list[OpenAIResponseAnnotationFileCitation], str]:
"""Extract citation markers from text and create annotations

Args:
text: The text containing citation markers like [file-Cn3MSNn72ENTiiq11Qda4A]
citation_files: Dictionary mapping file_id to filename

Returns:
Tuple of (annotations_list, clean_text_without_markers)
"""
file_id_regex = re.compile(r"<\|(?P<file_id>file-[A-Za-z0-9_-]+)\|>")

annotations = []
parts = []
total_len = 0
last_end = 0

for m in file_id_regex.finditer(text):
# segment before the marker
prefix = text[last_end : m.start()]

# drop one space if it exists (since marker is at sentence end)
if prefix.endswith(" "):
prefix = prefix[:-1]

parts.append(prefix)
total_len += len(prefix)

fid = m.group(1)
if fid in citation_files:
annotations.append(
OpenAIResponseAnnotationFileCitation(
file_id=fid,
filename=citation_files[fid],
index=total_len,  # index points to punctuation
)
)

last_end = m.end()

parts.append(text[last_end:])
cleaned_text = "".join(parts)
return annotations, cleaned_text


def is_function_tool_call(
tool_call: OpenAIChatCompletionToolCall,
tools: list[OpenAIResponseInputTool],
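
A minimal standalone sketch (illustrative, not part of this diff) of the citation-marker convention the helper above parses: `<|file-...|>` markers are located with a regex, stripped from the text, and their file IDs kept for annotations:

```python
import re

# Same marker shape as assumed above: <|file-<id>|> appended at the end of a sentence.
file_id_regex = re.compile(r"<\|(?P<file_id>file-[A-Za-z0-9_-]+)\|>")

text = "This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>."
ids = [m.group("file_id") for m in file_id_regex.finditer(text)]
clean = file_id_regex.sub("", text).replace(" .", ".")

print(ids)    # ['file-Cn3MSNn72ENTiiq11Qda4A']
print(clean)  # 'This is a fact.'
```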
@@ -8,8 +8,6 @@ import asyncio
import base64
import io
import mimetypes
import secrets
import string
from typing import Any

import httpx

@@ -52,10 +50,6 @@ from .context_retriever import generate_rag_query
log = get_logger(name=__name__, category="tool_runtime")


def make_random_string(length: int = 8):
return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))


async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
"""Get raw binary data and mime type from a RAGDocument for file upload."""
if isinstance(doc.content, URL):

@@ -331,5 +325,8 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti

return ToolInvocationResult(
content=result.content or [],
metadata=result.metadata,
metadata={
**(result.metadata or {}),
"citation_files": getattr(result, "citation_files", None),
},
)
@@ -200,12 +200,10 @@ class FaissIndex(EmbeddingIndex):

class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.files_api = files_api
self.cache: dict[str, VectorDBWithIndex] = {}
self.kvstore: KVStore | None = None
self.openai_vector_stores: dict[str, dict[str, Any]] = {}

async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.kvstore)

@@ -227,8 +225,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
await self.initialize_openai_vector_stores()

async def shutdown(self) -> None:
# Cleanup if needed
pass
# Clean up mixin resources (file batch tasks)
await super().shutdown()

async def health(self) -> HealthResponse:
"""
Some files were not shown because too many files have changed in this diff.