chore: Enabling Milvus for VectorIO CI

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
Francisco Javier Arceo 2025-06-27 21:25:57 -04:00
parent 709eb7da33
commit c8d41d45ec
115 changed files with 2919 additions and 184 deletions

View file

@ -22,7 +22,7 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-       vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "remote::chromadb", "remote::pgvector"]
+       vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "remote::chromadb", "remote::pgvector"]
        python-version: ["3.12", "3.13"]
        fail-fast: false # we want to run all tests regardless of failure

View file

@ -20,7 +20,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - name: Install uv
-       uses: astral-sh/setup-uv@445689ea25e0de0a23313031f5fe577c74ae45a1 # v6.3.0
+       uses: astral-sh/setup-uv@bd01e18f51369d5a26f1651c3cb451d3417e3bba # v6.3.1
        with:
          python-version: ${{ matrix.python-version }}
          activate-environment: true

View file

@ -14,7 +14,7 @@ repos:
  - id: check-added-large-files
    args: ['--maxkb=1000']
  - id: end-of-file-fixer
-   exclude: '^(.*\.svg)$'
+   exclude: '^(.*\.svg|.*\.md)$'
  - id: no-commit-to-branch
  - id: check-yaml
    args: ["--unsafe"]
@ -95,6 +95,15 @@ repos:
    pass_filenames: false
    require_serial: true
    files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
- id: provider-codegen
name: Provider Codegen
additional_dependencies:
- uv==0.7.8
entry: uv run --group codegen ./scripts/provider_codegen.py
language: python
pass_filenames: false
require_serial: true
files: ^llama_stack/providers/.*$
  - id: openapi-codegen
    name: API Spec Codegen
    additional_dependencies:

View file

@ -139,6 +139,8 @@ uv sync
  justification for bypassing the check.
* Don't use unicode characters in the codebase. ASCII-only is preferred for compatibility or
  readability reasons.
* A provider's configuration class should use Pydantic `Field`s, each with a `description` that
  describes the configuration. These descriptions will be used to generate the provider documentation.
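For illustration, a config class following this guideline might look like the sketch below; the class name and fields are hypothetical, not taken from the codebase:

```python
from pydantic import BaseModel, Field


class ExampleProviderConfig(BaseModel):
    # Hypothetical provider settings, shown only to illustrate Field descriptions.
    url: str = Field(
        default="http://localhost:8000",
        description="Base URL of the example service.",
    )
    api_key: str | None = Field(
        default=None,
        description="Optional API key used to authenticate requests.",
    )
```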
## Common Tasks
@ -157,10 +159,19 @@ cd llama-stack
LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --template <...>
```
-### Updating Provider Configurations
+### Updating distribution configurations

-If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated.
+If you have made changes to a provider's configuration in any form (introducing a new config key, or
+changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML
+files as well as the documentation. You should not change `docs/source/.../distributions/` files
+manually as they are auto-generated.
+
+### Updating the provider documentation
+
+If you have made changes to a provider's configuration, you should run `./scripts/distro_codegen.py`
+to re-generate the documentation. You should not change `docs/source/.../providers/` files manually
+as they are auto-generated.
+
+Note that the provider "description" field will be used to generate the provider documentation.
### Building the Documentation

View file

@ -817,6 +817,90 @@
]
}
},
"/v1/openai/v1/responses/{response_id}": {
"get": {
"responses": {
"200": {
"description": "An OpenAIResponseObject.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObject"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Retrieve an OpenAI response by its ID.",
"parameters": [
{
"name": "response_id",
"in": "path",
"description": "The ID of the OpenAI response to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
]
},
"delete": {
"responses": {
"200": {
"description": "An OpenAIDeleteResponseObject",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIDeleteResponseObject"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Delete an OpenAI response by its ID.",
"parameters": [
{
"name": "response_id",
"in": "path",
"description": "The ID of the OpenAI response to delete.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/inference/embeddings": { "/v1/inference/embeddings": {
"post": { "post": {
"responses": { "responses": {
@ -1284,49 +1368,6 @@
]
}
},
"/v1/openai/v1/responses/{response_id}": {
"get": {
"responses": {
"200": {
"description": "An OpenAIResponseObject.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObject"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Retrieve an OpenAI response by its ID.",
"parameters": [
{
"name": "response_id",
"in": "path",
"description": "The ID of the OpenAI response to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/scoring-functions/{scoring_fn_id}": { "/v1/scoring-functions/{scoring_fn_id}": {
"get": { "get": {
"responses": { "responses": {
@ -9063,6 +9104,30 @@
],
"title": "OpenAIResponseObjectStreamResponseWebSearchCallSearching"
},
"OpenAIDeleteResponseObject": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"object": {
"type": "string",
"const": "response",
"default": "response"
},
"deleted": {
"type": "boolean",
"default": true
}
},
"additionalProperties": false,
"required": [
"id",
"object",
"deleted"
],
"title": "OpenAIDeleteResponseObject"
},
"EmbeddingsRequest": { "EmbeddingsRequest": {
"type": "object", "type": "object",
"properties": { "properties": {

View file

@ -558,6 +558,64 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/responses/{response_id}:
get:
responses:
'200':
description: An OpenAIResponseObject.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIResponseObject'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Retrieve an OpenAI response by its ID.
parameters:
- name: response_id
in: path
description: >-
The ID of the OpenAI response to retrieve.
required: true
schema:
type: string
delete:
responses:
'200':
description: An OpenAIDeleteResponseObject
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIDeleteResponseObject'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Delete an OpenAI response by its ID.
parameters:
- name: response_id
in: path
description: The ID of the OpenAI response to delete.
required: true
schema:
type: string
/v1/inference/embeddings:
post:
responses:
@ -883,36 +941,6 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/responses/{response_id}:
get:
responses:
'200':
description: An OpenAIResponseObject.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIResponseObject'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Retrieve an OpenAI response by its ID.
parameters:
- name: response_id
in: path
description: >-
The ID of the OpenAI response to retrieve.
required: true
schema:
type: string
/v1/scoring-functions/{scoring_fn_id}:
get:
responses:
@ -6404,6 +6432,24 @@ components:
- type
title: >-
OpenAIResponseObjectStreamResponseWebSearchCallSearching
OpenAIDeleteResponseObject:
type: object
properties:
id:
type: string
object:
type: string
const: response
default: response
deleted:
type: boolean
default: true
additionalProperties: false
required:
- id
- object
- deleted
title: OpenAIDeleteResponseObject
EmbeddingsRequest:
type: object
properties:
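Taken together, the entries added to both spec files describe retrieving and deleting a stored OpenAI-compatible response. A rough client-side sketch against a locally running stack (the base URL and response ID are placeholders, and `requests` is just one convenient way to issue the calls):

```python
import requests

BASE_URL = "http://localhost:8321"  # assumed local Llama Stack server
response_id = "resp_123"            # placeholder ID of a previously created response

# GET /v1/openai/v1/responses/{response_id} -> OpenAIResponseObject
r = requests.get(f"{BASE_URL}/v1/openai/v1/responses/{response_id}")
print(r.status_code, r.json())

# DELETE /v1/openai/v1/responses/{response_id} -> OpenAIDeleteResponseObject
r = requests.delete(f"{BASE_URL}/v1/openai/v1/responses/{response_id}")
print(r.json())  # e.g. {"id": "resp_123", "object": "response", "deleted": true}
```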

View file

@ -156,7 +156,7 @@ def _validate_api_delete_method_returns_none(method) -> str | None:
# Allow OpenAI endpoints to return response objects since they follow OpenAI specification
method_name = getattr(method, '__name__', '')
-if method_name.startswith('openai_'):
+if method_name.__contains__('openai_'):
    return None

if return_type is not None and return_type is not type(None):
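Read in isolation, the adjusted rule amounts to: DELETE handlers should return `None`, except OpenAI-compatible endpoints, which mirror the OpenAI spec and may return a body. A self-contained approximation (the helper name is made up, and the real script derives the return type elsewhere):

```python
import inspect


def delete_method_violation(method) -> str | None:
    """Return an error message if a DELETE handler breaks the convention, else None."""
    method_name = getattr(method, "__name__", "")
    if "openai_" in method_name:
        # OpenAI-compatible endpoints may return a response object,
        # e.g. OpenAIDeleteResponseObject for DELETE /v1/openai/v1/responses/{id}.
        return None
    return_type = inspect.signature(method).return_annotation
    if return_type not in (inspect.Signature.empty, None, type(None)):
        return f"{method_name} should return None to conform with DELETE semantics"
    return None
```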

View file

@ -64,10 +64,9 @@ options:
--template TEMPLATE   Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
--list-templates      Show the available templates for building a Llama Stack distribution (default: False)
--image-type {conda,container,venv}
-                     Image Type to use for the build. This can be either conda or container or venv. If not specified, will use the image type from the template config. (default: conda)
+                     Image Type to use for the build. If not specified, will use the image type from the template config. (default: None)
--image-name IMAGE_NAME
-                     [for image-type=conda|container|venv] Name of the conda or virtual environment to use for the build. If not specified, currently active Conda environment will be used if found. (default: None)
+                     [for image-type=conda|container|venv] Name of the conda or virtual environment to use for the build. If not specified, currently active environment will be used if found. (default: None)
--print-deps-only     Print the dependencies for the stack only, without building the stack (default: False)
--run                 Run the stack after building using the same image type, name, and other applicable arguments (default: False)
@ -89,32 +88,53 @@ llama stack build --list-templates
------------------------------+-----------------------------------------------------------------------------+
| Template Name | Description |
+------------------------------+-----------------------------------------------------------------------------+
-| hf-serverless | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
+| watsonx | Use watsonx for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| together | Use Together.AI for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| vllm-gpu | Use a built-in vLLM engine for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
-| experimental-post-training | Experimental template for post training |
+| together | Use Together.AI for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| remote-vllm | Use (an external) vLLM server for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| fireworks | Use Fireworks.AI for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| tgi | Use (an external) TGI server for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
-| bedrock | Use AWS Bedrock for running LLM inference and safety |
+| starter | Quick start template for running Llama Stack with several popular providers |
+------------------------------+-----------------------------------------------------------------------------+
-| meta-reference-gpu | Use Meta Reference for running LLM inference |
+| sambanova | Use SambaNova for running LLM inference and safety |
+------------------------------+-----------------------------------------------------------------------------+
-| nvidia | Use NVIDIA NIM for running LLM inference |
+| remote-vllm | Use (an external) vLLM server for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
-| cerebras | Use Cerebras for running LLM inference |
+| postgres-demo | Quick start template for running Llama Stack with several popular providers |
+------------------------------+-----------------------------------------------------------------------------+
| passthrough | Use Passthrough hosted llama-stack endpoint for LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| open-benchmark | Distribution for running open benchmarks |
+------------------------------+-----------------------------------------------------------------------------+
| ollama | Use (an external) Ollama server for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| nvidia | Use NVIDIA NIM for running LLM inference, evaluation and safety |
+------------------------------+-----------------------------------------------------------------------------+
| meta-reference-gpu | Use Meta Reference for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| llama_api | Distribution for running e2e tests in CI |
+------------------------------+-----------------------------------------------------------------------------+
| hf-serverless | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| hf-endpoint | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| groq | Use Groq for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| fireworks | Use Fireworks.AI for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| experimental-post-training | Experimental template for post training |
+------------------------------+-----------------------------------------------------------------------------+
| dell | Dell's distribution of Llama Stack. TGI inference via Dell's custom |
| | container |
+------------------------------+-----------------------------------------------------------------------------+
| ci-tests | Distribution for running e2e tests in CI |
+------------------------------+-----------------------------------------------------------------------------+
| cerebras | Use Cerebras for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| bedrock | Use AWS Bedrock for running LLM inference and safety |
+------------------------------+-----------------------------------------------------------------------------+
```

You may then pick a template to build your distribution with providers fitted to your liking.
@ -256,6 +276,7 @@ $ llama stack build --template ollama --image-type container
...
Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/ContainerfileFROM python:3.10-slim
...
```
You can now edit ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml and run `llama stack run ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml`
```
@ -305,30 +326,28 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con
```
llama stack run -h
-usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE] [--tls-keyfile TLS_KEYFILE] [--tls-certfile TLS_CERTFILE]
-                       [--image-type {conda,container,venv}]
-                       config
+usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE]
+                       [--image-type {conda,venv}] [--enable-ui]
+                       [config | template]

Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.

positional arguments:
-  config                Path to config file to use for the run
+  config | template     Path to config file to use for the run or name of known template (`llama stack list` for a list). (default: None)

options:
  -h, --help            show this help message and exit
  --port PORT           Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
  --image-name IMAGE_NAME
                        Name of the image to run. Defaults to the current environment (default: None)
-  --env KEY=VALUE       Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: [])
+  --env KEY=VALUE       Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: None)
-  --tls-keyfile TLS_KEYFILE
-                        Path to TLS key file for HTTPS (default: None)
-  --tls-certfile TLS_CERTFILE
-                        Path to TLS certificate file for HTTPS (default: None)
-  --image-type {conda,container,venv}
-                        Image Type used during the build. This can be either conda or container or venv. (default: conda)
+  --image-type {conda,venv}
+                        Image Type used during the build. This can be either conda or venv. (default: None)
+  --enable-ui           Start the UI server (default: False)
```
**Note:** Container images built with `llama stack build --image-type container` cannot be run using `llama stack run`. Instead, they must be run directly using Docker or Podman commands as shown in the container building section above.
``` ```
# Start using template name
llama stack run tgi
@ -372,6 +391,7 @@ INFO: Application startup complete.
INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
INFO: 2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
```

### Listing Distributions

Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.
@ -391,6 +411,20 @@ Example Usage
llama stack list
```
```
------------------------------+-----------------------------------------------------------------------------+--------------+------------+
| Stack Name | Path | Build Config | Run Config |
+------------------------------+-----------------------------------------------------------------------------+--------------+------------+
| together | /home/wenzhou/.llama/distributions/together | Yes | No |
+------------------------------+-----------------------------------------------------------------------------+--------------+------------+
| bedrock | /home/wenzhou/.llama/distributions/bedrock | Yes | No |
+------------------------------+-----------------------------------------------------------------------------+--------------+------------+
| starter | /home/wenzhou/.llama/distributions/starter | No | No |
+------------------------------+-----------------------------------------------------------------------------+--------------+------------+
| remote-vllm | /home/wenzhou/.llama/distributions/remote-vllm | Yes | Yes |
+------------------------------+-----------------------------------------------------------------------------+--------------+------------+
```
### Removing a Distribution

Use the remove command to delete a distribution you've previously built.
@ -413,7 +447,7 @@ Example
llama stack rm llamastack-test
```

-To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when theyre no longer needed.
+To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they're no longer needed.

### Troubleshooting

View file

@ -6,7 +6,7 @@ Llama Stack is a stateful service with REST APIs to support the seamless transit
environments. You can build and test using a local server first and deploy to a hosted endpoint for production.

In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)
-as the inference [provider](../providers/index.md#inference) for a Llama Model.
+as the inference [provider](../providers/inference/index) for a Llama Model.

#### Step 1: Install and setup
1. Install [uv](https://docs.astral.sh/uv/)

View file

@ -0,0 +1,5 @@
# Agents Providers
This section contains documentation for all available providers for the **agents** API.
- [inline::meta-reference](inline_meta-reference.md)

View file

@ -0,0 +1,26 @@
# inline::meta-reference
## Description
Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `persistence_store` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
| `responses_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | |
## Sample Configuration
```yaml
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db
```

View file

@ -0,0 +1,7 @@
# Datasetio Providers
This section contains documentation for all available providers for the **datasetio** API.
- [inline::localfs](inline_localfs.md)
- [remote::huggingface](remote_huggingface.md)
- [remote::nvidia](remote_nvidia.md)

View file

@ -0,0 +1,22 @@
# inline::localfs
## Description
Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
## Sample Configuration
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db
```

View file

@ -0,0 +1,22 @@
# remote::huggingface
## Description
HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
## Sample Configuration
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db
```

View file

@ -0,0 +1,25 @@
# remote::nvidia
## Description
NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The NVIDIA API key. |
| `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. |
| `project_id` | `str \| None` | No | test-project | The NVIDIA project ID. |
| `datasets_url` | `<class 'str'>` | No | http://nemo.test | Base URL for the NeMo Dataset API |
## Sample Configuration
```yaml
api_key: ${env.NVIDIA_API_KEY:+}
dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test}
```

View file

@ -0,0 +1,6 @@
# Eval Providers
This section contains documentation for all available providers for the **eval** API.
- [inline::meta-reference](inline_meta-reference.md)
- [remote::nvidia](remote_nvidia.md)

View file

@ -0,0 +1,22 @@
# inline::meta-reference
## Description
Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
## Sample Configuration
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db
```

View file

@ -0,0 +1,19 @@
# remote::nvidia
## Description
NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `evaluator_url` | `<class 'str'>` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service |
## Sample Configuration
```yaml
evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}
```

View file

@ -0,0 +1,5 @@
# Files Providers
This section contains documentation for all available providers for the **files** API.
- [inline::localfs](inline_localfs.md)

View file

@ -0,0 +1,24 @@
# inline::localfs
## Description
Local filesystem-based file storage provider for managing files and documents locally.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `storage_dir` | `<class 'str'>` | No | PydanticUndefined | Directory to store uploaded files |
| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
| `ttl_secs` | `<class 'int'>` | No | 31536000 | |
## Sample Configuration
```yaml
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db
```

View file

@ -18,60 +18,92 @@ Llama Stack supports external providers that live outside of the main codebase.
## Agents
Run multi-step agentic workflows with LLMs with tool usage, memory (RAG), etc.
```{toctree}
:maxdepth: 1
agents/index
```
## DatasetIO
Interfaces with datasets and data loaders.
## Eval
Generates outputs (via Inference or Agents) and perform scoring.
## Inference
Runs inference with an LLM.
## Post Training
Fine-tunes a model.
#### Post Training Providers
The following providers are available for Post Training:
```{toctree}
:maxdepth: 1
-external
+datasetio/index
-post_training/huggingface
+```
-post_training/torchtune
-post_training/nvidia_nemo
+## Eval
Generates outputs (via Inference or Agents) and perform scoring.
```{toctree}
:maxdepth: 1
eval/index
```
## Inference
Runs inference with an LLM.
```{toctree}
:maxdepth: 1
inference/index
```
## Post Training
Fine-tunes a model.
```{toctree}
:maxdepth: 1
post_training/index
```
## Safety
Applies safety policies to the output at a Systems (not only model) level.
```{toctree}
:maxdepth: 1
safety/index
```
## Scoring
Evaluates the outputs of the system.
```{toctree}
:maxdepth: 1
scoring/index
```
## Telemetry
Collects telemetry data from the system.
```{toctree}
:maxdepth: 1
telemetry/index
```
## Tool Runtime
Is associated with the ToolGroup resources.
```{toctree}
:maxdepth: 1
tool_runtime/index
```
## Vector IO
Vector IO refers to operations on vector databases, such as adding documents, searching, and deleting documents.
Vector IO plays a crucial role in [Retrieval Augmented Generation (RAG)](../..//building_applications/rag), where the vector
io and database are used to store and retrieve documents for retrieval.
#### Vector IO Providers
The following providers (i.e., databases) are available for Vector IO:
```{toctree}
:maxdepth: 1
-external
+vector_io/index
vector_io/faiss
vector_io/sqlite-vec
vector_io/chromadb
vector_io/pgvector
vector_io/qdrant
vector_io/milvus
vector_io/weaviate
```

View file

@ -0,0 +1,32 @@
# Inference Providers
This section contains documentation for all available providers for the **inference** API.
- [inline::meta-reference](inline_meta-reference.md)
- [inline::sentence-transformers](inline_sentence-transformers.md)
- [inline::vllm](inline_vllm.md)
- [remote::anthropic](remote_anthropic.md)
- [remote::bedrock](remote_bedrock.md)
- [remote::cerebras](remote_cerebras.md)
- [remote::cerebras-openai-compat](remote_cerebras-openai-compat.md)
- [remote::databricks](remote_databricks.md)
- [remote::fireworks](remote_fireworks.md)
- [remote::fireworks-openai-compat](remote_fireworks-openai-compat.md)
- [remote::gemini](remote_gemini.md)
- [remote::groq](remote_groq.md)
- [remote::groq-openai-compat](remote_groq-openai-compat.md)
- [remote::hf::endpoint](remote_hf_endpoint.md)
- [remote::hf::serverless](remote_hf_serverless.md)
- [remote::llama-openai-compat](remote_llama-openai-compat.md)
- [remote::nvidia](remote_nvidia.md)
- [remote::ollama](remote_ollama.md)
- [remote::openai](remote_openai.md)
- [remote::passthrough](remote_passthrough.md)
- [remote::runpod](remote_runpod.md)
- [remote::sambanova](remote_sambanova.md)
- [remote::sambanova-openai-compat](remote_sambanova-openai-compat.md)
- [remote::tgi](remote_tgi.md)
- [remote::together](remote_together.md)
- [remote::together-openai-compat](remote_together-openai-compat.md)
- [remote::vllm](remote_vllm.md)
- [remote::watsonx](remote_watsonx.md)

View file

@ -0,0 +1,32 @@
# inline::meta-reference
## Description
Meta's reference implementation of inference with support for various model formats and optimization techniques.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `model` | `str \| None` | No | | |
| `torch_seed` | `int \| None` | No | | |
| `max_seq_len` | `<class 'int'>` | No | 4096 | |
| `max_batch_size` | `<class 'int'>` | No | 1 | |
| `model_parallel_size` | `int \| None` | No | | |
| `create_distributed_process_group` | `<class 'bool'>` | No | True | |
| `checkpoint_dir` | `str \| None` | No | | |
| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig, annotation=NoneType, required=True, discriminator='type'` | No | | |
## Sample Configuration
```yaml
model: Llama3.2-3B-Instruct
checkpoint_dir: ${env.CHECKPOINT_DIR:=null}
quantization:
type: ${env.QUANTIZATION_TYPE:=bf16}
model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
max_batch_size: ${env.MAX_BATCH_SIZE:=1}
max_seq_len: ${env.MAX_SEQ_LEN:=4096}
```

View file

@ -0,0 +1,13 @@
# inline::sentence-transformers
## Description
Sentence Transformers inference provider for text embeddings and similarity search.
## Sample Configuration
```yaml
{}
```

View file

@ -0,0 +1,29 @@
# inline::vllm
## Description
vLLM inference provider for high-performance model serving with PagedAttention and continuous batching.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `tensor_parallel_size` | `<class 'int'>` | No | 1 | Number of tensor parallel replicas (number of GPUs to use). |
| `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. |
| `max_model_len` | `<class 'int'>` | No | 4096 | Maximum context length to use during serving. |
| `max_num_seqs` | `<class 'int'>` | No | 4 | Maximum parallel batch size for generation. |
| `enforce_eager` | `<class 'bool'>` | No | False | Whether to use eager mode for inference (otherwise cuda graphs are used). |
| `gpu_memory_utilization` | `<class 'float'>` | No | 0.3 | How much GPU memory will be allocated when this provider has finished loading, including memory that was already allocated before loading. |
## Sample Configuration
```yaml
tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:=1}
max_tokens: ${env.MAX_TOKENS:=4096}
max_model_len: ${env.MAX_MODEL_LEN:=4096}
max_num_seqs: ${env.MAX_NUM_SEQS:=4}
enforce_eager: ${env.ENFORCE_EAGER:=False}
gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:=0.3}
```

View file

@ -0,0 +1,19 @@
# remote::anthropic
## Description
Anthropic inference provider for accessing Claude models and Anthropic's AI services.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | API key for Anthropic models |
## Sample Configuration
```yaml
api_key: ${env.ANTHROPIC_API_KEY}
```

View file

@ -0,0 +1,28 @@
# remote::bedrock
## Description
AWS Bedrock inference provider for accessing various AI models through AWS's managed service.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION |
| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE |
| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE |
| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |
## Sample Configuration
```yaml
{}
```

View file

@ -0,0 +1,21 @@
# remote::cerebras-openai-compat
## Description
Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Cerebras API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.cerebras.ai/v1 | The URL for the Cerebras API server |
## Sample Configuration
```yaml
openai_compat_api_base: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY}
```

View file

@ -0,0 +1,21 @@
# remote::cerebras
## Description
Cerebras inference provider for running models on Cerebras Cloud platform.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Cerebras API Key |
## Sample Configuration
```yaml
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY}
```

View file

@ -0,0 +1,21 @@
# remote::databricks
## Description
Databricks inference provider for running models on Databricks' unified analytics platform.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | | The URL for the Databricks model serving endpoint |
| `api_token` | `<class 'str'>` | No | | The Databricks API token |
## Sample Configuration
```yaml
url: ${env.DATABRICKS_URL}
api_token: ${env.DATABRICKS_API_TOKEN}
```

View file

@ -0,0 +1,21 @@
# remote::fireworks-openai-compat
## Description
Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Fireworks API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks API server |
## Sample Configuration
```yaml
openai_compat_api_base: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
```

View file

@ -0,0 +1,21 @@
# remote::fireworks
## Description
Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Fireworks.ai API Key |
## Sample Configuration
```yaml
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
```

View file

@ -0,0 +1,19 @@
# remote::gemini
## Description
Google Gemini inference provider for accessing Gemini models and Google's AI services.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | API key for Gemini models |
## Sample Configuration
```yaml
api_key: ${env.GEMINI_API_KEY}
```

View file

@ -0,0 +1,21 @@
# remote::groq-openai-compat
## Description
Groq OpenAI-compatible provider for using Groq models with OpenAI API format.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Groq API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.groq.com/openai/v1 | The URL for the Groq API server |
## Sample Configuration
```yaml
openai_compat_api_base: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY}
```

View file

@ -0,0 +1,21 @@
# remote::groq
## Description
Groq inference provider for ultra-fast inference using Groq's LPU technology.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Groq API key |
| `url` | `<class 'str'>` | No | https://api.groq.com | The URL for the Groq AI server |
## Sample Configuration
```yaml
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY}
```

View file

@ -0,0 +1,21 @@
# remote::hf::endpoint
## Description
HuggingFace Inference Endpoints provider for dedicated model serving.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `endpoint_name` | `<class 'str'>` | No | PydanticUndefined | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. |
| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) |
## Sample Configuration
```yaml
endpoint_name: ${env.INFERENCE_ENDPOINT_NAME}
api_token: ${env.HF_API_TOKEN}
```

View file

@ -0,0 +1,21 @@
# remote::hf::serverless
## Description
HuggingFace Inference API serverless provider for on-demand model inference.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `huggingface_repo` | `<class 'str'>` | No | PydanticUndefined | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') |
| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) |
## Sample Configuration
```yaml
huggingface_repo: ${env.INFERENCE_MODEL}
api_token: ${env.HF_API_TOKEN}
```

View file

@ -0,0 +1,21 @@
# remote::llama-openai-compat
## Description
Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Llama API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
## Sample Configuration
```yaml
openai_compat_api_base: https://api.llama.com/compat/v1/
api_key: ${env.LLAMA_API_KEY}
```

View file

@ -0,0 +1,24 @@
# remote::nvidia
## Description
NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed if using the hosted service |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
| `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
## Sample Configuration
```yaml
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:+}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
```

View file

@ -0,0 +1,21 @@
# remote::ollama
## Description
Ollama inference provider for running local models through the Ollama runtime.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | http://localhost:11434 | |
| `raise_on_connect_error` | `<class 'bool'>` | No | True | |
## Sample Configuration
```yaml
url: ${env.OLLAMA_URL:=http://localhost:11434}
raise_on_connect_error: true
```

View file

@ -0,0 +1,19 @@
# remote::openai
## Description
OpenAI inference provider for accessing GPT models and other OpenAI services.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | API key for OpenAI models |
## Sample Configuration
```yaml
api_key: ${env.OPENAI_API_KEY}
```

View file

@ -0,0 +1,21 @@
# remote::passthrough
## Description
Passthrough inference provider for connecting to any external inference service not directly supported.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrough endpoint |
## Sample Configuration
```yaml
url: ${env.PASSTHROUGH_URL}
api_key: ${env.PASSTHROUGH_API_KEY}
```

View file

@ -0,0 +1,21 @@
# remote::runpod
## Description
RunPod inference provider for running models on RunPod's cloud GPU platform.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
| `api_token` | `str \| None` | No | | The API token |
## Sample Configuration
```yaml
url: ${env.RUNPOD_URL:+}
api_token: ${env.RUNPOD_API_TOKEN:+}
```

View file

@ -0,0 +1,21 @@
# remote::sambanova-openai-compat
## Description
SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The SambaNova API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova API server |
## Sample Configuration
```yaml
openai_compat_api_base: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
```

View file

@ -0,0 +1,21 @@
# remote::sambanova
## Description
SambaNova inference provider for running models on SambaNova's dataflow architecture.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key |
## Sample Configuration
```yaml
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
```

View file

@ -0,0 +1,19 @@
# remote::tgi
## Description
Text Generation Inference (TGI) provider for HuggingFace model serving.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | PydanticUndefined | The URL for the TGI serving endpoint |
## Sample Configuration
```yaml
url: ${env.TGI_URL}
```

View file

@ -0,0 +1,21 @@
# remote::together-openai-compat
## Description
Together AI OpenAI-compatible provider for using Together models with OpenAI API format.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Together API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together API server |
## Sample Configuration
```yaml
openai_compat_api_base: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY}
```

View file

@ -0,0 +1,21 @@
# remote::together
## Description
Together AI inference provider for open-source models and collaborative AI development.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Together AI API Key |
## Sample Configuration
```yaml
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:+}
```

View file

@ -0,0 +1,25 @@
# remote::vllm
## Description
Remote vLLM inference provider for connecting to vLLM servers.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
| `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. |
| `api_token` | `str \| None` | No | fake | The API token |
| `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |
## Sample Configuration
```yaml
url: ${env.VLLM_URL}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
```

View file

@ -0,0 +1,24 @@
# remote::watsonx
## Description
IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key, only needed if using the hosted service |
| `project_id` | `str \| None` | No | | The Project ID key, only needed if using the hosted service |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
## Sample Configuration
```yaml
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
api_key: ${env.WATSONX_API_KEY:+}
project_id: ${env.WATSONX_PROJECT_ID:+}
```

View file

@ -0,0 +1,7 @@
# Post_Training Providers
This section contains documentation for all available providers for the **post_training** API.
- [inline::huggingface](inline_huggingface.md)
- [inline::torchtune](inline_torchtune.md)
- [remote::nvidia](remote_nvidia.md)

View file

@ -0,0 +1,36 @@
# inline::huggingface
## Description
HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `device` | `<class 'str'>` | No | cuda | |
| `distributed_backend` | `Literal['fsdp', 'deepspeed'` | No | | |
| `checkpoint_format` | `Literal['full_state', 'huggingface'` | No | huggingface | |
| `chat_template` | `<class 'str'>` | No | `<\|user\|>\n{input}\n<\|assistant\|>\n{output}` | |
| `model_specific_config` | `<class 'dict'>` | No | {'trust_remote_code': True, 'attn_implementation': 'sdpa'} | |
| `max_seq_length` | `<class 'int'>` | No | 2048 | |
| `gradient_checkpointing` | `<class 'bool'>` | No | False | |
| `save_total_limit` | `<class 'int'>` | No | 3 | |
| `logging_steps` | `<class 'int'>` | No | 10 | |
| `warmup_ratio` | `<class 'float'>` | No | 0.1 | |
| `weight_decay` | `<class 'float'>` | No | 0.01 | |
| `dataloader_num_workers` | `<class 'int'>` | No | 4 | |
| `dataloader_pin_memory` | `<class 'bool'>` | No | True | |
## Sample Configuration
```yaml
checkpoint_format: huggingface
distributed_backend: null
device: cpu
```

View file

@ -0,0 +1,20 @@
# inline::torchtune
## Description
TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `torch_seed` | `int \| None` | No | | |
| `checkpoint_format` | `Literal['meta', 'huggingface']` | No | meta | |
## Sample Configuration
```yaml
checkpoint_format: meta
```

View file

@ -0,0 +1,28 @@
# remote::nvidia
## Description
NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The NVIDIA API key. |
| `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. |
| `project_id` | `str \| None` | No | test-example-model@v1 | The NVIDIA project ID. |
| `customizer_url` | `str \| None` | No | | Base URL for the NeMo Customizer API |
| `timeout` | `<class 'int'>` | No | 300 | Timeout for the NVIDIA Post Training API |
| `max_retries` | `<class 'int'>` | No | 3 | Maximum number of retries for the NVIDIA Post Training API |
| `output_model_dir` | `<class 'str'>` | No | test-example-model@v1 | Directory to save the output model |
## Sample Configuration
```yaml
api_key: ${env.NVIDIA_API_KEY:+}
dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}
```

View file

@ -0,0 +1,10 @@
# Safety Providers
This section contains documentation for all available providers for the **safety** API.
- [inline::code-scanner](inline_code-scanner.md)
- [inline::llama-guard](inline_llama-guard.md)
- [inline::prompt-guard](inline_prompt-guard.md)
- [remote::bedrock](remote_bedrock.md)
- [remote::nvidia](remote_nvidia.md)
- [remote::sambanova](remote_sambanova.md)

View file

@ -0,0 +1,13 @@
# inline::code-scanner
## Description
Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.
## Sample Configuration
```yaml
{}
```

View file

@ -0,0 +1,19 @@
# inline::llama-guard
## Description
Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `excluded_categories` | `list[str]` | No | [] | |
## Sample Configuration
```yaml
excluded_categories: []
```

View file

@ -0,0 +1,19 @@
# inline::prompt-guard
## Description
Prompt Guard safety provider for detecting and filtering unsafe prompts and content.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `guard_type` | `<class 'str'>` | No | injection | |
## Sample Configuration
```yaml
guard_type: injection
```

View file

@ -0,0 +1,28 @@
# remote::bedrock
## Description
AWS Bedrock safety provider for content moderation using AWS's safety services.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Defaults to the environment variable AWS_ACCESS_KEY_ID. |
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Defaults to the environment variable AWS_SECRET_ACCESS_KEY. |
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Defaults to the environment variable AWS_SESSION_TOKEN. |
| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2. Defaults to the environment variable AWS_DEFAULT_REGION. |
| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use. Defaults to the environment variable AWS_PROFILE. |
| `total_max_attempts` | `int \| None` | No | | The maximum number of attempts that will be made for a single request, including the initial attempt. Defaults to the environment variable AWS_MAX_ATTEMPTS. |
| `retry_mode` | `str \| None` | No | | The type of retries Boto3 will perform. Defaults to the environment variable AWS_RETRY_MODE. |
| `connect_timeout` | `float \| None` | No | 60 | The time in seconds until a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
| `read_timeout` | `float \| None` | No | 60 | The time in seconds until a timeout exception is thrown when attempting to read from a connection. The default is 60 seconds. |
| `session_ttl` | `int \| None` | No | 3600 | The time in seconds until a session expires. The default is 3600 seconds (1 hour). |
## Sample Configuration
```yaml
{}
```
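The sample configuration is empty because every field falls back to the standard AWS environment variables or boto3 defaults. A sketch that pins the region and tightens retry and timeout behaviour explicitly (values are illustrative):
```yaml
region_name: us-west-2      # otherwise taken from AWS_DEFAULT_REGION
total_max_attempts: 3       # otherwise taken from AWS_MAX_ATTEMPTS
retry_mode: standard        # otherwise taken from AWS_RETRY_MODE
connect_timeout: 30
read_timeout: 30
```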

View file

@ -0,0 +1,21 @@
# remote::nvidia
## Description
NVIDIA's safety provider for content moderation and safety filtering.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `guardrails_service_url` | `<class 'str'>` | No | http://0.0.0.0:7331 | The URL for accessing the Guardrails service |
| `config_id` | `str \| None` | No | self-check | Guardrails configuration ID to use from the Guardrails configuration store |
## Sample Configuration
```yaml
guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}
config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}
```

View file

@ -0,0 +1,21 @@
# remote::sambanova
## Description
SambaNova's safety provider for content moderation and safety filtering.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key |
## Sample Configuration
```yaml
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
```

View file

@ -0,0 +1,7 @@
# Scoring Providers
This section contains documentation for all available providers for the **scoring** API.
- [inline::basic](inline_basic.md)
- [inline::braintrust](inline_braintrust.md)
- [inline::llm-as-judge](inline_llm-as-judge.md)

View file

@ -0,0 +1,13 @@
# inline::basic
## Description
Basic scoring provider for simple evaluation metrics and scoring functions.
## Sample Configuration
```yaml
{}
```

View file

@ -0,0 +1,19 @@
# inline::braintrust
## Description
Braintrust scoring provider for evaluation and scoring using the Braintrust platform.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `openai_api_key` | `str \| None` | No | | The OpenAI API Key |
## Sample Configuration
```yaml
openai_api_key: ${env.OPENAI_API_KEY:+}
```

View file

@ -0,0 +1,13 @@
# inline::llm-as-judge
## Description
LLM-as-judge scoring provider that uses language models to evaluate and score responses.
## Sample Configuration
```yaml
{}
```

View file

@ -0,0 +1,5 @@
# Telemetry Providers
This section contains documentation for all available providers for the **telemetry** API.
- [inline::meta-reference](inline_meta-reference.md)

View file

@ -0,0 +1,25 @@
# inline::meta-reference
## Description
Meta's reference implementation of telemetry and observability using OpenTelemetry.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `otel_trace_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL for traces |
| `otel_metric_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL for metrics |
| `service_name` | `<class 'str'>` | No | | The service name to use for telemetry |
| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink]` | No | [console, sqlite] | List of telemetry sinks to enable (possible values: otel, sqlite, console) |
| `sqlite_db_path` | `<class 'str'>` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces |
## Sample Configuration
```yaml
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/trace_store.db
```
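To forward traces and metrics to an OpenTelemetry collector in addition to the local SQLite store, the `otel` sink can be enabled together with the endpoint fields from the table above. A sketch, assuming a collector listening on the conventional OTLP/HTTP port 4318 on localhost (the environment variable names for the endpoints are illustrative):
```yaml
service_name: "${env.OTEL_SERVICE_NAME:=llama-stack}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite,otel}
otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}
otel_metric_endpoint: ${env.OTEL_METRIC_ENDPOINT:=http://localhost:4318/v1/metrics}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/trace_store.db
```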

View file

@ -0,0 +1,10 @@
# Tool_Runtime Providers
This section contains documentation for all available providers for the **tool_runtime** API.
- [inline::rag-runtime](inline_rag-runtime.md)
- [remote::bing-search](remote_bing-search.md)
- [remote::brave-search](remote_brave-search.md)
- [remote::model-context-protocol](remote_model-context-protocol.md)
- [remote::tavily-search](remote_tavily-search.md)
- [remote::wolfram-alpha](remote_wolfram-alpha.md)

View file

@ -0,0 +1,13 @@
# inline::rag-runtime
## Description
RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.
## Sample Configuration
```yaml
{}
```

View file

@ -0,0 +1,20 @@
# remote::bing-search
## Description
Bing Search tool for web search capabilities using Microsoft's search engine.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | |
| `top_k` | `<class 'int'>` | No | 3 | |
## Sample Configuration
```yaml
api_key: ${env.BING_API_KEY:}
```

View file

@ -0,0 +1,21 @@
# remote::brave-search
## Description
Brave Search tool for web search capabilities with privacy-focused results.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Brave Search API Key |
| `max_results` | `<class 'int'>` | No | 3 | The maximum number of results to return |
## Sample Configuration
```yaml
api_key: ${env.BRAVE_SEARCH_API_KEY:+}
max_results: 3
```

View file

@ -0,0 +1,13 @@
# remote::model-context-protocol
## Description
Model Context Protocol (MCP) tool for standardized tool calling and context management.
## Sample Configuration
```yaml
{}
```

View file

@ -0,0 +1,21 @@
# remote::tavily-search
## Description
Tavily Search tool for AI-optimized web search with structured results.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Tavily Search API Key |
| `max_results` | `<class 'int'>` | No | 3 | The maximum number of results to return |
## Sample Configuration
```yaml
api_key: ${env.TAVILY_SEARCH_API_KEY:+}
max_results: 3
```

View file

@ -0,0 +1,19 @@
# remote::wolfram-alpha
## Description
Wolfram Alpha tool for computational knowledge and mathematical calculations.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | |
## Sample Configuration
```yaml
api_key: ${env.WOLFRAM_ALPHA_API_KEY:+}
```

View file

@ -0,0 +1,16 @@
# Vector_Io Providers
This section contains documentation for all available providers for the **vector_io** API.
- [inline::chromadb](inline_chromadb.md)
- [inline::faiss](inline_faiss.md)
- [inline::meta-reference](inline_meta-reference.md)
- [inline::milvus](inline_milvus.md)
- [inline::qdrant](inline_qdrant.md)
- [inline::sqlite-vec](inline_sqlite-vec.md)
- [inline::sqlite_vec](inline_sqlite_vec.md)
- [remote::chromadb](remote_chromadb.md)
- [remote::milvus](remote_milvus.md)
- [remote::pgvector](remote_pgvector.md)
- [remote::qdrant](remote_qdrant.md)
- [remote::weaviate](remote_weaviate.md)

View file

@ -0,0 +1,52 @@
# inline::chromadb
## Description
[Chroma](https://www.trychroma.com/) is an inline and remote vector
database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
That means you're not limited to storing vectors in memory or in a separate service.
## Features
Chroma supports:
- Store embeddings and their metadata
- Vector search
- Full-text search
- Document storage
- Metadata filtering
- Multi-modal retrieval
## Usage
To use Chroma in your Llama Stack project, follow these steps:
1. Install the necessary dependencies.
2. Configure your Llama Stack project to use Chroma.
3. Start storing and querying vectors.
## Installation
You can install Chroma using pip:
```bash
pip install chromadb
```
## Documentation
See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
## Sample Configuration
```yaml
db_path: ${env.CHROMADB_PATH}
```

View file

@ -1,7 +1,7 @@
--- # inline::faiss
orphan: true
--- ## Description
# Faiss
[Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It [Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It
allows you to store and query vectors directly in memory. allows you to store and query vectors directly in memory.
@ -31,3 +31,21 @@ pip install faiss-cpu
## Documentation ## Documentation
See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for
more details about Faiss in general. more details about Faiss in general.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
## Sample Configuration
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
```

View file

@ -0,0 +1,26 @@
# inline::meta-reference
## Description
Meta's reference implementation of a vector database.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
## Sample Configuration
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
```
## Deprecation Notice
⚠️ **Warning**: Please use the `inline::faiss` provider instead.

View file

@ -0,0 +1,26 @@
# inline::milvus
## Description
Please refer to the remote provider documentation.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
## Sample Configuration
```yaml
db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy/milvus.db}
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/${env.MILVUS_KVSTORE_DB_PATH:=~/.llama/dummy/milvus_registry.db}
```

View file

@ -1,7 +1,7 @@
--- # inline::qdrant
orphan: true
--- ## Description
# Qdrant
[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It [Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
allows you to store and query vectors directly in memory. allows you to store and query vectors directly in memory.
@ -44,3 +44,18 @@ docker pull qdrant/qdrant
``` ```
## Documentation ## Documentation
See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general. See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `path` | `<class 'str'>` | No | PydanticUndefined | |
## Sample Configuration
```yaml
path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db
```

View file

@ -1,7 +1,7 @@
--- # inline::sqlite-vec
orphan: true
--- ## Description
# SQLite-Vec
[SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It [SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It
allows you to store and query vectors directly within an SQLite database. allows you to store and query vectors directly within an SQLite database.
@ -199,3 +199,18 @@ pip install sqlite-vec
See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general. See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general.
[^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759). [^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759).
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
## Sample Configuration
```yaml
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
```

View file

@ -0,0 +1,25 @@
# inline::sqlite_vec
## Description
Please refer to the sqlite-vec provider documentation.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
## Sample Configuration
```yaml
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
```
## Deprecation Notice
⚠️ **Warning**: Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.

View file

@ -1,7 +1,7 @@
--- # remote::chromadb
orphan: true
--- ## Description
# Chroma
[Chroma](https://www.trychroma.com/) is an inline and remote vector [Chroma](https://www.trychroma.com/) is an inline and remote vector
database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
@ -34,3 +34,18 @@ pip install chromadb
## Documentation ## Documentation
See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general. See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `str \| None` | No | PydanticUndefined | |
## Sample Configuration
```yaml
url: ${env.CHROMADB_URL}
```

View file

@ -1,7 +1,7 @@
--- # remote::milvus
orphan: true
--- ## Description
# Milvus
[Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It [Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It
allows you to store and query vectors directly within a Milvus database. allows you to store and query vectors directly within a Milvus database.
@ -96,7 +96,7 @@ vector_io:
#### Key Parameters for TLS Configuration #### Key Parameters for TLS Configuration
- **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`. - **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`.
- **`server_pem_path`**: Path to the **server certificate** for verifying the servers identity (used in one-way TLS). - **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS).
- **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS). - **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS).
- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
- **`client_key_path`**: Path to the **client private key** file (required for mTLS). - **`client_key_path`**: Path to the **client private key** file (required for mTLS).
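Putting the parameters above together, a mutual-TLS connection to a remote Milvus server might look like the sketch below. The endpoint and certificate paths are placeholders, the surrounding `vector_io` entry follows the usual run-config shape, and the TLS keys ride on the provider's pass-through `config` behaviour rather than being dedicated top-level fields:
```yaml
vector_io:
  - provider_id: milvus
    provider_type: remote::milvus
    config:
      uri: https://milvus.example.com:19530   # placeholder endpoint
      token: ${env.MILVUS_TOKEN}
      secure: true                            # enables TLS
      ca_pem_path: /path/to/ca.pem            # CA used to validate the server certificate (mTLS)
      client_pem_path: /path/to/client.pem    # client certificate (mTLS)
      client_key_path: /path/to/client.key    # client private key (mTLS)
```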
@ -105,3 +105,24 @@ vector_io:
See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md). For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md).
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `uri` | `<class 'str'>` | No | PydanticUndefined | The URI of the Milvus server |
| `token` | `str \| None` | No | PydanticUndefined | The token of the Milvus server |
| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
| `config` | `dict` | No | {} | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |
> **Note**: This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider.
## Sample Configuration
```yaml
uri: ${env.MILVUS_ENDPOINT}
token: ${env.MILVUS_TOKEN}
```

View file

@ -1,7 +1,7 @@
--- # remote::pgvector
orphan: true
--- ## Description
# Postgres PGVector
[PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It [PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It
allows you to store and query vectors directly in memory. allows you to store and query vectors directly in memory.
@ -29,3 +29,26 @@ docker pull pgvector/pgvector:pg17
``` ```
## Documentation ## Documentation
See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general. See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `host` | `str \| None` | No | localhost | |
| `port` | `int \| None` | No | 5432 | |
| `db` | `str \| None` | No | postgres | |
| `user` | `str \| None` | No | postgres | |
| `password` | `str \| None` | No | mysecretpassword | |
## Sample Configuration
```yaml
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB}
user: ${env.PGVECTOR_USER}
password: ${env.PGVECTOR_PASSWORD}
```

View file

@ -0,0 +1,30 @@
# remote::qdrant
## Description
Please refer to the inline provider documentation.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `location` | `str \| None` | No | | |
| `url` | `str \| None` | No | | |
| `port` | `int \| None` | No | 6333 | |
| `grpc_port` | `<class 'int'>` | No | 6334 | |
| `prefer_grpc` | `<class 'bool'>` | No | False | |
| `https` | `bool \| None` | No | | |
| `api_key` | `str \| None` | No | | |
| `prefix` | `str \| None` | No | | |
| `timeout` | `int \| None` | No | | |
| `host` | `str \| None` | No | | |
## Sample Configuration
```yaml
api_key: ${env.QDRANT_API_KEY}
```
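The sample above only supplies an API key; the remaining fields in the table describe the connection itself. A sketch for a remote Qdrant instance reachable over HTTPS (host and key are placeholders):
```yaml
url: https://qdrant.example.com   # placeholder endpoint
port: 6333
grpc_port: 6334
prefer_grpc: false
https: true
api_key: ${env.QDRANT_API_KEY}
timeout: 30
```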

View file

@ -1,7 +1,7 @@
--- # remote::weaviate
orphan: true
--- ## Description
# Weaviate
[Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack. [Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack.
It allows you to store and query vectors directly within a Weaviate database. It allows you to store and query vectors directly within a Weaviate database.
@ -31,3 +31,12 @@ To install Weaviate see the [Weaviate quickstart documentation](https://weaviate
## Documentation ## Documentation
See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general. See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general.
## Sample Configuration
```yaml
{}
```

View file

@ -32,6 +32,7 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
from .openai_responses import ( from .openai_responses import (
ListOpenAIResponseInputItem, ListOpenAIResponseInputItem,
ListOpenAIResponseObject, ListOpenAIResponseObject,
OpenAIDeleteResponseObject,
OpenAIResponseInput, OpenAIResponseInput,
OpenAIResponseInputTool, OpenAIResponseInputTool,
OpenAIResponseObject, OpenAIResponseObject,
@ -647,3 +648,12 @@ class Agents(Protocol):
:returns: An ListOpenAIResponseInputItem. :returns: An ListOpenAIResponseInputItem.
""" """
... ...
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE")
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
"""Delete an OpenAI response by its ID.
:param response_id: The ID of the OpenAI response to delete.
:returns: An OpenAIDeleteResponseObject
"""
...

View file

@ -229,6 +229,13 @@ class OpenAIResponseObject(BaseModel):
user: str | None = None user: str | None = None
@json_schema_type
class OpenAIDeleteResponseObject(BaseModel):
id: str
object: Literal["response"] = "response"
deleted: bool = True
@json_schema_type @json_schema_type
class OpenAIResponseObjectStreamResponseCreated(BaseModel): class OpenAIResponseObjectStreamResponseCreated(BaseModel):
response: OpenAIResponseObject response: OpenAIResponseObject

View file

@ -141,6 +141,12 @@ Fully-qualified name of the module to import. The module is expected to have:
provider_data_validator: str | None = Field( provider_data_validator: str | None = Field(
default=None, default=None,
) )
description: str | None = Field(
default=None,
description="""
A description of the provider. This is used to display in the documentation.
""",
)
@json_schema_type @json_schema_type
@ -167,6 +173,12 @@ Fully-qualified name of the module to import. The module is expected to have:
provider_data_validator: str | None = Field( provider_data_validator: str | None = Field(
default=None, default=None,
) )
description: str | None = Field(
default=None,
description="""
A description of the provider. This is used to display in the documentation.
""",
)
class RemoteProviderConfig(BaseModel): class RemoteProviderConfig(BaseModel):

View file

@ -359,3 +359,6 @@ class MetaReferenceAgentsImpl(Agents):
return await self.openai_responses_impl.list_openai_response_input_items( return await self.openai_responses_impl.list_openai_response_input_items(
response_id, after, before, include, limit, order response_id, after, before, include, limit, order
) )
async def delete_openai_response(self, response_id: str) -> None:
return await self.openai_responses_impl.delete_openai_response(response_id)

View file

@ -19,6 +19,7 @@ from llama_stack.apis.agents.openai_responses import (
AllowedToolsFilter, AllowedToolsFilter,
ListOpenAIResponseInputItem, ListOpenAIResponseInputItem,
ListOpenAIResponseObject, ListOpenAIResponseObject,
OpenAIDeleteResponseObject,
OpenAIResponseInput, OpenAIResponseInput,
OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputMessageContent, OpenAIResponseInputMessageContent,
@ -574,6 +575,9 @@ class OpenAIResponsesImpl:
input=input, input=input,
) )
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
return await self.responses_store.delete_response_object(response_id)
async def _convert_response_tools_to_chat_tools( async def _convert_response_tools_to_chat_tools(
self, tools: list[OpenAIResponseInputTool] self, tools: list[OpenAIResponseInputTool]
) -> tuple[ ) -> tuple[

View file

@ -38,7 +38,7 @@ class TelemetryConfig(BaseModel):
description="List of telemetry sinks to enable (possible values: otel, sqlite, console)", description="List of telemetry sinks to enable (possible values: otel, sqlite, console)",
) )
sqlite_db_path: str = Field( sqlite_db_path: str = Field(
default=(RUNTIME_BASE_DIR / "trace_store.db").as_posix(), default_factory=lambda: (RUNTIME_BASE_DIR / "trace_store.db").as_posix(),
description="The path to the SQLite database to use for storing traces", description="The path to the SQLite database to use for storing traces",
) )

View file

@ -35,5 +35,6 @@ def available_providers() -> list[ProviderSpec]:
Api.tool_runtime, Api.tool_runtime,
Api.tool_groups, Api.tool_groups,
], ],
description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
), ),
] ]

View file

@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.inline.datasetio.localfs", module="llama_stack.providers.inline.datasetio.localfs",
config_class="llama_stack.providers.inline.datasetio.localfs.LocalFSDatasetIOConfig", config_class="llama_stack.providers.inline.datasetio.localfs.LocalFSDatasetIOConfig",
api_dependencies=[], api_dependencies=[],
description="Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.",
), ),
remote_provider_spec( remote_provider_spec(
api=Api.datasetio, api=Api.datasetio,
@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]:
], ],
module="llama_stack.providers.remote.datasetio.huggingface", module="llama_stack.providers.remote.datasetio.huggingface",
config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig", config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig",
description="HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]:
], ],
module="llama_stack.providers.remote.datasetio.nvidia", module="llama_stack.providers.remote.datasetio.nvidia",
config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig", config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig",
description="NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.",
), ),
), ),
] ]

View file

@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
Api.inference, Api.inference,
Api.agents, Api.agents,
], ],
description="Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.",
), ),
remote_provider_spec( remote_provider_spec(
api=Api.eval, api=Api.eval,
@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]:
], ],
module="llama_stack.providers.remote.eval.nvidia", module="llama_stack.providers.remote.eval.nvidia",
config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig", config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig",
description="NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.",
), ),
api_dependencies=[ api_dependencies=[
Api.datasetio, Api.datasetio,

View file

@ -21,5 +21,6 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=sql_store_pip_packages, pip_packages=sql_store_pip_packages,
module="llama_stack.providers.inline.files.localfs", module="llama_stack.providers.inline.files.localfs",
config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig", config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig",
description="Local filesystem-based file storage provider for managing files and documents locally.",
), ),
] ]

View file

@ -35,6 +35,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=META_REFERENCE_DEPS, pip_packages=META_REFERENCE_DEPS,
module="llama_stack.providers.inline.inference.meta_reference", module="llama_stack.providers.inline.inference.meta_reference",
config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig", config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
description="Meta's reference implementation of inference with support for various model formats and optimization techniques.",
), ),
InlineProviderSpec( InlineProviderSpec(
api=Api.inference, api=Api.inference,
@ -44,6 +45,7 @@ def available_providers() -> list[ProviderSpec]:
], ],
module="llama_stack.providers.inline.inference.vllm", module="llama_stack.providers.inline.inference.vllm",
config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig", config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig",
description="vLLM inference provider for high-performance model serving with PagedAttention and continuous batching.",
), ),
InlineProviderSpec( InlineProviderSpec(
api=Api.inference, api=Api.inference,
@ -54,6 +56,7 @@ def available_providers() -> list[ProviderSpec]:
], ],
module="llama_stack.providers.inline.inference.sentence_transformers", module="llama_stack.providers.inline.inference.sentence_transformers",
config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig", config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
description="Sentence Transformers inference provider for text embeddings and similarity search.",
), ),
remote_provider_spec( remote_provider_spec(
api=Api.inference, api=Api.inference,
@ -64,6 +67,7 @@ def available_providers() -> list[ProviderSpec]:
], ],
module="llama_stack.providers.remote.inference.cerebras", module="llama_stack.providers.remote.inference.cerebras",
config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig", config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
description="Cerebras inference provider for running models on Cerebras Cloud platform.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -73,6 +77,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["ollama", "aiohttp", "h11>=0.16.0"], pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig", config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
module="llama_stack.providers.remote.inference.ollama", module="llama_stack.providers.remote.inference.ollama",
description="Ollama inference provider for running local models through the Ollama runtime.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -82,6 +87,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["openai"], pip_packages=["openai"],
module="llama_stack.providers.remote.inference.vllm", module="llama_stack.providers.remote.inference.vllm",
config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig", config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
description="Remote vLLM inference provider for connecting to vLLM servers.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -91,6 +97,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["huggingface_hub", "aiohttp"], pip_packages=["huggingface_hub", "aiohttp"],
module="llama_stack.providers.remote.inference.tgi", module="llama_stack.providers.remote.inference.tgi",
config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig", config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
description="Text Generation Inference (TGI) provider for HuggingFace model serving.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -100,6 +107,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["huggingface_hub", "aiohttp"], pip_packages=["huggingface_hub", "aiohttp"],
module="llama_stack.providers.remote.inference.tgi", module="llama_stack.providers.remote.inference.tgi",
config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig", config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
description="HuggingFace Inference API serverless provider for on-demand model inference.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -109,6 +117,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["huggingface_hub", "aiohttp"], pip_packages=["huggingface_hub", "aiohttp"],
module="llama_stack.providers.remote.inference.tgi", module="llama_stack.providers.remote.inference.tgi",
config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig", config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
description="HuggingFace Inference Endpoints provider for dedicated model serving.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -121,6 +130,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.fireworks", module="llama_stack.providers.remote.inference.fireworks",
config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig", config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator",
description="Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -133,6 +143,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.together", module="llama_stack.providers.remote.inference.together",
config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig", config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator",
description="Together AI inference provider for open-source models and collaborative AI development.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -142,6 +153,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["boto3"], pip_packages=["boto3"],
module="llama_stack.providers.remote.inference.bedrock", module="llama_stack.providers.remote.inference.bedrock",
config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig", config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -153,6 +165,7 @@ def available_providers() -> list[ProviderSpec]:
], ],
module="llama_stack.providers.remote.inference.databricks", module="llama_stack.providers.remote.inference.databricks",
config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig", config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
description="Databricks inference provider for running models on Databricks' unified analytics platform.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -164,6 +177,7 @@ def available_providers() -> list[ProviderSpec]:
], ],
module="llama_stack.providers.remote.inference.nvidia", module="llama_stack.providers.remote.inference.nvidia",
config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig", config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -173,6 +187,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["openai"], pip_packages=["openai"],
module="llama_stack.providers.remote.inference.runpod", module="llama_stack.providers.remote.inference.runpod",
config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig", config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -183,6 +198,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.openai", module="llama_stack.providers.remote.inference.openai",
config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig", config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig",
provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
description="OpenAI inference provider for accessing GPT models and other OpenAI services.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -193,6 +209,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.anthropic", module="llama_stack.providers.remote.inference.anthropic",
config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig", config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
description="Anthropic inference provider for accessing Claude models and Anthropic's AI services.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -203,6 +220,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.gemini", module="llama_stack.providers.remote.inference.gemini",
config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig", config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -213,6 +231,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.groq", module="llama_stack.providers.remote.inference.groq",
config_class="llama_stack.providers.remote.inference.groq.GroqConfig", config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
description="Groq inference provider for ultra-fast inference using Groq's LPU technology.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -223,6 +242,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.fireworks_openai_compat", module="llama_stack.providers.remote.inference.fireworks_openai_compat",
config_class="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksCompatConfig", config_class="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksCompatConfig",
provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator",
description="Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -233,6 +253,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.llama_openai_compat", module="llama_stack.providers.remote.inference.llama_openai_compat",
config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig", config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
description="Llama OpenAI-compatible provider for using Llama models with OpenAI API format.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -243,6 +264,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.together_openai_compat", module="llama_stack.providers.remote.inference.together_openai_compat",
config_class="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherCompatConfig", config_class="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherCompatConfig",
provider_data_validator="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherProviderDataValidator",
description="Together AI OpenAI-compatible provider for using Together models with OpenAI API format.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -253,6 +275,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.groq_openai_compat", module="llama_stack.providers.remote.inference.groq_openai_compat",
config_class="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqCompatConfig", config_class="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqCompatConfig",
provider_data_validator="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqProviderDataValidator",
description="Groq OpenAI-compatible provider for using Groq models with OpenAI API format.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -263,6 +286,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.sambanova_openai_compat", module="llama_stack.providers.remote.inference.sambanova_openai_compat",
config_class="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaCompatConfig", config_class="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaCompatConfig",
provider_data_validator="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaProviderDataValidator",
description="SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -273,6 +297,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.cerebras_openai_compat", module="llama_stack.providers.remote.inference.cerebras_openai_compat",
config_class="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasCompatConfig", config_class="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasCompatConfig",
provider_data_validator="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasProviderDataValidator",
description="Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -283,6 +308,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.sambanova", module="llama_stack.providers.remote.inference.sambanova",
config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig", config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
description="SambaNova inference provider for running models on SambaNova's dataflow architecture.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -293,6 +319,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.passthrough", module="llama_stack.providers.remote.inference.passthrough",
config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig", config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig",
provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator",
description="Passthrough inference provider for connecting to any external inference service not directly supported.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -303,6 +330,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.inference.watsonx", module="llama_stack.providers.remote.inference.watsonx",
config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig", config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator", provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",
description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.",
), ),
), ),
] ]

View file

@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]:
Api.datasetio, Api.datasetio,
Api.datasets, Api.datasets,
], ],
description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.",
), ),
InlineProviderSpec( InlineProviderSpec(
api=Api.post_training, api=Api.post_training,
@ -31,6 +32,7 @@ def available_providers() -> list[ProviderSpec]:
Api.datasetio, Api.datasetio,
Api.datasets, Api.datasets,
], ],
description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
), ),
remote_provider_spec( remote_provider_spec(
api=Api.post_training, api=Api.post_training,
@ -39,6 +41,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["requests", "aiohttp"], pip_packages=["requests", "aiohttp"],
module="llama_stack.providers.remote.post_training.nvidia", module="llama_stack.providers.remote.post_training.nvidia",
config_class="llama_stack.providers.remote.post_training.nvidia.NvidiaPostTrainingConfig", config_class="llama_stack.providers.remote.post_training.nvidia.NvidiaPostTrainingConfig",
description="NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.",
), ),
), ),
] ]

View file

@ -25,6 +25,7 @@ def available_providers() -> list[ProviderSpec]:
], ],
module="llama_stack.providers.inline.safety.prompt_guard", module="llama_stack.providers.inline.safety.prompt_guard",
config_class="llama_stack.providers.inline.safety.prompt_guard.PromptGuardConfig", config_class="llama_stack.providers.inline.safety.prompt_guard.PromptGuardConfig",
description="Prompt Guard safety provider for detecting and filtering unsafe prompts and content.",
), ),
InlineProviderSpec( InlineProviderSpec(
api=Api.safety, api=Api.safety,
@ -35,6 +36,7 @@ def available_providers() -> list[ProviderSpec]:
api_dependencies=[ api_dependencies=[
Api.inference, Api.inference,
], ],
description="Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.",
), ),
InlineProviderSpec( InlineProviderSpec(
api=Api.safety, api=Api.safety,
@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]:
], ],
module="llama_stack.providers.inline.safety.code_scanner", module="llama_stack.providers.inline.safety.code_scanner",
config_class="llama_stack.providers.inline.safety.code_scanner.CodeScannerConfig", config_class="llama_stack.providers.inline.safety.code_scanner.CodeScannerConfig",
description="Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.",
), ),
remote_provider_spec( remote_provider_spec(
api=Api.safety, api=Api.safety,
@ -52,6 +55,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["boto3"], pip_packages=["boto3"],
module="llama_stack.providers.remote.safety.bedrock", module="llama_stack.providers.remote.safety.bedrock",
config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig", config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig",
description="AWS Bedrock safety provider for content moderation using AWS's safety services.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -61,6 +65,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["requests"], pip_packages=["requests"],
module="llama_stack.providers.remote.safety.nvidia", module="llama_stack.providers.remote.safety.nvidia",
config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig", config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig",
description="NVIDIA's safety provider for content moderation and safety filtering.",
), ),
), ),
remote_provider_spec( remote_provider_spec(
@ -71,6 +76,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.safety.sambanova", module="llama_stack.providers.remote.safety.sambanova",
config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig", config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig",
provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator", provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator",
description="SambaNova's safety provider for content moderation and safety filtering.",
), ),
), ),
] ]

Some files were not shown because too many files have changed in this diff Show more