Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-04 13:15:24 +00:00)
docs: auto generated documentation for providers (#2543)
# What does this PR do?

Simple approach to get some provider pages in the docs. Add or update description fields in the provider configuration class using Pydantic's Field, ensuring these descriptions are clear and complete, as they will be used to auto-generate provider documentation via ./scripts/distro_codegen.py instead of editing the docs manually.

Signed-off-by: Sébastien Han <seb@redhat.com>
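For context, a minimal sketch of the kind of configuration class the PR description refers to; the class name, field names, and URLs here are hypothetical assumptions for illustration, and only the use of Pydantic's `Field(description=...)` plus a sample-config helper is taken from the description above:

```python
# Illustrative sketch only: field names, defaults, and the sample_run_config
# helper below are assumptions, not code from this commit.
from typing import Any

from pydantic import BaseModel, Field


class ExampleInferenceConfig(BaseModel):
    url: str = Field(
        default="http://localhost:8000",
        description="The URL for the example inference endpoint.",
    )
    api_key: str | None = Field(
        default=None,
        description="API key for the example service, if one is required.",
    )

    @classmethod
    def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
        # Values like these are rendered into the "Sample Configuration"
        # sections of the generated provider pages.
        return {
            "url": "${env.EXAMPLE_URL:=http://localhost:8000}",
            "api_key": "${env.EXAMPLE_API_KEY:+}",
        }
```

The generated pages in this commit then surface each field's type, default, and `description` in a table, followed by the sample configuration.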
This commit is contained in:
parent 8d8e90d78e
commit c9a49a80e8
96 changed files with 2562 additions and 65 deletions
@@ -14,7 +14,7 @@ repos:
       - id: check-added-large-files
         args: ['--maxkb=1000']
       - id: end-of-file-fixer
-        exclude: '^(.*\.svg)$'
+        exclude: '^(.*\.svg|.*\.md)$'
       - id: no-commit-to-branch
       - id: check-yaml
         args: ["--unsafe"]
@@ -95,6 +95,15 @@ repos:
         pass_filenames: false
         require_serial: true
         files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+      - id: provider-codegen
+        name: Provider Codegen
+        additional_dependencies:
+          - uv==0.7.8
+        entry: uv run --group codegen ./scripts/provider_codegen.py
+        language: python
+        pass_filenames: false
+        require_serial: true
+        files: ^llama_stack/providers/.*$
       - id: openapi-codegen
         name: API Spec Codegen
         additional_dependencies:
@ -139,6 +139,8 @@ uv sync
|
||||||
justification for bypassing the check.
|
justification for bypassing the check.
|
||||||
* Don't use unicode characters in the codebase. ASCII-only is preferred for compatibility or
|
* Don't use unicode characters in the codebase. ASCII-only is preferred for compatibility or
|
||||||
readability reasons.
|
readability reasons.
|
||||||
|
* Providers configuration class should be Pydantic Field class. It should have a `description` field
|
||||||
|
that describes the configuration. These descriptions will be used to generate the provider documentation.
|
||||||
|
|
||||||
## Common Tasks
|
## Common Tasks
|
||||||
|
|
||||||
|
@ -157,10 +159,19 @@ cd llama-stack
|
||||||
LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --template <...>
|
LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --template <...>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Updating distribution configurations
|
||||||
|
|
||||||
### Updating Provider Configurations
|
If you have made changes to a provider's configuration in any form (introducing a new config key, or
|
||||||
|
changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML
|
||||||
|
files as well as the documentation. You should not change `docs/source/.../distributions/` files
|
||||||
|
manually as they are auto-generated.
|
||||||
|
|
||||||
If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated.
|
### Updating the provider documentation
|
||||||
|
|
||||||
|
If you have made changes to a provider's configuration, you should run `./scripts/distro_codegen.py`
|
||||||
|
to re-generate the documentation. You should not change `docs/source/.../providers/` files manually
|
||||||
|
as they are auto-generated.
|
||||||
|
Note that the provider "description" field will be used to generate the provider documentation.
|
||||||
|
|
||||||
### Building the Documentation
|
### Building the Documentation
|
||||||
|
|
||||||
|
|
|
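To make the generation step concrete, here is a rough sketch of how a codegen script could derive the Markdown configuration tables shown in the provider pages below from a Pydantic config class. This is an assumption for illustration, not the actual `./scripts/provider_codegen.py` implementation:

```python
# Illustrative sketch, not the real provider codegen script: build a Markdown
# configuration table from a Pydantic v2 model's field metadata.
from pydantic import BaseModel


def config_table(config_cls: type[BaseModel]) -> str:
    lines = [
        "| Field | Type | Required | Default | Description |",
        "|-------|------|----------|---------|-------------|",
    ]
    for name, field in config_cls.model_fields.items():
        required = "Yes" if field.is_required() else "No"
        # Fields without an explicit default carry the sentinel value
        # "PydanticUndefined", which is why that string shows up in a few of
        # the generated tables below.
        default = field.default
        description = field.description or ""
        lines.append(
            f"| `{name}` | `{field.annotation}` | {required} | {default} | {description} |"
        )
    return "\n".join(lines)
```

Running something like this over a config class with well-written `description` fields yields tables like the ones in the new `docs/source/providers/` pages in this commit.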
@@ -6,7 +6,7 @@ Llama Stack is a stateful service with REST APIs to support the seamless transit
 environments. You can build and test using a local server first and deploy to a hosted endpoint for production.
 
 In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)
-as the inference [provider](../providers/index.md#inference) for a Llama Model.
+as the inference [provider](../providers/inference/index) for a Llama Model.
 
 #### Step 1: Install and setup
 1. Install [uv](https://docs.astral.sh/uv/)
docs/source/providers/agents/index.md (new file, 5 lines)
@@ -0,0 +1,5 @@
# Agents Providers

This section contains documentation for all available providers for the **agents** API.

- [inline::meta-reference](inline_meta-reference.md)
docs/source/providers/agents/inline_meta-reference.md (new file, 26 lines)
@@ -0,0 +1,26 @@
# inline::meta-reference

## Description

Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `persistence_store` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
| `responses_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | |

## Sample Configuration

```yaml
persistence_store:
  type: sqlite
  namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db
responses_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db
```
docs/source/providers/datasetio/index.md (new file, 7 lines)
@@ -0,0 +1,7 @@
# Datasetio Providers

This section contains documentation for all available providers for the **datasetio** API.

- [inline::localfs](inline_localfs.md)
- [remote::huggingface](remote_huggingface.md)
- [remote::nvidia](remote_nvidia.md)
docs/source/providers/datasetio/inline_localfs.md (new file, 22 lines)
@@ -0,0 +1,22 @@
# inline::localfs

## Description

Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |

## Sample Configuration

```yaml
kvstore:
  type: sqlite
  namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db
```
docs/source/providers/datasetio/remote_huggingface.md (new file, 22 lines)
@@ -0,0 +1,22 @@
# remote::huggingface

## Description

HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |

## Sample Configuration

```yaml
kvstore:
  type: sqlite
  namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db
```
docs/source/providers/datasetio/remote_nvidia.md (new file, 25 lines)
@@ -0,0 +1,25 @@
# remote::nvidia

## Description

NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The NVIDIA API key. |
| `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. |
| `project_id` | `str \| None` | No | test-project | The NVIDIA project ID. |
| `datasets_url` | `<class 'str'>` | No | http://nemo.test | Base URL for the NeMo Dataset API |

## Sample Configuration

```yaml
api_key: ${env.NVIDIA_API_KEY:+}
dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test}
```
docs/source/providers/eval/index.md (new file, 6 lines)
@@ -0,0 +1,6 @@
# Eval Providers

This section contains documentation for all available providers for the **eval** API.

- [inline::meta-reference](inline_meta-reference.md)
- [remote::nvidia](remote_nvidia.md)
docs/source/providers/eval/inline_meta-reference.md (new file, 22 lines)
@@ -0,0 +1,22 @@
# inline::meta-reference

## Description

Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |

## Sample Configuration

```yaml
kvstore:
  type: sqlite
  namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db
```
docs/source/providers/eval/remote_nvidia.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# remote::nvidia

## Description

NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `evaluator_url` | `<class 'str'>` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service |

## Sample Configuration

```yaml
evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}
```
docs/source/providers/files/index.md (new file, 5 lines)
@@ -0,0 +1,5 @@
# Files Providers

This section contains documentation for all available providers for the **files** API.

- [inline::localfs](inline_localfs.md)
docs/source/providers/files/inline_localfs.md (new file, 24 lines)
@@ -0,0 +1,24 @@
# inline::localfs

## Description

Local filesystem-based file storage provider for managing files and documents locally.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `storage_dir` | `<class 'str'>` | No | PydanticUndefined | Directory to store uploaded files |
| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
| `ttl_secs` | `<class 'int'>` | No | 31536000 | |

## Sample Configuration

```yaml
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db
```
@@ -18,60 +18,92 @@ Llama Stack supports external providers that live outside of the main codebase.
 ## Agents
 Run multi-step agentic workflows with LLMs with tool usage, memory (RAG), etc.
 
+```{toctree}
+:maxdepth: 1
+
+agents/index
+```
+
 ## DatasetIO
 Interfaces with datasets and data loaders.
 
-## Eval
-Generates outputs (via Inference or Agents) and perform scoring.
-
-## Inference
-Runs inference with an LLM.
-
-## Post Training
-Fine-tunes a model.
-
-#### Post Training Providers
-The following providers are available for Post Training:
-
 ```{toctree}
 :maxdepth: 1
 
-external
-post_training/huggingface
-post_training/torchtune
-post_training/nvidia_nemo
+datasetio/index
+```
+
+## Eval
+Generates outputs (via Inference or Agents) and perform scoring.
+
+```{toctree}
+:maxdepth: 1
+
+eval/index
+```
+
+## Inference
+Runs inference with an LLM.
+
+```{toctree}
+:maxdepth: 1
+
+inference/index
+```
+
+## Post Training
+Fine-tunes a model.
+
+```{toctree}
+:maxdepth: 1
+
+post_training/index
 ```
 
 ## Safety
 Applies safety policies to the output at a Systems (not only model) level.
 
+```{toctree}
+:maxdepth: 1
+
+safety/index
+```
+
 ## Scoring
 Evaluates the outputs of the system.
 
+```{toctree}
+:maxdepth: 1
+
+scoring/index
+```
+
 ## Telemetry
 Collects telemetry data from the system.
 
+```{toctree}
+:maxdepth: 1
+
+telemetry/index
+```
+
 ## Tool Runtime
 Is associated with the ToolGroup resouces.
 
+```{toctree}
+:maxdepth: 1
+
+tool_runtime/index
+```
+
 ## Vector IO
 
 Vector IO refers to operations on vector databases, such as adding documents, searching, and deleting documents.
 Vector IO plays a crucial role in [Retreival Augmented Generation (RAG)](../..//building_applications/rag), where the vector
 io and database are used to store and retrieve documents for retrieval.
 
-#### Vector IO Providers
-The following providers (i.e., databases) are available for Vector IO:
-
 ```{toctree}
 :maxdepth: 1
 
-external
-vector_io/faiss
-vector_io/sqlite-vec
-vector_io/chromadb
-vector_io/pgvector
-vector_io/qdrant
-vector_io/milvus
-vector_io/weaviate
+vector_io/index
 ```
 
docs/source/providers/inference/index.md (new file, 32 lines)
@@ -0,0 +1,32 @@
# Inference Providers

This section contains documentation for all available providers for the **inference** API.

- [inline::meta-reference](inline_meta-reference.md)
- [inline::sentence-transformers](inline_sentence-transformers.md)
- [inline::vllm](inline_vllm.md)
- [remote::anthropic](remote_anthropic.md)
- [remote::bedrock](remote_bedrock.md)
- [remote::cerebras](remote_cerebras.md)
- [remote::cerebras-openai-compat](remote_cerebras-openai-compat.md)
- [remote::databricks](remote_databricks.md)
- [remote::fireworks](remote_fireworks.md)
- [remote::fireworks-openai-compat](remote_fireworks-openai-compat.md)
- [remote::gemini](remote_gemini.md)
- [remote::groq](remote_groq.md)
- [remote::groq-openai-compat](remote_groq-openai-compat.md)
- [remote::hf::endpoint](remote_hf_endpoint.md)
- [remote::hf::serverless](remote_hf_serverless.md)
- [remote::llama-openai-compat](remote_llama-openai-compat.md)
- [remote::nvidia](remote_nvidia.md)
- [remote::ollama](remote_ollama.md)
- [remote::openai](remote_openai.md)
- [remote::passthrough](remote_passthrough.md)
- [remote::runpod](remote_runpod.md)
- [remote::sambanova](remote_sambanova.md)
- [remote::sambanova-openai-compat](remote_sambanova-openai-compat.md)
- [remote::tgi](remote_tgi.md)
- [remote::together](remote_together.md)
- [remote::together-openai-compat](remote_together-openai-compat.md)
- [remote::vllm](remote_vllm.md)
- [remote::watsonx](remote_watsonx.md)
docs/source/providers/inference/inline_meta-reference.md (new file, 32 lines)
@@ -0,0 +1,32 @@
# inline::meta-reference

## Description

Meta's reference implementation of inference with support for various model formats and optimization techniques.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `model` | `str \| None` | No | | |
| `torch_seed` | `int \| None` | No | | |
| `max_seq_len` | `<class 'int'>` | No | 4096 | |
| `max_batch_size` | `<class 'int'>` | No | 1 | |
| `model_parallel_size` | `int \| None` | No | | |
| `create_distributed_process_group` | `<class 'bool'>` | No | True | |
| `checkpoint_dir` | `str \| None` | No | | |
| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig, annotation=NoneType, required=True, discriminator='type'` | No | | |

## Sample Configuration

```yaml
model: Llama3.2-3B-Instruct
checkpoint_dir: ${env.CHECKPOINT_DIR:=null}
quantization:
  type: ${env.QUANTIZATION_TYPE:=bf16}
model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
max_batch_size: ${env.MAX_BATCH_SIZE:=1}
max_seq_len: ${env.MAX_SEQ_LEN:=4096}
```
docs/source/providers/inference/inline_sentence-transformers.md (new file, 13 lines)
@@ -0,0 +1,13 @@
# inline::sentence-transformers

## Description

Sentence Transformers inference provider for text embeddings and similarity search.

## Sample Configuration

```yaml
{}
```
docs/source/providers/inference/inline_vllm.md (new file, 29 lines)
@@ -0,0 +1,29 @@
# inline::vllm

## Description

vLLM inference provider for high-performance model serving with PagedAttention and continuous batching.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `tensor_parallel_size` | `<class 'int'>` | No | 1 | Number of tensor parallel replicas (number of GPUs to use). |
| `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. |
| `max_model_len` | `<class 'int'>` | No | 4096 | Maximum context length to use during serving. |
| `max_num_seqs` | `<class 'int'>` | No | 4 | Maximum parallel batch size for generation. |
| `enforce_eager` | `<class 'bool'>` | No | False | Whether to use eager mode for inference (otherwise cuda graphs are used). |
| `gpu_memory_utilization` | `<class 'float'>` | No | 0.3 | How much GPU memory will be allocated when this provider has finished loading, including memory that was already allocated before loading. |

## Sample Configuration

```yaml
tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:=1}
max_tokens: ${env.MAX_TOKENS:=4096}
max_model_len: ${env.MAX_MODEL_LEN:=4096}
max_num_seqs: ${env.MAX_NUM_SEQS:=4}
enforce_eager: ${env.ENFORCE_EAGER:=False}
gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:=0.3}
```
docs/source/providers/inference/remote_anthropic.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# remote::anthropic

## Description

Anthropic inference provider for accessing Claude models and Anthropic's AI services.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | API key for Anthropic models |

## Sample Configuration

```yaml
api_key: ${env.ANTHROPIC_API_KEY}
```
docs/source/providers/inference/remote_bedrock.md (new file, 28 lines)
@@ -0,0 +1,28 @@
# remote::bedrock

## Description

AWS Bedrock inference provider for accessing various AI models through AWS's managed service.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2. Default use environment variable: AWS_DEFAULT_REGION |
| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use. Default use environment variable: AWS_PROFILE |
| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform. Default use environment variable: AWS_RETRY_MODE |
| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection. The default is 60 seconds. |
| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |

## Sample Configuration

```yaml
{}
```
docs/source/providers/inference/remote_cerebras-openai-compat.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::cerebras-openai-compat

## Description

Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Cerebras API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.cerebras.ai/v1 | The URL for the Cerebras API server |

## Sample Configuration

```yaml
openai_compat_api_base: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY}
```
docs/source/providers/inference/remote_cerebras.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::cerebras

## Description

Cerebras inference provider for running models on Cerebras Cloud platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Cerebras API Key |

## Sample Configuration

```yaml
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY}
```
docs/source/providers/inference/remote_databricks.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::databricks

## Description

Databricks inference provider for running models on Databricks' unified analytics platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | | The URL for the Databricks model serving endpoint |
| `api_token` | `<class 'str'>` | No | | The Databricks API token |

## Sample Configuration

```yaml
url: ${env.DATABRICKS_URL}
api_token: ${env.DATABRICKS_API_TOKEN}
```
docs/source/providers/inference/remote_fireworks-openai-compat.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::fireworks-openai-compat

## Description

Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Fireworks API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks API server |

## Sample Configuration

```yaml
openai_compat_api_base: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
```
docs/source/providers/inference/remote_fireworks.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::fireworks

## Description

Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Fireworks.ai API Key |

## Sample Configuration

```yaml
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
```
docs/source/providers/inference/remote_gemini.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# remote::gemini

## Description

Google Gemini inference provider for accessing Gemini models and Google's AI services.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | API key for Gemini models |

## Sample Configuration

```yaml
api_key: ${env.GEMINI_API_KEY}
```
docs/source/providers/inference/remote_groq-openai-compat.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::groq-openai-compat

## Description

Groq OpenAI-compatible provider for using Groq models with OpenAI API format.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Groq API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.groq.com/openai/v1 | The URL for the Groq API server |

## Sample Configuration

```yaml
openai_compat_api_base: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY}
```
docs/source/providers/inference/remote_groq.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::groq

## Description

Groq inference provider for ultra-fast inference using Groq's LPU technology.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Groq API key |
| `url` | `<class 'str'>` | No | https://api.groq.com | The URL for the Groq AI server |

## Sample Configuration

```yaml
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY}
```
docs/source/providers/inference/remote_hf_endpoint.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::hf::endpoint

## Description

HuggingFace Inference Endpoints provider for dedicated model serving.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `endpoint_name` | `<class 'str'>` | No | PydanticUndefined | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. |
| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) |

## Sample Configuration

```yaml
endpoint_name: ${env.INFERENCE_ENDPOINT_NAME}
api_token: ${env.HF_API_TOKEN}
```
docs/source/providers/inference/remote_hf_serverless.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::hf::serverless

## Description

HuggingFace Inference API serverless provider for on-demand model inference.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `huggingface_repo` | `<class 'str'>` | No | PydanticUndefined | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') |
| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) |

## Sample Configuration

```yaml
huggingface_repo: ${env.INFERENCE_MODEL}
api_token: ${env.HF_API_TOKEN}
```
docs/source/providers/inference/remote_llama-openai-compat.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::llama-openai-compat

## Description

Llama OpenAI-compatible provider for using Llama models with OpenAI API format.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Llama API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |

## Sample Configuration

```yaml
openai_compat_api_base: https://api.llama.com/compat/v1/
api_key: ${env.LLAMA_API_KEY}
```
docs/source/providers/inference/remote_nvidia.md (new file, 24 lines)
@@ -0,0 +1,24 @@
# remote::nvidia

## Description

NVIDIA inference provider for accessing NVIDIA NIM models and AI services.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed if using the hosted service |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
| `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |

## Sample Configuration

```yaml
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:+}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
```
docs/source/providers/inference/remote_ollama.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::ollama

## Description

Ollama inference provider for running local models through the Ollama runtime.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | http://localhost:11434 | |
| `raise_on_connect_error` | `<class 'bool'>` | No | True | |

## Sample Configuration

```yaml
url: ${env.OLLAMA_URL:=http://localhost:11434}
raise_on_connect_error: true
```
docs/source/providers/inference/remote_openai.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# remote::openai

## Description

OpenAI inference provider for accessing GPT models and other OpenAI services.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | API key for OpenAI models |

## Sample Configuration

```yaml
api_key: ${env.OPENAI_API_KEY}
```
docs/source/providers/inference/remote_passthrough.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::passthrough

## Description

Passthrough inference provider for connecting to any external inference service not directly supported.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrough endpoint |

## Sample Configuration

```yaml
url: ${env.PASSTHROUGH_URL}
api_key: ${env.PASSTHROUGH_API_KEY}
```
docs/source/providers/inference/remote_runpod.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::runpod

## Description

RunPod inference provider for running models on RunPod's cloud GPU platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
| `api_token` | `str \| None` | No | | The API token |

## Sample Configuration

```yaml
url: ${env.RUNPOD_URL:+}
api_token: ${env.RUNPOD_API_TOKEN:+}
```
docs/source/providers/inference/remote_sambanova-openai-compat.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::sambanova-openai-compat

## Description

SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The SambaNova API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova API server |

## Sample Configuration

```yaml
openai_compat_api_base: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
```
docs/source/providers/inference/remote_sambanova.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::sambanova

## Description

SambaNova inference provider for running models on SambaNova's dataflow architecture.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key |

## Sample Configuration

```yaml
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
```
docs/source/providers/inference/remote_tgi.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# remote::tgi

## Description

Text Generation Inference (TGI) provider for HuggingFace model serving.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | PydanticUndefined | The URL for the TGI serving endpoint |

## Sample Configuration

```yaml
url: ${env.TGI_URL}
```
docs/source/providers/inference/remote_together-openai-compat.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::together-openai-compat

## Description

Together AI OpenAI-compatible provider for using Together models with OpenAI API format.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Together API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together API server |

## Sample Configuration

```yaml
openai_compat_api_base: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY}
```
docs/source/providers/inference/remote_together.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::together

## Description

Together AI inference provider for open-source models and collaborative AI development.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Together AI API Key |

## Sample Configuration

```yaml
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:+}
```
docs/source/providers/inference/remote_vllm.md (new file, 25 lines)
@@ -0,0 +1,25 @@
# remote::vllm

## Description

Remote vLLM inference provider for connecting to vLLM servers.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
| `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. |
| `api_token` | `str \| None` | No | fake | The API token |
| `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |

## Sample Configuration

```yaml
url: ${env.VLLM_URL}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
```
docs/source/providers/inference/remote_watsonx.md (new file, 24 lines)
@@ -0,0 +1,24 @@
# remote::watsonx

## Description

IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing watsonx.ai |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key, only needed if using the hosted service |
| `project_id` | `str \| None` | No | | The Project ID key, only needed if using the hosted service |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |

## Sample Configuration

```yaml
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
api_key: ${env.WATSONX_API_KEY:+}
project_id: ${env.WATSONX_PROJECT_ID:+}
```
docs/source/providers/post_training/index.md (new file, 7 lines)
@@ -0,0 +1,7 @@
# Post_Training Providers

This section contains documentation for all available providers for the **post_training** API.

- [inline::huggingface](inline_huggingface.md)
- [inline::torchtune](inline_torchtune.md)
- [remote::nvidia](remote_nvidia.md)
docs/source/providers/post_training/inline_huggingface.md (new file, 36 lines)
@@ -0,0 +1,36 @@
# inline::huggingface

## Description

HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `device` | `<class 'str'>` | No | cuda | |
| `distributed_backend` | `Literal['fsdp', 'deepspeed']` | No | | |
| `checkpoint_format` | `Literal['full_state', 'huggingface']` | No | huggingface | |
| `chat_template` | `<class 'str'>` | No | <|user|>\n{input}\n<|assistant|>\n{output} | |
| `model_specific_config` | `<class 'dict'>` | No | {'trust_remote_code': True, 'attn_implementation': 'sdpa'} | |
| `max_seq_length` | `<class 'int'>` | No | 2048 | |
| `gradient_checkpointing` | `<class 'bool'>` | No | False | |
| `save_total_limit` | `<class 'int'>` | No | 3 | |
| `logging_steps` | `<class 'int'>` | No | 10 | |
| `warmup_ratio` | `<class 'float'>` | No | 0.1 | |
| `weight_decay` | `<class 'float'>` | No | 0.01 | |
| `dataloader_num_workers` | `<class 'int'>` | No | 4 | |
| `dataloader_pin_memory` | `<class 'bool'>` | No | True | |

## Sample Configuration

```yaml
checkpoint_format: huggingface
distributed_backend: null
device: cpu
```
docs/source/providers/post_training/inline_torchtune.md (new file, 20 lines)
@@ -0,0 +1,20 @@
# inline::torchtune

## Description

TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `torch_seed` | `int \| None` | No | | |
| `checkpoint_format` | `Literal['meta', 'huggingface']` | No | meta | |

## Sample Configuration

```yaml
checkpoint_format: meta
```
docs/source/providers/post_training/remote_nvidia.md (new file, 28 lines)
@@ -0,0 +1,28 @@
# remote::nvidia

## Description

NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The NVIDIA API key. |
| `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. |
| `project_id` | `str \| None` | No | test-example-model@v1 | The NVIDIA project ID. |
| `customizer_url` | `str \| None` | No | | Base URL for the NeMo Customizer API |
| `timeout` | `<class 'int'>` | No | 300 | Timeout for the NVIDIA Post Training API |
| `max_retries` | `<class 'int'>` | No | 3 | Maximum number of retries for the NVIDIA Post Training API |
| `output_model_dir` | `<class 'str'>` | No | test-example-model@v1 | Directory to save the output model |

## Sample Configuration

```yaml
api_key: ${env.NVIDIA_API_KEY:+}
dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}
```
docs/source/providers/safety/index.md (new file, 10 lines)
@@ -0,0 +1,10 @@
# Safety Providers

This section contains documentation for all available providers for the **safety** API.

- [inline::code-scanner](inline_code-scanner.md)
- [inline::llama-guard](inline_llama-guard.md)
- [inline::prompt-guard](inline_prompt-guard.md)
- [remote::bedrock](remote_bedrock.md)
- [remote::nvidia](remote_nvidia.md)
- [remote::sambanova](remote_sambanova.md)
docs/source/providers/safety/inline_code-scanner.md (new file, 13 lines)
@@ -0,0 +1,13 @@
# inline::code-scanner

## Description

Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.

## Sample Configuration

```yaml
{}
```
docs/source/providers/safety/inline_llama-guard.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# inline::llama-guard

## Description

Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `excluded_categories` | `list[str]` | No | [] | |

## Sample Configuration

```yaml
excluded_categories: []
```
docs/source/providers/safety/inline_prompt-guard.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# inline::prompt-guard

## Description

Prompt Guard safety provider for detecting and filtering unsafe prompts and content.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `guard_type` | `<class 'str'>` | No | injection | |

## Sample Configuration

```yaml
guard_type: injection
```
docs/source/providers/safety/remote_bedrock.md (new file, 28 lines)
@@ -0,0 +1,28 @@
# remote::bedrock

## Description

AWS Bedrock safety provider for content moderation using AWS's safety services.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2. Default use environment variable: AWS_DEFAULT_REGION |
| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use. Default use environment variable: AWS_PROFILE |
| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform. Default use environment variable: AWS_RETRY_MODE |
| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection. The default is 60 seconds. |
| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |

## Sample Configuration

```yaml
{}
```
21  docs/source/providers/safety/remote_nvidia.md  Normal file
@@ -0,0 +1,21 @@
# remote::nvidia

## Description

NVIDIA's safety provider for content moderation and safety filtering.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `guardrails_service_url` | `<class 'str'>` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service |
| `config_id` | `str \| None` | No | self-check | Guardrails configuration ID to use from the Guardrails configuration store |

## Sample Configuration

```yaml
guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}
config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}
```
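The sample configurations above use `${env.VAR:=default}` placeholders: the environment variable is used when set, otherwise the value after `:=` applies. As a rough illustration only, the helper below is hypothetical and is not the stack's actual config loader:

```python
import os
import re

# Hypothetical pattern for "${env.NAME:=default}" placeholders (illustration only).
_ENV = re.compile(r"\$\{env\.(?P<name>[A-Za-z0-9_]+)(?::=(?P<default>[^}]*))?\}")

def resolve_env_placeholders(value: str) -> str:
    """Replace ${env.NAME:=default} with os.environ[NAME] or the default."""
    return _ENV.sub(lambda m: os.environ.get(m.group("name"), m.group("default") or ""), value)

# Falls back to the default when GUARDRAILS_SERVICE_URL is not set.
print(resolve_env_placeholders("${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}"))
```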
21  docs/source/providers/safety/remote_sambanova.md  Normal file
@@ -0,0 +1,21 @@
# remote::sambanova

## Description

SambaNova's safety provider for content moderation and safety filtering.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key |

## Sample Configuration

```yaml
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
```
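The `api_key` field above is typed as `pydantic.types.SecretStr`, so it is masked when the config is printed or logged and only revealed on explicit request. A small sketch with a made-up class and value, not the provider's real config:

```python
from pydantic import BaseModel, SecretStr

class SafetyProviderConfigSketch(BaseModel):  # illustrative stand-in
    url: str = "https://api.sambanova.ai/v1"
    api_key: SecretStr | None = None

cfg = SafetyProviderConfigSketch(api_key=SecretStr("not-a-real-key"))
print(cfg)  # api_key is rendered as '**********'
if cfg.api_key is not None:
    print(cfg.api_key.get_secret_value())  # explicit access to the raw value
```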
7  docs/source/providers/scoring/index.md  Normal file
@@ -0,0 +1,7 @@
# Scoring Providers

This section contains documentation for all available providers for the **scoring** API.

- [inline::basic](inline_basic.md)
- [inline::braintrust](inline_braintrust.md)
- [inline::llm-as-judge](inline_llm-as-judge.md)

13  docs/source/providers/scoring/inline_basic.md  Normal file
@@ -0,0 +1,13 @@
# inline::basic

## Description

Basic scoring provider for simple evaluation metrics and scoring functions.

## Sample Configuration

```yaml
{}
```
19  docs/source/providers/scoring/inline_braintrust.md  Normal file
@@ -0,0 +1,19 @@
# inline::braintrust

## Description

Braintrust scoring provider for evaluation and scoring using the Braintrust platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `openai_api_key` | `str \| None` | No | | The OpenAI API Key |

## Sample Configuration

```yaml
openai_api_key: ${env.OPENAI_API_KEY:+}
```
13  docs/source/providers/scoring/inline_llm-as-judge.md  Normal file
@@ -0,0 +1,13 @@
# inline::llm-as-judge

## Description

LLM-as-judge scoring provider that uses language models to evaluate and score responses.

## Sample Configuration

```yaml
{}
```

5  docs/source/providers/telemetry/index.md  Normal file
@@ -0,0 +1,5 @@
# Telemetry Providers

This section contains documentation for all available providers for the **telemetry** API.

- [inline::meta-reference](inline_meta-reference.md)
25  docs/source/providers/telemetry/inline_meta-reference.md  Normal file
@@ -0,0 +1,25 @@
# inline::meta-reference

## Description

Meta's reference implementation of telemetry and observability using OpenTelemetry.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `otel_trace_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL for traces |
| `otel_metric_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL for metrics |
| `service_name` | `<class 'str'>` | No | | The service name to use for telemetry |
| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink]` | No | [<TelemetrySink.CONSOLE: 'console'>, <TelemetrySink.SQLITE: 'sqlite'>] | List of telemetry sinks to enable (possible values: otel, sqlite, console) |
| `sqlite_db_path` | `<class 'str'>` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces |

## Sample Configuration

```yaml
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/trace_store.db
```
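The `sinks` field documented above is an enum-backed list, while the sample configuration supplies it as the comma-separated string `console,sqlite` via `TELEMETRY_SINKS`. A rough sketch of how such a value can be coerced; this is illustrative and the provider's actual validation may differ:

```python
from enum import Enum

from pydantic import BaseModel, Field, field_validator

class TelemetrySink(str, Enum):
    OTEL = "otel"
    SQLITE = "sqlite"
    CONSOLE = "console"

class TelemetryConfigSketch(BaseModel):  # stand-in, not the real TelemetryConfig
    sinks: list[TelemetrySink] = Field(default=[TelemetrySink.CONSOLE, TelemetrySink.SQLITE])

    @field_validator("sinks", mode="before")
    @classmethod
    def _split(cls, v):
        # Accept either a list or a "console,sqlite"-style string from the environment.
        if isinstance(v, str):
            return [TelemetrySink(s.strip()) for s in v.split(",") if s.strip()]
        return v

print(TelemetryConfigSketch(sinks="console,sqlite").sinks)
```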
10  docs/source/providers/tool_runtime/index.md  Normal file
@@ -0,0 +1,10 @@
# Tool_Runtime Providers

This section contains documentation for all available providers for the **tool_runtime** API.

- [inline::rag-runtime](inline_rag-runtime.md)
- [remote::bing-search](remote_bing-search.md)
- [remote::brave-search](remote_brave-search.md)
- [remote::model-context-protocol](remote_model-context-protocol.md)
- [remote::tavily-search](remote_tavily-search.md)
- [remote::wolfram-alpha](remote_wolfram-alpha.md)

13  docs/source/providers/tool_runtime/inline_rag-runtime.md  Normal file
@@ -0,0 +1,13 @@
# inline::rag-runtime

## Description

RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.

## Sample Configuration

```yaml
{}
```
20  docs/source/providers/tool_runtime/remote_bing-search.md  Normal file
@@ -0,0 +1,20 @@
# remote::bing-search

## Description

Bing Search tool for web search capabilities using Microsoft's search engine.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | |
| `top_k` | `<class 'int'>` | No | 3 | |

## Sample Configuration

```yaml
api_key: ${env.BING_API_KEY:}
```
21  docs/source/providers/tool_runtime/remote_brave-search.md  Normal file
@@ -0,0 +1,21 @@
# remote::brave-search

## Description

Brave Search tool for web search capabilities with privacy-focused results.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Brave Search API Key |
| `max_results` | `<class 'int'>` | No | 3 | The maximum number of results to return |

## Sample Configuration

```yaml
api_key: ${env.BRAVE_SEARCH_API_KEY:+}
max_results: 3
```
@@ -0,0 +1,13 @@
# remote::model-context-protocol

## Description

Model Context Protocol (MCP) tool for standardized tool calling and context management.

## Sample Configuration

```yaml
{}
```
21  docs/source/providers/tool_runtime/remote_tavily-search.md  Normal file
@@ -0,0 +1,21 @@
# remote::tavily-search

## Description

Tavily Search tool for AI-optimized web search with structured results.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Tavily Search API Key |
| `max_results` | `<class 'int'>` | No | 3 | The maximum number of results to return |

## Sample Configuration

```yaml
api_key: ${env.TAVILY_SEARCH_API_KEY:+}
max_results: 3
```
19  docs/source/providers/tool_runtime/remote_wolfram-alpha.md  Normal file
@@ -0,0 +1,19 @@
# remote::wolfram-alpha

## Description

Wolfram Alpha tool for computational knowledge and mathematical calculations.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | |

## Sample Configuration

```yaml
api_key: ${env.WOLFRAM_ALPHA_API_KEY:+}
```
16  docs/source/providers/vector_io/index.md  Normal file
@@ -0,0 +1,16 @@
# Vector_Io Providers

This section contains documentation for all available providers for the **vector_io** API.

- [inline::chromadb](inline_chromadb.md)
- [inline::faiss](inline_faiss.md)
- [inline::meta-reference](inline_meta-reference.md)
- [inline::milvus](inline_milvus.md)
- [inline::qdrant](inline_qdrant.md)
- [inline::sqlite-vec](inline_sqlite-vec.md)
- [inline::sqlite_vec](inline_sqlite_vec.md)
- [remote::chromadb](remote_chromadb.md)
- [remote::milvus](remote_milvus.md)
- [remote::pgvector](remote_pgvector.md)
- [remote::qdrant](remote_qdrant.md)
- [remote::weaviate](remote_weaviate.md)
52  docs/source/providers/vector_io/inline_chromadb.md  Normal file
@@ -0,0 +1,52 @@
# inline::chromadb

## Description

[Chroma](https://www.trychroma.com/) is an inline and remote vector
database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
That means you're not limited to storing vectors in memory or in a separate service.

## Features
Chroma supports:
- Store embeddings and their metadata
- Vector search
- Full-text search
- Document storage
- Metadata filtering
- Multi-modal retrieval

## Usage

To use Chroma in your Llama Stack project, follow these steps:

1. Install the necessary dependencies.
2. Configure your Llama Stack project to use chroma.
3. Start storing and querying vectors (see the sketch after the sample configuration below).

## Installation

You can install chroma using pip:

```bash
pip install chromadb
```

## Documentation
See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |

## Sample Configuration

```yaml
db_path: ${env.CHROMADB_PATH}
```
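A minimal sketch of the "store and query vectors" step, assuming only that the `chromadb` package is installed; the path, collection name, and documents are placeholders:

```python
import chromadb

client = chromadb.PersistentClient(path="./chroma_db")  # local on-disk database
collection = client.get_or_create_collection(name="documents")

# Add a couple of documents; Chroma applies its default embedding function.
collection.add(
    ids=["doc-1", "doc-2"],
    documents=[
        "Llama Stack exposes a unified set of APIs for building AI applications.",
        "Chroma persists embeddings and their metadata on disk.",
    ],
    metadatas=[{"source": "docs"}, {"source": "docs"}],
)

# Query by text and return the closest match.
results = collection.query(query_texts=["What does Llama Stack expose?"], n_results=1)
print(results["documents"])
```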
@@ -1,7 +1,7 @@
----
-orphan: true
----
-# Faiss
+# inline::faiss
+
+## Description
+
 
 [Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It
 allows you to store and query vectors directly in memory.
@@ -31,3 +31,21 @@ pip install faiss-cpu
 ## Documentation
 See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for
 more details about Faiss in general.
+
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+
+## Sample Configuration
+
+```yaml
+kvstore:
+  type: sqlite
+  namespace: null
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
+```
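The `kvstore` field added above is a union of several backend configs, selected by the `type` key seen in the sample (`type: sqlite`). A standalone sketch of that pattern with a pydantic discriminated union; the classes here are simplified stand-ins for the ones in `llama_stack.providers.utils.kvstore.config`:

```python
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter

class SqliteKVStoreSketch(BaseModel):
    type: Literal["sqlite"] = "sqlite"
    namespace: str | None = None
    db_path: str

class PostgresKVStoreSketch(BaseModel):
    type: Literal["postgres"] = "postgres"
    host: str = "localhost"
    port: int = 5432
    db: str = "llamastack"

# The "type" key decides which config class the mapping is parsed into.
KVStoreSketch = Annotated[Union[SqliteKVStoreSketch, PostgresKVStoreSketch], Field(discriminator="type")]

cfg = TypeAdapter(KVStoreSketch).validate_python(
    {"type": "sqlite", "namespace": None, "db_path": "~/.llama/faiss_store.db"}
)
print(type(cfg).__name__, cfg.db_path)
```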
26  docs/source/providers/vector_io/inline_meta-reference.md  Normal file
@@ -0,0 +1,26 @@
# inline::meta-reference

## Description

Meta's reference implementation of a vector database.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |

## Sample Configuration

```yaml
kvstore:
  type: sqlite
  namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
```

## Deprecation Notice

⚠️ **Warning**: Please use the `inline::faiss` provider instead.
26  docs/source/providers/vector_io/inline_milvus.md  Normal file
@@ -0,0 +1,26 @@
# inline::milvus

## Description

Please refer to the remote provider documentation.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |

## Sample Configuration

```yaml
db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy/milvus.db}
kvstore:
  type: sqlite
  namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/${env.MILVUS_KVSTORE_DB_PATH:=~/.llama/dummy/milvus_registry.db}
```
@@ -1,7 +1,7 @@
----
-orphan: true
----
-# Qdrant
+# inline::qdrant
+
+## Description
+
 
 [Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
 allows you to store and query vectors directly in memory.
@@ -44,3 +44,18 @@ docker pull qdrant/qdrant
 ```
 ## Documentation
 See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general.
+
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `path` | `<class 'str'>` | No | PydanticUndefined | |
+
+## Sample Configuration
+
+```yaml
+path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db
+```
@@ -1,7 +1,7 @@
----
-orphan: true
----
-# SQLite-Vec
+# inline::sqlite-vec
+
+## Description
+
 
 [SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It
 allows you to store and query vectors directly within an SQLite database.
@@ -199,3 +199,18 @@ pip install sqlite-vec
 See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general.
 
 [^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759).
+
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
+
+## Sample Configuration
+
+```yaml
+db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
+```
25  docs/source/providers/vector_io/inline_sqlite_vec.md  Normal file
@@ -0,0 +1,25 @@
# inline::sqlite_vec

## Description

Please refer to the sqlite-vec provider documentation.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |

## Sample Configuration

```yaml
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
```

## Deprecation Notice

⚠️ **Warning**: Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.
@@ -1,7 +1,7 @@
----
-orphan: true
----
-# Chroma
+# remote::chromadb
+
+## Description
+
 
 [Chroma](https://www.trychroma.com/) is an inline and remote vector
 database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
@@ -34,3 +34,18 @@ pip install chromadb
 
 ## Documentation
 See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
+
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `url` | `str \| None` | No | PydanticUndefined | |
+
+## Sample Configuration

+```yaml
+url: ${env.CHROMADB_URL}
+```
@@ -1,7 +1,7 @@
----
-orphan: true
----
-# Milvus
+# remote::milvus
+
+## Description
+
 
 [Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It
 allows you to store and query vectors directly within a Milvus database.
@@ -96,7 +96,7 @@ vector_io:
 #### Key Parameters for TLS Configuration
 
 - **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`.
-- **`server_pem_path`**: Path to the **server certificate** for verifying the server’s identity (used in one-way TLS).
+- **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS).
 - **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS).
 - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
 - **`client_key_path`**: Path to the **client private key** file (required for mTLS).
@@ -105,3 +105,24 @@ vector_io:
 See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
 
 For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md).
+
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `uri` | `<class 'str'>` | No | PydanticUndefined | The URI of the Milvus server |
+| `token` | `str \| None` | No | PydanticUndefined | The token of the Milvus server |
+| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
+| `config` | `dict` | No | {} | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |
+
+> **Note**: This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider.
+
+## Sample Configuration
+
+```yaml
+uri: ${env.MILVUS_ENDPOINT}
+token: ${env.MILVUS_TOKEN}
+```
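The note above says the Milvus config accepts additional fields that are forwarded to the client. In pydantic terms that usually means `extra="allow"`; a sketch with made-up field values, not the provider's real class:

```python
from pydantic import BaseModel, ConfigDict

class MilvusRemoteConfigSketch(BaseModel):  # illustrative stand-in
    model_config = ConfigDict(extra="allow")

    uri: str
    token: str | None = None
    consistency_level: str = "Strong"

cfg = MilvusRemoteConfigSketch(
    uri="http://localhost:19530",
    token=None,
    secure=True,                        # extra field, kept because extra="allow"
    server_pem_path="/certs/server.pem",
)
print(cfg.model_dump())  # extra keys survive and can be forwarded to the Milvus client
```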
@@ -1,7 +1,7 @@
----
-orphan: true
----
-# Postgres PGVector
+# remote::pgvector
+
+## Description
+
 
 [PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It
 allows you to store and query vectors directly in memory.
@@ -29,3 +29,26 @@ docker pull pgvector/pgvector:pg17
 ```
 ## Documentation
 See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
+
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `host` | `str \| None` | No | localhost | |
+| `port` | `int \| None` | No | 5432 | |
+| `db` | `str \| None` | No | postgres | |
+| `user` | `str \| None` | No | postgres | |
+| `password` | `str \| None` | No | mysecretpassword | |
+
+## Sample Configuration
+
+```yaml
+host: ${env.PGVECTOR_HOST:=localhost}
+port: ${env.PGVECTOR_PORT:=5432}
+db: ${env.PGVECTOR_DB}
+user: ${env.PGVECTOR_USER}
+password: ${env.PGVECTOR_PASSWORD}
+```
30  docs/source/providers/vector_io/remote_qdrant.md  Normal file
@@ -0,0 +1,30 @@
# remote::qdrant

## Description

Please refer to the inline provider documentation.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `location` | `str \| None` | No | | |
| `url` | `str \| None` | No | | |
| `port` | `int \| None` | No | 6333 | |
| `grpc_port` | `<class 'int'>` | No | 6334 | |
| `prefer_grpc` | `<class 'bool'>` | No | False | |
| `https` | `bool \| None` | No | | |
| `api_key` | `str \| None` | No | | |
| `prefix` | `str \| None` | No | | |
| `timeout` | `int \| None` | No | | |
| `host` | `str \| None` | No | | |

## Sample Configuration

```yaml
api_key: ${env.QDRANT_API_KEY}
```
@@ -1,7 +1,7 @@
----
-orphan: true
----
-# Weaviate
+# remote::weaviate
+
+## Description
+
 
 [Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack.
 It allows you to store and query vectors directly within a Weaviate database.
@@ -31,3 +31,12 @@ To install Weaviate see the [Weaviate quickstart documentation](https://weaviate
 
 ## Documentation
 See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general.
+
+## Sample Configuration
+
+```yaml
+{}
+```
@@ -141,6 +141,12 @@ Fully-qualified name of the module to import. The module is expected to have:
     provider_data_validator: str | None = Field(
         default=None,
     )
+    description: str | None = Field(
+        default=None,
+        description="""
+A description of the provider. This is used to display in the documentation.
+""",
+    )
 
 
 @json_schema_type
@@ -167,6 +173,12 @@ Fully-qualified name of the module to import. The module is expected to have:
     provider_data_validator: str | None = Field(
         default=None,
     )
+    description: str | None = Field(
+        default=None,
+        description="""
+A description of the provider. This is used to display in the documentation.
+""",
+    )
 
 
 class RemoteProviderConfig(BaseModel):
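The new `description` field is what the generated pages earlier in this diff are built from: each provider spec declares a description and a codegen script renders it into markdown. A self-contained sketch of the idea; `ProviderSpecSketch` and `render_markdown` are stand-ins, not the project's actual `provider_codegen.py`:

```python
from pydantic import BaseModel, Field

class ProviderSpecSketch(BaseModel):  # stand-in for InlineProviderSpec / RemoteProviderSpec
    provider_type: str
    config_class: str
    description: str | None = Field(
        default=None,
        description="A description of the provider. This is used to display in the documentation.",
    )

def render_markdown(spec: ProviderSpecSketch) -> str:
    # Emit the same "# provider / ## Description" shape seen in the generated docs above.
    return "\n".join([f"# {spec.provider_type}", "", "## Description", "", spec.description or ""])

spec = ProviderSpecSketch(
    provider_type="inline::code-scanner",
    config_class="llama_stack.providers.inline.safety.code_scanner.CodeScannerConfig",
    description="Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.",
)
print(render_markdown(spec))
```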
@@ -38,7 +38,7 @@ class TelemetryConfig(BaseModel):
         description="List of telemetry sinks to enable (possible values: otel, sqlite, console)",
     )
     sqlite_db_path: str = Field(
-        default=(RUNTIME_BASE_DIR / "trace_store.db").as_posix(),
+        default_factory=lambda: (RUNTIME_BASE_DIR / "trace_store.db").as_posix(),
         description="The path to the SQLite database to use for storing traces",
     )
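One plausible reason for the `default=` to `default_factory=` change above: the factory defers building the path until a config object is actually created, instead of evaluating it once at import time. A minimal sketch with a stand-in base directory:

```python
from pathlib import Path

from pydantic import BaseModel, Field

RUNTIME_BASE_DIR = Path("~/.llama/runtime").expanduser()  # stand-in for the real constant

class TraceStoreConfigSketch(BaseModel):
    sqlite_db_path: str = Field(
        # Evaluated per instantiation rather than at module import time.
        default_factory=lambda: (RUNTIME_BASE_DIR / "trace_store.db").as_posix(),
        description="The path to the SQLite database to use for storing traces",
    )

print(TraceStoreConfigSketch().sqlite_db_path)
```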
@@ -35,5 +35,6 @@ def available_providers() -> list[ProviderSpec]:
                 Api.tool_runtime,
                 Api.tool_groups,
             ],
+            description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
         ),
     ]
@@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
             module="llama_stack.providers.inline.datasetio.localfs",
             config_class="llama_stack.providers.inline.datasetio.localfs.LocalFSDatasetIOConfig",
             api_dependencies=[],
+            description="Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.",
         ),
         remote_provider_spec(
             api=Api.datasetio,
@@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]:
                 ],
                 module="llama_stack.providers.remote.datasetio.huggingface",
                 config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig",
+                description="HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.",
             ),
         ),
         remote_provider_spec(
@@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]:
                 ],
                 module="llama_stack.providers.remote.datasetio.nvidia",
                 config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig",
+                description="NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.",
             ),
         ),
     ]
@@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
                 Api.inference,
                 Api.agents,
             ],
+            description="Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.",
         ),
         remote_provider_spec(
             api=Api.eval,
@@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]:
                 ],
                 module="llama_stack.providers.remote.eval.nvidia",
                 config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig",
+                description="NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.",
             ),
             api_dependencies=[
                 Api.datasetio,
@@ -21,5 +21,6 @@ def available_providers() -> list[ProviderSpec]:
             pip_packages=sql_store_pip_packages,
             module="llama_stack.providers.inline.files.localfs",
             config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig",
+            description="Local filesystem-based file storage provider for managing files and documents locally.",
         ),
     ]
@@ -35,6 +35,7 @@ def available_providers() -> list[ProviderSpec]:
             pip_packages=META_REFERENCE_DEPS,
             module="llama_stack.providers.inline.inference.meta_reference",
             config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
+            description="Meta's reference implementation of inference with support for various model formats and optimization techniques.",
         ),
         InlineProviderSpec(
             api=Api.inference,
@@ -44,6 +45,7 @@ def available_providers() -> list[ProviderSpec]:
             ],
             module="llama_stack.providers.inline.inference.vllm",
             config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig",
+            description="vLLM inference provider for high-performance model serving with PagedAttention and continuous batching.",
         ),
         InlineProviderSpec(
             api=Api.inference,
@@ -54,6 +56,7 @@ def available_providers() -> list[ProviderSpec]:
             ],
             module="llama_stack.providers.inline.inference.sentence_transformers",
             config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
+            description="Sentence Transformers inference provider for text embeddings and similarity search.",
         ),
         remote_provider_spec(
             api=Api.inference,
@@ -64,6 +67,7 @@ def available_providers() -> list[ProviderSpec]:
                 ],
                 module="llama_stack.providers.remote.inference.cerebras",
                 config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
+                description="Cerebras inference provider for running models on Cerebras Cloud platform.",
             ),
         ),
         remote_provider_spec(
@@ -73,6 +77,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
                 config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
                 module="llama_stack.providers.remote.inference.ollama",
+                description="Ollama inference provider for running local models through the Ollama runtime.",
             ),
         ),
         remote_provider_spec(
@@ -82,6 +87,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["openai"],
                 module="llama_stack.providers.remote.inference.vllm",
                 config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
+                description="Remote vLLM inference provider for connecting to vLLM servers.",
             ),
         ),
         remote_provider_spec(
@@ -91,6 +97,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["huggingface_hub", "aiohttp"],
                 module="llama_stack.providers.remote.inference.tgi",
                 config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
+                description="Text Generation Inference (TGI) provider for HuggingFace model serving.",
             ),
         ),
         remote_provider_spec(
@@ -100,6 +107,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["huggingface_hub", "aiohttp"],
                 module="llama_stack.providers.remote.inference.tgi",
                 config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
+                description="HuggingFace Inference API serverless provider for on-demand model inference.",
             ),
         ),
         remote_provider_spec(
@@ -109,6 +117,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["huggingface_hub", "aiohttp"],
                 module="llama_stack.providers.remote.inference.tgi",
                 config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
+                description="HuggingFace Inference Endpoints provider for dedicated model serving.",
             ),
         ),
         remote_provider_spec(
@@ -121,6 +130,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.fireworks",
                 config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator",
+                description="Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.",
             ),
         ),
         remote_provider_spec(
@@ -133,6 +143,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.together",
                 config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator",
+                description="Together AI inference provider for open-source models and collaborative AI development.",
             ),
         ),
         remote_provider_spec(
@@ -142,6 +153,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["boto3"],
                 module="llama_stack.providers.remote.inference.bedrock",
                 config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
+                description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.",
             ),
         ),
         remote_provider_spec(
@@ -153,6 +165,7 @@ def available_providers() -> list[ProviderSpec]:
                 ],
                 module="llama_stack.providers.remote.inference.databricks",
                 config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
+                description="Databricks inference provider for running models on Databricks' unified analytics platform.",
             ),
         ),
         remote_provider_spec(
@@ -164,6 +177,7 @@ def available_providers() -> list[ProviderSpec]:
                 ],
                 module="llama_stack.providers.remote.inference.nvidia",
                 config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
+                description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
             ),
         ),
         remote_provider_spec(
@@ -173,6 +187,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["openai"],
                 module="llama_stack.providers.remote.inference.runpod",
                 config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
+                description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
             ),
         ),
         remote_provider_spec(
@@ -183,6 +198,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.openai",
                 config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
+                description="OpenAI inference provider for accessing GPT models and other OpenAI services.",
             ),
         ),
         remote_provider_spec(
@@ -193,6 +209,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.anthropic",
                 config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
+                description="Anthropic inference provider for accessing Claude models and Anthropic's AI services.",
             ),
         ),
         remote_provider_spec(
@@ -203,6 +220,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.gemini",
                 config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
+                description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
             ),
         ),
         remote_provider_spec(
@@ -213,6 +231,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.groq",
                 config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
+                description="Groq inference provider for ultra-fast inference using Groq's LPU technology.",
             ),
         ),
         remote_provider_spec(
@@ -223,6 +242,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.fireworks_openai_compat",
                 config_class="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksCompatConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator",
+                description="Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.",
             ),
         ),
         remote_provider_spec(
@@ -233,6 +253,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.llama_openai_compat",
                 config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
+                description="Llama OpenAI-compatible provider for using Llama models with OpenAI API format.",
             ),
         ),
         remote_provider_spec(
@@ -243,6 +264,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.together_openai_compat",
                 config_class="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherCompatConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherProviderDataValidator",
+                description="Together AI OpenAI-compatible provider for using Together models with OpenAI API format.",
             ),
         ),
         remote_provider_spec(
@@ -253,6 +275,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.groq_openai_compat",
                 config_class="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqCompatConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqProviderDataValidator",
+                description="Groq OpenAI-compatible provider for using Groq models with OpenAI API format.",
             ),
         ),
         remote_provider_spec(
@@ -263,6 +286,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.sambanova_openai_compat",
                 config_class="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaCompatConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaProviderDataValidator",
+                description="SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format.",
             ),
         ),
         remote_provider_spec(
@@ -273,6 +297,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.cerebras_openai_compat",
                 config_class="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasCompatConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasProviderDataValidator",
+                description="Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.",
            ),
         ),
         remote_provider_spec(
@@ -283,6 +308,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.sambanova",
                 config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
+                description="SambaNova inference provider for running models on SambaNova's dataflow architecture.",
             ),
         ),
         remote_provider_spec(
@@ -293,6 +319,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.passthrough",
                 config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator",
+                description="Passthrough inference provider for connecting to any external inference service not directly supported.",
             ),
         ),
         remote_provider_spec(
@@ -303,6 +330,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.watsonx",
                 config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",
+                description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.",
             ),
         ),
     ]
|
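Each hunk above touches one entry of this registry. For orientation, here is a minimal sketch of what a full entry looks like with the new `description` field; the adapter name, pip packages, and module paths below are illustrative placeholders, not taken from this diff, and the spec helpers are assumed to live in `llama_stack.providers.datatypes` as elsewhere in the repo:

```python
from llama_stack.providers.datatypes import AdapterSpec, Api, remote_provider_spec

# Hypothetical inference adapter entry, showing where `description` sits.
example_entry = remote_provider_spec(
    api=Api.inference,
    adapter=AdapterSpec(
        adapter_type="example",
        pip_packages=["httpx"],  # illustrative dependency
        module="llama_stack.providers.remote.inference.example",
        config_class="llama_stack.providers.remote.inference.example.ExampleImplConfig",
        provider_data_validator="llama_stack.providers.remote.inference.example.ExampleProviderDataValidator",
        # New field read by scripts/provider_codegen.py when generating provider docs.
        description="Example inference provider used only to illustrate the description field.",
    ),
)
```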
@@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]:
Api.datasetio,
Api.datasets,
],
description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.",
),
InlineProviderSpec(
api=Api.post_training,
@@ -31,6 +32,7 @@ def available_providers() -> list[ProviderSpec]:
Api.datasetio,
Api.datasets,
],
description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
),
remote_provider_spec(
api=Api.post_training,
@@ -39,6 +41,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["requests", "aiohttp"],
module="llama_stack.providers.remote.post_training.nvidia",
config_class="llama_stack.providers.remote.post_training.nvidia.NvidiaPostTrainingConfig",
description="NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.",
),
),
]
@@ -25,6 +25,7 @@ def available_providers() -> list[ProviderSpec]:
],
module="llama_stack.providers.inline.safety.prompt_guard",
config_class="llama_stack.providers.inline.safety.prompt_guard.PromptGuardConfig",
description="Prompt Guard safety provider for detecting and filtering unsafe prompts and content.",
),
InlineProviderSpec(
api=Api.safety,
@@ -35,6 +36,7 @@ def available_providers() -> list[ProviderSpec]:
api_dependencies=[
Api.inference,
],
description="Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.",
),
InlineProviderSpec(
api=Api.safety,
@@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]:
],
module="llama_stack.providers.inline.safety.code_scanner",
config_class="llama_stack.providers.inline.safety.code_scanner.CodeScannerConfig",
description="Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.",
),
remote_provider_spec(
api=Api.safety,
@@ -52,6 +55,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["boto3"],
module="llama_stack.providers.remote.safety.bedrock",
config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig",
description="AWS Bedrock safety provider for content moderation using AWS's safety services.",
),
),
remote_provider_spec(
@@ -61,6 +65,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["requests"],
module="llama_stack.providers.remote.safety.nvidia",
config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig",
description="NVIDIA's safety provider for content moderation and safety filtering.",
),
),
remote_provider_spec(
@@ -71,6 +76,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.safety.sambanova",
config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig",
provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator",
description="SambaNova's safety provider for content moderation and safety filtering.",
),
),
]
@@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]:
Api.datasetio,
Api.datasets,
],
description="Basic scoring provider for simple evaluation metrics and scoring functions.",
),
InlineProviderSpec(
api=Api.scoring,
@@ -32,6 +33,7 @@ def available_providers() -> list[ProviderSpec]:
Api.datasets,
Api.inference,
],
description="LLM-as-judge scoring provider that uses language models to evaluate and score responses.",
),
InlineProviderSpec(
api=Api.scoring,
@@ -44,5 +46,6 @@ def available_providers() -> list[ProviderSpec]:
Api.datasets,
],
provider_data_validator="llama_stack.providers.inline.scoring.braintrust.BraintrustProviderDataValidator",
description="Braintrust scoring provider for evaluation and scoring using the Braintrust platform.",
),
]
@@ -24,5 +24,6 @@ def available_providers() -> list[ProviderSpec]:
optional_api_dependencies=[Api.datasetio],
module="llama_stack.providers.inline.telemetry.meta_reference",
config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
description="Meta's reference implementation of telemetry and observability using OpenTelemetry.",
),
]
@@ -33,6 +33,7 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.inline.tool_runtime.rag",
config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig",
api_dependencies=[Api.vector_io, Api.inference],
description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.",
),
remote_provider_spec(
api=Api.tool_runtime,
@@ -42,6 +43,7 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.remote.tool_runtime.brave_search.config.BraveSearchToolConfig",
pip_packages=["requests"],
provider_data_validator="llama_stack.providers.remote.tool_runtime.brave_search.BraveSearchToolProviderDataValidator",
description="Brave Search tool for web search capabilities with privacy-focused results.",
),
),
remote_provider_spec(
@@ -52,6 +54,7 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.remote.tool_runtime.bing_search.config.BingSearchToolConfig",
pip_packages=["requests"],
provider_data_validator="llama_stack.providers.remote.tool_runtime.bing_search.BingSearchToolProviderDataValidator",
description="Bing Search tool for web search capabilities using Microsoft's search engine.",
),
),
remote_provider_spec(
@@ -62,6 +65,7 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.remote.tool_runtime.tavily_search.config.TavilySearchToolConfig",
pip_packages=["requests"],
provider_data_validator="llama_stack.providers.remote.tool_runtime.tavily_search.TavilySearchToolProviderDataValidator",
description="Tavily Search tool for AI-optimized web search with structured results.",
),
),
remote_provider_spec(
@@ -72,6 +76,7 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.remote.tool_runtime.wolfram_alpha.config.WolframAlphaToolConfig",
pip_packages=["requests"],
provider_data_validator="llama_stack.providers.remote.tool_runtime.wolfram_alpha.WolframAlphaToolProviderDataValidator",
description="Wolfram Alpha tool for computational knowledge and mathematical calculations.",
),
),
remote_provider_spec(
@@ -82,6 +87,7 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderConfig",
pip_packages=["mcp"],
provider_data_validator="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderDataValidator",
description="Model Context Protocol (MCP) tool for standardized tool calling and context management.",
),
),
]
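The remote tool runtimes above each declare a `provider_data_validator`, which is how per-request API keys reach them. Below is a rough sketch of supplying such a key from a client; the header mechanism is real, but the exact key name (`tavily_search_api_key`) and the tool name are assumptions used only for illustration:

```python
from llama_stack_client import LlamaStackClient

# Provider data is sent as the X-LlamaStack-Provider-Data header and is checked
# by the provider_data_validator classes listed in the registry above.
client = LlamaStackClient(
    base_url="http://localhost:8321",
    provider_data={"tavily_search_api_key": "YOUR_API_KEY"},  # assumed key name
)

result = client.tool_runtime.invoke_tool(
    tool_name="web_search",  # assumed tool name exposed by the search runtime
    kwargs={"query": "latest Llama Stack release"},
)
print(result.content)
```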
@@ -25,6 +25,7 @@ def available_providers() -> list[ProviderSpec]:
deprecation_warning="Please use the `inline::faiss` provider instead.",
api_dependencies=[Api.inference],
optional_api_dependencies=[Api.files],
description="Meta's reference implementation of a vector database.",
),
InlineProviderSpec(
api=Api.vector_io,
@@ -34,6 +35,36 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
api_dependencies=[Api.inference],
optional_api_dependencies=[Api.files],
description="""
[Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It
allows you to store and query vectors directly in memory.
That means you'll get fast and efficient vector retrieval.

## Features

- Lightweight and easy to use
- Fully integrated with Llama Stack
- GPU support

## Usage

To use Faiss in your Llama Stack project, follow these steps:

1. Install the necessary dependencies.
2. Configure your Llama Stack project to use Faiss.
3. Start storing and querying vectors.

## Installation

You can install Faiss using pip:

```bash
pip install faiss-cpu
```
## Documentation
See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for
more details about Faiss in general.
""",
),
# NOTE: sqlite-vec cannot be bundled into the container image because it does not have a
# source distribution and the wheels are not available for all platforms.
@@ -45,6 +76,204 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
api_dependencies=[Api.inference],
optional_api_dependencies=[Api.files],
description="""
[SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It
allows you to store and query vectors directly within an SQLite database.
That means you're not limited to storing vectors in memory or in a separate service.

## Features

- Lightweight and easy to use
- Fully integrated with Llama Stack
- Uses disk-based storage for persistence, allowing for larger vector storage

### Comparison to Faiss

The choice between Faiss and sqlite-vec should be made based on the needs of your application,
as they have different strengths.

#### Choosing the Right Provider

Scenario | Recommended Tool | Reason
-- |-----------------| --
Online Analytical Processing (OLAP) | Faiss | Fast, in-memory searches
Online Transaction Processing (OLTP) | sqlite-vec | Frequent writes and reads
Frequent writes | sqlite-vec | Efficient disk-based storage and incremental indexing
Large datasets | sqlite-vec | Disk-based storage for larger vector storage
Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, indexing, and GPU acceleration

#### Empirical Example

Consider the histogram below in which 10,000 randomly generated strings were inserted
in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`.

```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
:alt: Comparison of SQLite-Vec and Faiss write times
:width: 400px
```

You will notice that the average write time for `sqlite-vec` was 788ms, compared to
47,640ms for Faiss. While the number is jarring, if you look at the distribution, you can see that it is rather
uniformly spread across the [1500, 100000] interval.

Looking at each individual write in the order that the documents are inserted you'll see the increase in
write time as Faiss reindexes the vectors after each write.
```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png
:alt: Comparison of SQLite-Vec and Faiss write times
:width: 400px
```

In comparison, the read times for Faiss were on average 10% faster than sqlite-vec.
The modes of the two distributions highlight the differences much further where Faiss
will likely yield faster read performance.

```{image} ../../../../_static/providers/vector_io/read_time_comparison_sqlite-vec-faiss.png
:alt: Comparison of SQLite-Vec and Faiss read times
:width: 400px
```

## Usage

To use sqlite-vec in your Llama Stack project, follow these steps:

1. Install the necessary dependencies.
2. Configure your Llama Stack project to use SQLite-Vec.
3. Start storing and querying vectors.

The SQLite-vec provider supports three search modes:

1. **Vector Search** (`mode="vector"`): Performs pure vector similarity search using the embeddings.
2. **Keyword Search** (`mode="keyword"`): Performs full-text search using SQLite's FTS5.
3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword search for better results. First performs keyword search to get candidate matches, then applies vector similarity search on those candidates.

Example with hybrid search:
```python
response = await vector_io.query_chunks(
    vector_db_id="my_db",
    query="your query here",
    params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
)

# Using RRF ranker
response = await vector_io.query_chunks(
    vector_db_id="my_db",
    query="your query here",
    params={
        "mode": "hybrid",
        "max_chunks": 3,
        "score_threshold": 0.7,
        "ranker": {"type": "rrf", "impact_factor": 60.0},
    },
)

# Using weighted ranker
response = await vector_io.query_chunks(
    vector_db_id="my_db",
    query="your query here",
    params={
        "mode": "hybrid",
        "max_chunks": 3,
        "score_threshold": 0.7,
        "ranker": {"type": "weighted", "alpha": 0.7},  # 70% vector, 30% keyword
    },
)
```

Example with explicit vector search:
```python
response = await vector_io.query_chunks(
    vector_db_id="my_db",
    query="your query here",
    params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
)
```

Example with keyword search:
```python
response = await vector_io.query_chunks(
    vector_db_id="my_db",
    query="your query here",
    params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
)
```

## Supported Search Modes

The SQLite vector store supports three search modes:

1. **Vector Search** (`mode="vector"`): Uses vector similarity to find relevant chunks
2. **Keyword Search** (`mode="keyword"`): Uses keyword matching to find relevant chunks
3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword scores using a ranker

### Hybrid Search

Hybrid search combines the strengths of both vector and keyword search by:
- Computing vector similarity scores
- Computing keyword match scores
- Using a ranker to combine these scores

Two ranker types are supported:

1. **RRF (Reciprocal Rank Fusion)**:
   - Combines ranks from both vector and keyword results
   - Uses an impact factor (default: 60.0) to control the weight of higher-ranked results
   - Good for balancing between vector and keyword results
   - The default impact factor of 60.0 comes from the original RRF paper by Cormack et al. (2009) [^1], which found this value to provide optimal performance across various retrieval tasks

2. **Weighted**:
   - Linearly combines normalized vector and keyword scores
   - Uses an alpha parameter (0-1) to control the blend:
     - alpha=0: Only use keyword scores
     - alpha=1: Only use vector scores
     - alpha=0.5: Equal weight to both (default)

Example using RAGQueryConfig with different search modes:

```python
from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker

# Vector search
config = RAGQueryConfig(mode="vector", max_chunks=5)

# Keyword search
config = RAGQueryConfig(mode="keyword", max_chunks=5)

# Hybrid search with custom RRF ranker
config = RAGQueryConfig(
    mode="hybrid",
    max_chunks=5,
    ranker=RRFRanker(impact_factor=50.0),  # Custom impact factor
)

# Hybrid search with weighted ranker
config = RAGQueryConfig(
    mode="hybrid",
    max_chunks=5,
    ranker=WeightedRanker(alpha=0.7),  # 70% vector, 30% keyword
)

# Hybrid search with default RRF ranker
config = RAGQueryConfig(
    mode="hybrid", max_chunks=5
)  # Will use RRF with impact_factor=60.0
```

Note: The ranker configuration is only used in hybrid mode. For vector or keyword modes, the ranker parameter is ignored.

## Installation

You can install SQLite-Vec using pip:

```bash
pip install sqlite-vec
```

## Documentation

See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general.

[^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759).
""",
),
InlineProviderSpec(
api=Api.vector_io,
@@ -55,6 +284,9 @@ def available_providers() -> list[ProviderSpec]:
deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.",
api_dependencies=[Api.inference],
optional_api_dependencies=[Api.files],
description="""
Please refer to the sqlite-vec provider documentation.
""",
),
remote_provider_spec(
Api.vector_io,
@@ -63,6 +295,39 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["chromadb-client"],
module="llama_stack.providers.remote.vector_io.chroma",
config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig",
description="""
[Chroma](https://www.trychroma.com/) is an inline and remote vector
database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
That means you're not limited to storing vectors in memory or in a separate service.

## Features
Chroma supports:
- Store embeddings and their metadata
- Vector search
- Full-text search
- Document storage
- Metadata filtering
- Multi-modal retrieval

## Usage

To use Chroma in your Llama Stack project, follow these steps:

1. Install the necessary dependencies.
2. Configure your Llama Stack project to use Chroma.
3. Start storing and querying vectors.

## Installation

You can install Chroma using pip:

```bash
pip install chromadb
```

## Documentation
See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
""",
),
api_dependencies=[Api.inference],
),
@@ -73,6 +338,40 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.inline.vector_io.chroma",
config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig",
api_dependencies=[Api.inference],
description="""
[Chroma](https://www.trychroma.com/) is an inline and remote vector
database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
That means you're not limited to storing vectors in memory or in a separate service.

## Features
Chroma supports:
- Store embeddings and their metadata
- Vector search
- Full-text search
- Document storage
- Metadata filtering
- Multi-modal retrieval

## Usage

To use Chroma in your Llama Stack project, follow these steps:

1. Install the necessary dependencies.
2. Configure your Llama Stack project to use Chroma.
3. Start storing and querying vectors.

## Installation

You can install Chroma using pip:

```bash
pip install chromadb
```

## Documentation
See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.

""",
),
remote_provider_spec(
Api.vector_io,
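The Chroma description lists the usage steps without code. As a rough sketch only (the client calls, argument names, and IDs below are assumptions based on the public llama-stack client and are not part of this diff), registering a Chroma-backed vector DB and ingesting a document might look like this:

```python
from llama_stack_client import LlamaStackClient, RAGDocument

client = LlamaStackClient(base_url="http://localhost:8321")

# Register a vector DB served by the chroma provider (IDs and model are placeholders).
client.vector_dbs.register(
    vector_db_id="my_documents",
    provider_id="chromadb",
    embedding_model="all-MiniLM-L6-v2",
    embedding_dimension=384,
)

# Ingest a document through the RAG tool runtime; chunks are stored in Chroma.
client.tool_runtime.rag_tool.insert(
    documents=[
        RAGDocument(
            document_id="doc-1",
            content="Llama Stack providers overview",
            mime_type="text/plain",
            metadata={},
        )
    ],
    vector_db_id="my_documents",
    chunk_size_in_tokens=256,
)
```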
@@ -81,6 +380,34 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["psycopg2-binary"],
module="llama_stack.providers.remote.vector_io.pgvector",
config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig",
description="""
[PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It
allows you to store and query vectors directly within a PostgreSQL database.
That means you'll get fast and efficient vector retrieval.

## Features

- Easy to use
- Fully integrated with Llama Stack

## Usage

To use PGVector in your Llama Stack project, follow these steps:

1. Install the necessary dependencies.
2. Configure your Llama Stack project to use PGVector.
3. Start storing and querying vectors.

## Installation

You can install PGVector using docker:

```bash
docker pull pgvector/pgvector:pg17
```
## Documentation
See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
""",
),
api_dependencies=[Api.inference],
),
@@ -92,6 +419,36 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.remote.vector_io.weaviate",
config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig",
provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData",
description="""
[Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack.
It allows you to store and query vectors directly within a Weaviate database.
That means you're not limited to storing vectors in memory or in a separate service.

## Features
Weaviate supports:
- Store embeddings and their metadata
- Vector search
- Full-text search
- Hybrid search
- Document storage
- Metadata filtering
- Multi-modal retrieval

## Usage

To use Weaviate in your Llama Stack project, follow these steps:

1. Install the necessary dependencies.
2. Configure your Llama Stack project to use Weaviate.
3. Start storing and querying vectors.

## Installation

To install Weaviate see the [Weaviate quickstart documentation](https://weaviate.io/developers/weaviate/quickstart).

## Documentation
See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general.
""",
),
api_dependencies=[Api.inference],
),
@@ -102,6 +459,49 @@ def available_providers() -> list[ProviderSpec]:
module="llama_stack.providers.inline.vector_io.qdrant",
config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig",
api_dependencies=[Api.inference],
description=r"""
[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
allows you to store and query vectors directly in memory.
That means you'll get fast and efficient vector retrieval.

> By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in
> memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative.
>
> \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\]

## Features

- Lightweight and easy to use
- Fully integrated with Llama Stack
- Apache 2.0 license terms
- Store embeddings and their metadata
- Supports search by
  [Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/)
  and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search
- [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/)
- [Metadata filtering](https://qdrant.tech/articles/vector-search-filtering/)
- [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/)

## Usage

To use Qdrant in your Llama Stack project, follow these steps:

1. Install the necessary dependencies.
2. Configure your Llama Stack project to use Qdrant.
3. Start storing and querying vectors.

## Installation

You can install Qdrant using docker:

```bash
docker pull qdrant/qdrant
```
## Documentation
See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general.
""",
),
remote_provider_spec(
Api.vector_io,
@@ -110,6 +510,9 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["qdrant-client"],
module="llama_stack.providers.remote.vector_io.qdrant",
config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig",
description="""
Please refer to the inline provider documentation.
""",
),
api_dependencies=[Api.inference],
),
@@ -120,6 +523,110 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["pymilvus"],
module="llama_stack.providers.remote.vector_io.milvus",
config_class="llama_stack.providers.remote.vector_io.milvus.MilvusVectorIOConfig",
description="""
[Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It
allows you to store and query vectors directly within a Milvus database.
That means you're not limited to storing vectors in memory or in a separate service.

## Features

- Easy to use
- Fully integrated with Llama Stack

## Usage

To use Milvus in your Llama Stack project, follow these steps:

1. Install the necessary dependencies.
2. Configure your Llama Stack project to use Milvus.
3. Start storing and querying vectors.

## Installation

You can install Milvus using pymilvus:

```bash
pip install pymilvus
```

## Configuration

In Llama Stack, Milvus can be configured in two ways:
- **Inline (Local) Configuration** - Uses Milvus-Lite for local storage
- **Remote Configuration** - Connects to a remote Milvus server

### Inline (Local) Configuration

The simplest method is local configuration, which requires setting `db_path`, a path for locally storing Milvus-Lite files:

```yaml
vector_io:
  - provider_id: milvus
    provider_type: inline::milvus
    config:
      db_path: ~/.llama/distributions/together/milvus_store.db
```

### Remote Configuration

Remote configuration is suitable for larger data storage requirements:

#### Standard Remote Connection

```yaml
vector_io:
  - provider_id: milvus
    provider_type: remote::milvus
    config:
      uri: "http://<host>:<port>"
      token: "<user>:<password>"
```

#### TLS-Enabled Remote Connection (One-way TLS)

For connections to Milvus instances with one-way TLS enabled:

```yaml
vector_io:
  - provider_id: milvus
    provider_type: remote::milvus
    config:
      uri: "https://<host>:<port>"
      token: "<user>:<password>"
      secure: True
      server_pem_path: "/path/to/server.pem"
```

#### Mutual TLS (mTLS) Remote Connection

For connections to Milvus instances with mutual TLS (mTLS) enabled:

```yaml
vector_io:
  - provider_id: milvus
    provider_type: remote::milvus
    config:
      uri: "https://<host>:<port>"
      token: "<user>:<password>"
      secure: True
      ca_pem_path: "/path/to/ca.pem"
      client_pem_path: "/path/to/client.pem"
      client_key_path: "/path/to/client.key"
```

#### Key Parameters for TLS Configuration

- **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`.
- **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS).
- **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS).
- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
- **`client_key_path`**: Path to the **client private key** file (required for mTLS).

## Documentation
See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.

For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md).
""",
),
api_dependencies=[Api.inference],
),
@@ -131,5 +638,8 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig",
api_dependencies=[Api.inference],
optional_api_dependencies=[Api.files],
description="""
Please refer to the remote provider documentation.
""",
),
]
@@ -6,17 +6,19 @@

 from typing import Any

-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field

 from llama_stack.schema_utils import json_schema_type


 @json_schema_type
 class MilvusVectorIOConfig(BaseModel):
-    uri: str
-    token: str | None = None
-    consistency_level: str = "Strong"
+    uri: str = Field(description="The URI of the Milvus server")
+    token: str | None = Field(description="The token of the Milvus server")
+    consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong")

+    # This configuration allows additional fields to be passed through to the underlying Milvus client.
+    # See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general.
     model_config = ConfigDict(extra="allow")

     @classmethod
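The codegen script that follows reads these `Field` descriptions directly off the Pydantic model. A minimal standalone sketch (the config class here is hypothetical) of the introspection it relies on:

```python
from pydantic import BaseModel, Field


class ExampleProviderConfig(BaseModel):
    # Hypothetical config class following the MilvusVectorIOConfig pattern above.
    uri: str = Field(description="The URI of the example server")
    api_key: str | None = Field(default=None, description="Optional API key")


# Pydantic v2 exposes per-field metadata via `model_fields`; this is what
# scripts/provider_codegen.py walks to build each provider's configuration table.
for name, field in ExampleProviderConfig.model_fields.items():
    print(name, field.annotation, field.description, field.default)
```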
scripts/provider_codegen.py (new executable file, 332 lines)
@@ -0,0 +1,332 @@
#!/usr/bin/env python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import subprocess
import sys
from pathlib import Path
from typing import Any

from rich.progress import Progress, SpinnerColumn, TextColumn

from llama_stack.distribution.distribution import get_provider_registry

REPO_ROOT = Path(__file__).parent.parent


class ChangedPathTracker:
    """Track a list of paths we may have changed."""

    def __init__(self):
        self._changed_paths = []

    def add_paths(self, *paths):
        for path in paths:
            path = str(path)
            if path not in self._changed_paths:
                self._changed_paths.append(path)

    def changed_paths(self):
        return self._changed_paths


def get_config_class_info(config_class_path: str) -> dict[str, Any]:
    """Extract configuration information from a config class."""
    try:
        module_path, class_name = config_class_path.rsplit(".", 1)
        module = __import__(module_path, fromlist=[class_name])
        config_class = getattr(module, class_name)

        docstring = config_class.__doc__ or ""

        accepts_extra_config = False
        try:
            schema = config_class.model_json_schema()
            if schema.get("additionalProperties") is True:
                accepts_extra_config = True
        except Exception:
            if hasattr(config_class, "model_config"):
                model_config = config_class.model_config
                if hasattr(model_config, "extra") and model_config.extra == "allow":
                    accepts_extra_config = True
                elif isinstance(model_config, dict) and model_config.get("extra") == "allow":
                    accepts_extra_config = True

        fields_info = {}
        if hasattr(config_class, "model_fields"):
            for field_name, field in config_class.model_fields.items():
                field_type = str(field.annotation) if field.annotation else "Any"
                field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "")
                field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "")
                field_type = field_type.replace("llama_stack.apis.inference.inference.", "")
                field_type = field_type.replace("llama_stack.providers.", "")

                default_value = field.default
                if field.default_factory is not None:
                    try:
                        default_value = field.default_factory()
                        # HACK ALERT:
                        # If the default value contains a path that looks like it came from RUNTIME_BASE_DIR,
                        # replace it with a generic ~/.llama/ path for documentation
                        if isinstance(default_value, str) and "/.llama/" in default_value:
                            if ".llama/" in default_value:
                                path_part = default_value.split(".llama/")[-1]
                                default_value = f"~/.llama/{path_part}"
                    except Exception:
                        default_value = ""
                elif field.default is None:
                    default_value = ""

                field_info = {
                    "type": field_type,
                    "description": field.description or "",
                    "default": default_value,
                    "required": field.default is None and not field.is_required,
                }
                fields_info[field_name] = field_info

        if accepts_extra_config:
            config_description = "Additional configuration options that will be forwarded to the underlying provider"
            try:
                import inspect

                source = inspect.getsource(config_class)
                lines = source.split("\n")

                for i, line in enumerate(lines):
                    if "model_config" in line and "ConfigDict" in line and 'extra="allow"' in line:
                        comments = []
                        for j in range(i - 1, -1, -1):
                            stripped = lines[j].strip()
                            if stripped.startswith("#"):
                                comments.append(stripped[1:].strip())
                            elif stripped == "":
                                continue
                            else:
                                break

                        if comments:
                            config_description = " ".join(reversed(comments))
                        break
            except Exception:
                pass

            fields_info["config"] = {
                "type": "dict",
                "description": config_description,
                "default": "{}",
                "required": False,
            }

        return {
            "docstring": docstring,
            "fields": fields_info,
            "sample_config": getattr(config_class, "sample_run_config", None),
            "accepts_extra_config": accepts_extra_config,
        }
    except Exception as e:
        return {
            "error": f"Failed to load config class {config_class_path}: {str(e)}",
            "docstring": "",
            "fields": {},
            "sample_config": None,
            "accepts_extra_config": False,
        }


def generate_provider_docs(provider_spec: Any, api_name: str) -> str:
    """Generate markdown documentation for a provider."""
    provider_type = provider_spec.provider_type
    config_class = provider_spec.config_class

    config_info = get_config_class_info(config_class)

    md_lines = []
    md_lines.append(f"# {provider_type}")
    md_lines.append("")

    description = ""
    if hasattr(provider_spec, "description") and provider_spec.description:
        description = provider_spec.description
    elif (
        hasattr(provider_spec, "adapter")
        and hasattr(provider_spec.adapter, "description")
        and provider_spec.adapter.description
    ):
        description = provider_spec.adapter.description
    elif config_info.get("docstring"):
        description = config_info["docstring"]

    if description:
        md_lines.append("## Description")
        md_lines.append("")
        md_lines.append(description)
        md_lines.append("")

    if config_info.get("fields"):
        md_lines.append("## Configuration")
        md_lines.append("")
        md_lines.append("| Field | Type | Required | Default | Description |")
        md_lines.append("|-------|------|----------|---------|-------------|")

        for field_name, field_info in config_info["fields"].items():
            field_type = field_info["type"].replace("|", "\\|")
            required = "Yes" if field_info["required"] else "No"
            default = str(field_info["default"]) if field_info["default"] is not None else ""
            description = field_info["description"] or ""

            md_lines.append(f"| `{field_name}` | `{field_type}` | {required} | {default} | {description} |")

        md_lines.append("")

    if config_info.get("accepts_extra_config"):
        md_lines.append(
            "> **Note**: This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider."
        )
        md_lines.append("")

    if config_info.get("sample_config"):
        md_lines.append("## Sample Configuration")
        md_lines.append("")
        md_lines.append("```yaml")
        try:
            sample_config_func = config_info["sample_config"]
            import inspect

            import yaml

            if sample_config_func is not None:
                sig = inspect.signature(sample_config_func)
                if "__distro_dir__" in sig.parameters:
                    sample_config = sample_config_func(__distro_dir__="~/.llama/dummy")
                else:
                    sample_config = sample_config_func()

                def convert_pydantic_to_dict(obj):
                    if hasattr(obj, "model_dump"):
                        return obj.model_dump()
                    elif hasattr(obj, "dict"):
                        return obj.dict()
                    elif isinstance(obj, dict):
                        return {k: convert_pydantic_to_dict(v) for k, v in obj.items()}
                    elif isinstance(obj, list):
                        return [convert_pydantic_to_dict(item) for item in obj]
                    else:
                        return obj

                sample_config_dict = convert_pydantic_to_dict(sample_config)
                md_lines.append(yaml.dump(sample_config_dict, default_flow_style=False, sort_keys=False))
            else:
                md_lines.append("# No sample configuration available.")
        except Exception as e:
            md_lines.append(f"# Error generating sample config: {str(e)}")
        md_lines.append("```")
        md_lines.append("")

    if hasattr(provider_spec, "deprecation_warning") and provider_spec.deprecation_warning:
        md_lines.append("## Deprecation Notice")
        md_lines.append("")
        md_lines.append(f"⚠️ **Warning**: {provider_spec.deprecation_warning}")
        md_lines.append("")

    if hasattr(provider_spec, "deprecation_error") and provider_spec.deprecation_error:
        md_lines.append("## Deprecation Error")
        md_lines.append("")
        md_lines.append(f"❌ **Error**: {provider_spec.deprecation_error}")

    return "\n".join(md_lines) + "\n"


def process_provider_registry(progress, change_tracker: ChangedPathTracker) -> None:
    """Process the complete provider registry."""
    progress.print("Processing provider registry")

    try:
        provider_registry = get_provider_registry()

        for api, providers in provider_registry.items():
            api_name = api.value

            doc_output_dir = REPO_ROOT / "docs" / "source" / "providers" / api_name
            doc_output_dir.mkdir(parents=True, exist_ok=True)
            change_tracker.add_paths(doc_output_dir)

            index_content = []
            index_content.append(f"# {api_name.title()} Providers")
            index_content.append("")
            index_content.append(
                f"This section contains documentation for all available providers for the **{api_name}** API."
            )
            index_content.append("")

            for provider_type, provider in sorted(providers.items()):
                provider_doc_file = doc_output_dir / f"{provider_type.replace('::', '_').replace(':', '_')}.md"

                provider_docs = generate_provider_docs(provider, api_name)

                provider_doc_file.write_text(provider_docs)
                change_tracker.add_paths(provider_doc_file)

                index_content.append(f"- [{provider_type}]({provider_doc_file.name})")

            index_file = doc_output_dir / "index.md"
            index_file.write_text("\n".join(index_content))
            change_tracker.add_paths(index_file)

    except Exception as e:
        progress.print(f"[red]Error processing provider registry: {str(e)}")
        raise e


def check_for_changes(change_tracker: ChangedPathTracker) -> bool:
    """Check if there are any uncommitted changes, including new files."""
    has_changes = False
    for path in change_tracker.changed_paths():
        result = subprocess.run(
            ["git", "diff", "--exit-code", path],
            cwd=REPO_ROOT,
            capture_output=True,
        )
        if result.returncode != 0:
            print(f"Change detected in '{path}'.", file=sys.stderr)
            has_changes = True
        status_result = subprocess.run(
            ["git", "status", "--porcelain", path],
            cwd=REPO_ROOT,
            capture_output=True,
            text=True,
        )
        for line in status_result.stdout.splitlines():
            if line.startswith("??"):
                print(f"New file detected: '{path}'.", file=sys.stderr)
                has_changes = True
    return has_changes


def main():
    change_tracker = ChangedPathTracker()

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
    ) as progress:
        task = progress.add_task("Processing provider registry...", total=1)

        process_provider_registry(progress, change_tracker)
        progress.update(task, advance=1)

    if check_for_changes(change_tracker):
        print(
            "Provider documentation changes detected. Please commit the changes.",
            file=sys.stderr,
        )
        sys.exit(1)

    sys.exit(0)


if __name__ == "__main__":
    main()
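For a sense of what the script emits, here is a minimal sketch of driving `generate_provider_docs` by hand. Normally you simply run the script, which writes one markdown page per provider plus an `index.md` under `docs/source/providers/<api>/`; importing the script as a module is an assumption for illustration only:

```python
from llama_stack.distribution.distribution import get_provider_registry

# Assumes scripts/ is on sys.path so provider_codegen is importable.
from provider_codegen import generate_provider_docs

registry = get_provider_registry()
for api, providers in registry.items():
    for provider_type, spec in sorted(providers.items()):
        page = generate_provider_docs(spec, api.value)
        # Each page starts with "# <provider_type>", then "## Description",
        # a "## Configuration" table built from the Field descriptions, and
        # a "## Sample Configuration" YAML block when one is available.
        print(page.splitlines()[0])
```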