Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-29 15:23:51 +00:00)

chore: Enabling Milvus for VectorIO CI

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

Parent: 709eb7da33
Commit: c8d41d45ec

115 changed files with 2919 additions and 184 deletions
@@ -22,7 +22,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "remote::chromadb", "remote::pgvector"]
+        vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "remote::chromadb", "remote::pgvector"]
         python-version: ["3.12", "3.13"]
       fail-fast: false # we want to run all tests regardless of failure
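With `inline::milvus` added, the matrix above grows from 8 to 10 test jobs (five providers times two Python versions). A small sketch of that expansion, using only the values shown in the hunk:

```python
from itertools import product

# The two matrix axes from the workflow hunk above.
providers = [
    "inline::faiss",
    "inline::sqlite-vec",
    "inline::milvus",   # newly enabled by this commit
    "remote::chromadb",
    "remote::pgvector",
]
python_versions = ["3.12", "3.13"]

# GitHub Actions runs one job per combination; fail-fast is disabled,
# so a failure in one combination does not cancel the others.
for provider, py in product(providers, python_versions):
    print(f"vector-io={provider} python={py}")

print(len(providers) * len(python_versions), "jobs total")  # 10
```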
.github/workflows/python-build-test.yml (vendored, 2 changes)

@@ -20,7 +20,7 @@ jobs:
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

       - name: Install uv
-        uses: astral-sh/setup-uv@445689ea25e0de0a23313031f5fe577c74ae45a1 # v6.3.0
+        uses: astral-sh/setup-uv@bd01e18f51369d5a26f1651c3cb451d3417e3bba # v6.3.1
         with:
           python-version: ${{ matrix.python-version }}
           activate-environment: true
@@ -14,7 +14,7 @@ repos:
       - id: check-added-large-files
         args: ['--maxkb=1000']
       - id: end-of-file-fixer
-        exclude: '^(.*\.svg)$'
+        exclude: '^(.*\.svg|.*\.md)$'
       - id: no-commit-to-branch
       - id: check-yaml
         args: ["--unsafe"]
@@ -95,6 +95,15 @@ repos:
         pass_filenames: false
         require_serial: true
         files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+      - id: provider-codegen
+        name: Provider Codegen
+        additional_dependencies:
+          - uv==0.7.8
+        entry: uv run --group codegen ./scripts/provider_codegen.py
+        language: python
+        pass_filenames: false
+        require_serial: true
+        files: ^llama_stack/providers/.*$
       - id: openapi-codegen
         name: API Spec Codegen
         additional_dependencies:
@@ -139,6 +139,8 @@ uv sync
   justification for bypassing the check.
 * Don't use unicode characters in the codebase. ASCII-only is preferred for compatibility or
   readability reasons.
+* Providers configuration class should be Pydantic Field class. It should have a `description` field
+  that describes the configuration. These descriptions will be used to generate the provider documentation.

 ## Common Tasks

@@ -157,10 +159,19 @@ cd llama-stack
 LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --template <...>
 ```

-### Updating Provider Configurations
+### Updating distribution configurations

-If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated.
+If you have made changes to a provider's configuration in any form (introducing a new config key, or
+changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML
+files as well as the documentation. You should not change `docs/source/.../distributions/` files
+manually as they are auto-generated.
+
+### Updating the provider documentation
+
+If you have made changes to a provider's configuration, you should run `./scripts/distro_codegen.py`
+to re-generate the documentation. You should not change `docs/source/.../providers/` files manually
+as they are auto-generated.
+Note that the provider "description" field will be used to generate the provider documentation.

 ### Building the Documentation
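The new contributing guideline above asks provider config classes to describe every field so the provider documentation tables added later in this commit can be generated from them. A minimal sketch of what such a class could look like; the class and field names here are illustrative, not taken from the repository:

```python
from pydantic import BaseModel, Field


class ExampleVectorIOConfig(BaseModel):
    """Hypothetical provider config; every field carries a description
    so documentation tables can be generated from the model itself."""

    db_path: str = Field(
        description="Path to the on-disk index file.",
    )
    embedding_dimension: int = Field(
        default=384,
        description="Dimensionality of the stored embeddings.",
    )
```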
docs/_static/llama-stack-spec.html (vendored, 151 changes)

@@ -817,6 +817,90 @@
             ]
           }
         },
+    "/v1/openai/v1/responses/{response_id}": {
+      "get": {
+        "responses": {
+          "200": {
+            "description": "An OpenAIResponseObject.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/OpenAIResponseObject"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Agents"
+        ],
+        "description": "Retrieve an OpenAI response by its ID.",
+        "parameters": [
+          {
+            "name": "response_id",
+            "in": "path",
+            "description": "The ID of the OpenAI response to retrieve.",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ]
+      },
+      "delete": {
+        "responses": {
+          "200": {
+            "description": "An OpenAIDeleteResponseObject",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/OpenAIDeleteResponseObject"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Agents"
+        ],
+        "description": "Delete an OpenAI response by its ID.",
+        "parameters": [
+          {
+            "name": "response_id",
+            "in": "path",
+            "description": "The ID of the OpenAI response to delete.",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ]
+      }
+    },
     "/v1/inference/embeddings": {
       "post": {
         "responses": {

@@ -1284,49 +1368,6 @@
             ]
           }
         },
-    "/v1/openai/v1/responses/{response_id}": { ... }  (43 removed lines: the previous entry, identical to the "get" block added at line 817 above, is dropped from this position)
     "/v1/scoring-functions/{scoring_fn_id}": {
       "get": {
         "responses": {

@@ -9063,6 +9104,30 @@
       ],
       "title": "OpenAIResponseObjectStreamResponseWebSearchCallSearching"
     },
+    "OpenAIDeleteResponseObject": {
+      "type": "object",
+      "properties": {
+        "id": {
+          "type": "string"
+        },
+        "object": {
+          "type": "string",
+          "const": "response",
+          "default": "response"
+        },
+        "deleted": {
+          "type": "boolean",
+          "default": true
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "id",
+        "object",
+        "deleted"
+      ],
+      "title": "OpenAIDeleteResponseObject"
+    },
     "EmbeddingsRequest": {
       "type": "object",
       "properties": {
docs/_static/llama-stack-spec.yaml (vendored, 106 changes)

@@ -558,6 +558,64 @@ paths:
           required: true
           schema:
             type: string
+  /v1/openai/v1/responses/{response_id}:
+    get:
+      responses:
+        '200':
+          description: An OpenAIResponseObject.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAIResponseObject'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Agents
+      description: Retrieve an OpenAI response by its ID.
+      parameters:
+        - name: response_id
+          in: path
+          description: >-
+            The ID of the OpenAI response to retrieve.
+          required: true
+          schema:
+            type: string
+    delete:
+      responses:
+        '200':
+          description: An OpenAIDeleteResponseObject
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAIDeleteResponseObject'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Agents
+      description: Delete an OpenAI response by its ID.
+      parameters:
+        - name: response_id
+          in: path
+          description: The ID of the OpenAI response to delete.
+          required: true
+          schema:
+            type: string
   /v1/inference/embeddings:
     post:
       responses:

@@ -883,36 +941,6 @@ paths:
           required: true
           schema:
             type: string
-  /v1/openai/v1/responses/{response_id}: ...  (30 removed lines: the previous entry, identical to the "get" block added at line 558 above, is dropped from this position)
   /v1/scoring-functions/{scoring_fn_id}:
     get:
       responses:

@@ -6404,6 +6432,24 @@ components:
         - type
       title: >-
         OpenAIResponseObjectStreamResponseWebSearchCallSearching
+    OpenAIDeleteResponseObject:
+      type: object
+      properties:
+        id:
+          type: string
+        object:
+          type: string
+          const: response
+          default: response
+        deleted:
+          type: boolean
+          default: true
+      additionalProperties: false
+      required:
+        - id
+        - object
+        - deleted
+      title: OpenAIDeleteResponseObject
     EmbeddingsRequest:
       type: object
       properties:
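Together, the HTML and YAML spec changes describe a GET and a DELETE operation on `/v1/openai/v1/responses/{response_id}`. A minimal client sketch against a locally running stack; the base URL and the response ID below are placeholders for illustration, not values from this commit:

```python
import requests

BASE_URL = "http://localhost:8321"   # assumed local server address
response_id = "resp_123"             # placeholder response ID

# Retrieve an OpenAI response by its ID (returns an OpenAIResponseObject).
r = requests.get(f"{BASE_URL}/v1/openai/v1/responses/{response_id}")
r.raise_for_status()
print(r.json())

# Delete it; per the new schema the body looks like
# {"id": ..., "object": "response", "deleted": true}.
r = requests.delete(f"{BASE_URL}/v1/openai/v1/responses/{response_id}")
r.raise_for_status()
print(r.json())
```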
@@ -156,7 +156,7 @@ def _validate_api_delete_method_returns_none(method) -> str | None:

     # Allow OpenAI endpoints to return response objects since they follow OpenAI specification
     method_name = getattr(method, '__name__', '')
-    if method_name.startswith('openai_'):
+    if method_name.__contains__('openai_'):
         return None

     if return_type is not None and return_type is not type(None):
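The relaxed check above now exempts any delete method whose name merely contains `openai_` rather than starting with it, which would also cover a name such as `delete_openai_response` (a hypothetical example). A quick illustration of the difference:

```python
names = ["openai_delete_response", "delete_openai_response", "delete_dataset"]

# Old rule: only names that start with "openai_" were exempt from the check.
print([n for n in names if n.startswith("openai_")])   # ['openai_delete_response']

# New rule: any name containing "openai_" is exempt, so an OpenAI-style
# delete endpoint with a different prefix also passes.
print(["openai_" in n for n in names])                  # [True, True, False]
```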
@@ -64,10 +64,9 @@ options:
   --template TEMPLATE   Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
   --list-templates      Show the available templates for building a Llama Stack distribution (default: False)
   --image-type {conda,container,venv}
-                        Image Type to use for the build. This can be either conda or container or venv. If not specified, will use the image type from the template config. (default:
-                        conda)
+                        Image Type to use for the build. If not specified, will use the image type from the template config. (default: None)
   --image-name IMAGE_NAME
-                        [for image-type=conda|container|venv] Name of the conda or virtual environment to use for the build. If not specified, currently active Conda environment will be used if
+                        [for image-type=conda|container|venv] Name of the conda or virtual environment to use for the build. If not specified, currently active environment will be used if
                         found. (default: None)
   --print-deps-only     Print the dependencies for the stack only, without building the stack (default: False)
   --run                 Run the stack after building using the same image type, name, and other applicable arguments (default: False)

@@ -89,32 +88,53 @@ llama stack build --list-templates
 ------------------------------+-----------------------------------------------------------------------------+
 | Template Name | Description |
 +------------------------------+-----------------------------------------------------------------------------+
-| hf-serverless | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
-+------------------------------+-----------------------------------------------------------------------------+
-| together | Use Together.AI for running LLM inference |
+| watsonx | Use watsonx for running LLM inference |
 +------------------------------+-----------------------------------------------------------------------------+
 | vllm-gpu | Use a built-in vLLM engine for running LLM inference |
 +------------------------------+-----------------------------------------------------------------------------+
-| experimental-post-training | Experimental template for post training |
-+------------------------------+-----------------------------------------------------------------------------+
-| remote-vllm | Use (an external) vLLM server for running LLM inference |
-+------------------------------+-----------------------------------------------------------------------------+
-| fireworks | Use Fireworks.AI for running LLM inference |
+| together | Use Together.AI for running LLM inference |
 +------------------------------+-----------------------------------------------------------------------------+
 | tgi | Use (an external) TGI server for running LLM inference |
 +------------------------------+-----------------------------------------------------------------------------+
-| bedrock | Use AWS Bedrock for running LLM inference and safety |
+| starter | Quick start template for running Llama Stack with several popular providers |
 +------------------------------+-----------------------------------------------------------------------------+
-| meta-reference-gpu | Use Meta Reference for running LLM inference |
+| sambanova | Use SambaNova for running LLM inference and safety |
 +------------------------------+-----------------------------------------------------------------------------+
-| nvidia | Use NVIDIA NIM for running LLM inference |
+| remote-vllm | Use (an external) vLLM server for running LLM inference |
 +------------------------------+-----------------------------------------------------------------------------+
-| cerebras | Use Cerebras for running LLM inference |
+| postgres-demo | Quick start template for running Llama Stack with several popular providers |
++------------------------------+-----------------------------------------------------------------------------+
+| passthrough | Use Passthrough hosted llama-stack endpoint for LLM inference |
++------------------------------+-----------------------------------------------------------------------------+
+| open-benchmark | Distribution for running open benchmarks |
 +------------------------------+-----------------------------------------------------------------------------+
 | ollama | Use (an external) Ollama server for running LLM inference |
 +------------------------------+-----------------------------------------------------------------------------+
+| nvidia | Use NVIDIA NIM for running LLM inference, evaluation and safety |
++------------------------------+-----------------------------------------------------------------------------+
+| meta-reference-gpu | Use Meta Reference for running LLM inference |
++------------------------------+-----------------------------------------------------------------------------+
+| llama_api | Distribution for running e2e tests in CI |
++------------------------------+-----------------------------------------------------------------------------+
+| hf-serverless | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
++------------------------------+-----------------------------------------------------------------------------+
 | hf-endpoint | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
 +------------------------------+-----------------------------------------------------------------------------+
+| groq | Use Groq for running LLM inference |
++------------------------------+-----------------------------------------------------------------------------+
+| fireworks | Use Fireworks.AI for running LLM inference |
++------------------------------+-----------------------------------------------------------------------------+
+| experimental-post-training | Experimental template for post training |
++------------------------------+-----------------------------------------------------------------------------+
+| dell | Dell's distribution of Llama Stack. TGI inference via Dell's custom |
+| | container |
++------------------------------+-----------------------------------------------------------------------------+
+| ci-tests | Distribution for running e2e tests in CI |
++------------------------------+-----------------------------------------------------------------------------+
+| cerebras | Use Cerebras for running LLM inference |
++------------------------------+-----------------------------------------------------------------------------+
+| bedrock | Use AWS Bedrock for running LLM inference and safety |
++------------------------------+-----------------------------------------------------------------------------+
 ```

 You may then pick a template to build your distribution with providers fitted to your liking.

@@ -256,6 +276,7 @@ $ llama stack build --template ollama --image-type container
 ...
 Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/ContainerfileFROM python:3.10-slim
 ...
+```

 You can now edit ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml and run `llama stack run ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml`
 ```

@@ -305,30 +326,28 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con

 ```
 llama stack run -h
-usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE] [--tls-keyfile TLS_KEYFILE] [--tls-certfile TLS_CERTFILE]
-                       [--image-type {conda,container,venv}]
-                       config
+usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE]
+                       [--image-type {conda,venv}] [--enable-ui]
+                       [config | template]

 Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.

 positional arguments:
-  config                Path to config file to use for the run
+  config | template     Path to config file to use for the run or name of known template (`llama stack list` for a list). (default: None)

 options:
   -h, --help            show this help message and exit
   --port PORT           Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
   --image-name IMAGE_NAME
                         Name of the image to run. Defaults to the current environment (default: None)
-  --env KEY=VALUE       Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: [])
-  --tls-keyfile TLS_KEYFILE
-                        Path to TLS key file for HTTPS (default: None)
-  --tls-certfile TLS_CERTFILE
-                        Path to TLS certificate file for HTTPS (default: None)
-  --image-type {conda,container,venv}
-                        Image Type used during the build. This can be either conda or container or venv. (default: conda)
+  --env KEY=VALUE       Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: None)
+  --image-type {conda,venv}
+                        Image Type used during the build. This can be either conda or venv. (default: None)
+  --enable-ui           Start the UI server (default: False)

 ```

+**Note:** Container images built with `llama stack build --image-type container` cannot be run using `llama stack run`. Instead, they must be run directly using Docker or Podman commands as shown in the container building section above.

 ```
 # Start using template name
 llama stack run tgi

@@ -372,6 +391,7 @@ INFO: Application startup complete.
 INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
 INFO: 2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
 ```
+
 ### Listing Distributions
 Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.

@@ -391,6 +411,20 @@ Example Usage
 llama stack list
 ```

+```
+------------------------------+-----------------------------------------------------------------------------+--------------+------------+
+| Stack Name | Path | Build Config | Run Config |
++------------------------------+-----------------------------------------------------------------------------+--------------+------------+
+| together | /home/wenzhou/.llama/distributions/together | Yes | No |
++------------------------------+-----------------------------------------------------------------------------+--------------+------------+
+| bedrock | /home/wenzhou/.llama/distributions/bedrock | Yes | No |
++------------------------------+-----------------------------------------------------------------------------+--------------+------------+
+| starter | /home/wenzhou/.llama/distributions/starter | No | No |
++------------------------------+-----------------------------------------------------------------------------+--------------+------------+
+| remote-vllm | /home/wenzhou/.llama/distributions/remote-vllm | Yes | Yes |
++------------------------------+-----------------------------------------------------------------------------+--------------+------------+
+```
+
 ### Removing a Distribution
 Use the remove command to delete a distribution you've previously built.

@@ -413,7 +447,7 @@ Example
 llama stack rm llamastack-test
 ```

-To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they’re no longer needed.
+To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they're no longer needed.

 ### Troubleshooting
@@ -6,7 +6,7 @@ Llama Stack is a stateful service with REST APIs to support the seamless transit
 environments. You can build and test using a local server first and deploy to a hosted endpoint for production.

 In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)
-as the inference [provider](../providers/index.md#inference) for a Llama Model.
+as the inference [provider](../providers/inference/index) for a Llama Model.

 #### Step 1: Install and setup
 1. Install [uv](https://docs.astral.sh/uv/)
docs/source/providers/agents/index.md (new file, 5 lines)
@@ -0,0 +1,5 @@
# Agents Providers

This section contains documentation for all available providers for the **agents** API.

- [inline::meta-reference](inline_meta-reference.md)

docs/source/providers/agents/inline_meta-reference.md (new file, 26 lines)
@@ -0,0 +1,26 @@
# inline::meta-reference

## Description

Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `persistence_store` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
| `responses_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | |

## Sample Configuration

```yaml
persistence_store:
  type: sqlite
  namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db
responses_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db

```
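The `${env.VAR:=default}` placeholders in these sample configurations read an environment variable and appear to fall back to the value after `:=` when the variable is unset. A rough Python sketch of that substitution rule; this is my own illustration, not the stack's actual resolver:

```python
import os
import re

# Matches ${env.NAME:=default}; the default part is optional.
_PATTERN = re.compile(r"\$\{env\.([A-Z0-9_]+)(?::=([^}]*))?\}")

def resolve(value: str) -> str:
    """Replace ${env.NAME:=default} placeholders with the environment
    value, or with the given default when the variable is unset."""
    return _PATTERN.sub(lambda m: os.environ.get(m.group(1), m.group(2) or ""), value)

print(resolve("${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db"))
```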
docs/source/providers/datasetio/index.md (new file, 7 lines)
@@ -0,0 +1,7 @@
# Datasetio Providers

This section contains documentation for all available providers for the **datasetio** API.

- [inline::localfs](inline_localfs.md)
- [remote::huggingface](remote_huggingface.md)
- [remote::nvidia](remote_nvidia.md)

docs/source/providers/datasetio/inline_localfs.md (new file, 22 lines)
@@ -0,0 +1,22 @@
# inline::localfs

## Description

Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |

## Sample Configuration

```yaml
kvstore:
  type: sqlite
  namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db

```

docs/source/providers/datasetio/remote_huggingface.md (new file, 22 lines)
@@ -0,0 +1,22 @@
# remote::huggingface

## Description

HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |

## Sample Configuration

```yaml
kvstore:
  type: sqlite
  namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db

```

docs/source/providers/datasetio/remote_nvidia.md (new file, 25 lines)
@@ -0,0 +1,25 @@
# remote::nvidia

## Description

NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The NVIDIA API key. |
| `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. |
| `project_id` | `str \| None` | No | test-project | The NVIDIA project ID. |
| `datasets_url` | `<class 'str'>` | No | http://nemo.test | Base URL for the NeMo Dataset API |

## Sample Configuration

```yaml
api_key: ${env.NVIDIA_API_KEY:+}
dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test}

```

docs/source/providers/eval/index.md (new file, 6 lines)
@@ -0,0 +1,6 @@
# Eval Providers

This section contains documentation for all available providers for the **eval** API.

- [inline::meta-reference](inline_meta-reference.md)
- [remote::nvidia](remote_nvidia.md)

docs/source/providers/eval/inline_meta-reference.md (new file, 22 lines)
@@ -0,0 +1,22 @@
# inline::meta-reference

## Description

Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |

## Sample Configuration

```yaml
kvstore:
  type: sqlite
  namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db

```

docs/source/providers/eval/remote_nvidia.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# remote::nvidia

## Description

NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `evaluator_url` | `<class 'str'>` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service |

## Sample Configuration

```yaml
evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}

```

docs/source/providers/files/index.md (new file, 5 lines)
@@ -0,0 +1,5 @@
# Files Providers

This section contains documentation for all available providers for the **files** API.

- [inline::localfs](inline_localfs.md)

docs/source/providers/files/inline_localfs.md (new file, 24 lines)
@@ -0,0 +1,24 @@
# inline::localfs

## Description

Local filesystem-based file storage provider for managing files and documents locally.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `storage_dir` | `<class 'str'>` | No | PydanticUndefined | Directory to store uploaded files |
| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
| `ttl_secs` | `<class 'int'>` | No | 31536000 | |

## Sample Configuration

```yaml
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files}
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db

```
@@ -18,60 +18,92 @@ Llama Stack supports external providers that live outside of the main codebase.
 ## Agents
 Run multi-step agentic workflows with LLMs with tool usage, memory (RAG), etc.

+```{toctree}
+:maxdepth: 1
+
+agents/index
+```
+
 ## DatasetIO
 Interfaces with datasets and data loaders.

-## Eval
-Generates outputs (via Inference or Agents) and perform scoring.
-
-## Inference
-Runs inference with an LLM.
-
-## Post Training
-Fine-tunes a model.
-
-#### Post Training Providers
-The following providers are available for Post Training:
-
 ```{toctree}
 :maxdepth: 1

-external
-post_training/huggingface
-post_training/torchtune
-post_training/nvidia_nemo
+datasetio/index
+```
+
+## Eval
+Generates outputs (via Inference or Agents) and perform scoring.
+
+```{toctree}
+:maxdepth: 1
+
+eval/index
+```
+
+## Inference
+Runs inference with an LLM.
+
+```{toctree}
+:maxdepth: 1
+
+inference/index
+```
+
+## Post Training
+Fine-tunes a model.
+
+```{toctree}
+:maxdepth: 1
+
+post_training/index
 ```

 ## Safety
 Applies safety policies to the output at a Systems (not only model) level.

+```{toctree}
+:maxdepth: 1
+
+safety/index
+```
+
 ## Scoring
 Evaluates the outputs of the system.

+```{toctree}
+:maxdepth: 1
+
+scoring/index
+```
+
 ## Telemetry
 Collects telemetry data from the system.

+```{toctree}
+:maxdepth: 1
+
+telemetry/index
+```
+
 ## Tool Runtime
 Is associated with the ToolGroup resouces.

+```{toctree}
+:maxdepth: 1
+
+tool_runtime/index
+```
+
 ## Vector IO

 Vector IO refers to operations on vector databases, such as adding documents, searching, and deleting documents.
 Vector IO plays a crucial role in [Retreival Augmented Generation (RAG)](../..//building_applications/rag), where the vector
 io and database are used to store and retrieve documents for retrieval.

-#### Vector IO Providers
-The following providers (i.e., databases) are available for Vector IO:
-
 ```{toctree}
 :maxdepth: 1

-external
-vector_io/faiss
-vector_io/sqlite-vec
-vector_io/chromadb
-vector_io/pgvector
-vector_io/qdrant
-vector_io/milvus
-vector_io/weaviate
+vector_io/index
 ```
docs/source/providers/inference/index.md (new file, 32 lines)
@@ -0,0 +1,32 @@
# Inference Providers

This section contains documentation for all available providers for the **inference** API.

- [inline::meta-reference](inline_meta-reference.md)
- [inline::sentence-transformers](inline_sentence-transformers.md)
- [inline::vllm](inline_vllm.md)
- [remote::anthropic](remote_anthropic.md)
- [remote::bedrock](remote_bedrock.md)
- [remote::cerebras](remote_cerebras.md)
- [remote::cerebras-openai-compat](remote_cerebras-openai-compat.md)
- [remote::databricks](remote_databricks.md)
- [remote::fireworks](remote_fireworks.md)
- [remote::fireworks-openai-compat](remote_fireworks-openai-compat.md)
- [remote::gemini](remote_gemini.md)
- [remote::groq](remote_groq.md)
- [remote::groq-openai-compat](remote_groq-openai-compat.md)
- [remote::hf::endpoint](remote_hf_endpoint.md)
- [remote::hf::serverless](remote_hf_serverless.md)
- [remote::llama-openai-compat](remote_llama-openai-compat.md)
- [remote::nvidia](remote_nvidia.md)
- [remote::ollama](remote_ollama.md)
- [remote::openai](remote_openai.md)
- [remote::passthrough](remote_passthrough.md)
- [remote::runpod](remote_runpod.md)
- [remote::sambanova](remote_sambanova.md)
- [remote::sambanova-openai-compat](remote_sambanova-openai-compat.md)
- [remote::tgi](remote_tgi.md)
- [remote::together](remote_together.md)
- [remote::together-openai-compat](remote_together-openai-compat.md)
- [remote::vllm](remote_vllm.md)
- [remote::watsonx](remote_watsonx.md)

docs/source/providers/inference/inline_meta-reference.md (new file, 32 lines)
@@ -0,0 +1,32 @@
# inline::meta-reference

## Description

Meta's reference implementation of inference with support for various model formats and optimization techniques.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `model` | `str \| None` | No | | |
| `torch_seed` | `int \| None` | No | | |
| `max_seq_len` | `<class 'int'>` | No | 4096 | |
| `max_batch_size` | `<class 'int'>` | No | 1 | |
| `model_parallel_size` | `int \| None` | No | | |
| `create_distributed_process_group` | `<class 'bool'>` | No | True | |
| `checkpoint_dir` | `str \| None` | No | | |
| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig, annotation=NoneType, required=True, discriminator='type'` | No | | |

## Sample Configuration

```yaml
model: Llama3.2-3B-Instruct
checkpoint_dir: ${env.CHECKPOINT_DIR:=null}
quantization:
  type: ${env.QUANTIZATION_TYPE:=bf16}
model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
max_batch_size: ${env.MAX_BATCH_SIZE:=1}
max_seq_len: ${env.MAX_SEQ_LEN:=4096}

```

docs/source/providers/inference/inline_sentence-transformers.md (new file, 13 lines; path inferred from the index above)
@@ -0,0 +1,13 @@
# inline::sentence-transformers

## Description

Sentence Transformers inference provider for text embeddings and similarity search.

## Sample Configuration

```yaml
{}

```

docs/source/providers/inference/inline_vllm.md (new file, 29 lines)
@@ -0,0 +1,29 @@
# inline::vllm

## Description

vLLM inference provider for high-performance model serving with PagedAttention and continuous batching.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `tensor_parallel_size` | `<class 'int'>` | No | 1 | Number of tensor parallel replicas (number of GPUs to use). |
| `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. |
| `max_model_len` | `<class 'int'>` | No | 4096 | Maximum context length to use during serving. |
| `max_num_seqs` | `<class 'int'>` | No | 4 | Maximum parallel batch size for generation. |
| `enforce_eager` | `<class 'bool'>` | No | False | Whether to use eager mode for inference (otherwise cuda graphs are used). |
| `gpu_memory_utilization` | `<class 'float'>` | No | 0.3 | How much GPU memory will be allocated when this provider has finished loading, including memory that was already allocated before loading. |

## Sample Configuration

```yaml
tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:=1}
max_tokens: ${env.MAX_TOKENS:=4096}
max_model_len: ${env.MAX_MODEL_LEN:=4096}
max_num_seqs: ${env.MAX_NUM_SEQS:=4}
enforce_eager: ${env.ENFORCE_EAGER:=False}
gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:=0.3}

```

docs/source/providers/inference/remote_anthropic.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# remote::anthropic

## Description

Anthropic inference provider for accessing Claude models and Anthropic's AI services.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | API key for Anthropic models |

## Sample Configuration

```yaml
api_key: ${env.ANTHROPIC_API_KEY}

```

docs/source/providers/inference/remote_bedrock.md (new file, 28 lines)
@@ -0,0 +1,28 @@
# remote::bedrock

## Description

AWS Bedrock inference provider for accessing various AI models through AWS's managed service.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION |
| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE |
| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE |
| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |

## Sample Configuration

```yaml
{}

```

docs/source/providers/inference/remote_cerebras-openai-compat.md (new file, 21 lines; path inferred from the index above)
@@ -0,0 +1,21 @@
# remote::cerebras-openai-compat

## Description

Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Cerebras API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.cerebras.ai/v1 | The URL for the Cerebras API server |

## Sample Configuration

```yaml
openai_compat_api_base: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY}

```

docs/source/providers/inference/remote_cerebras.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::cerebras

## Description

Cerebras inference provider for running models on Cerebras Cloud platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Cerebras API Key |

## Sample Configuration

```yaml
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY}

```

docs/source/providers/inference/remote_databricks.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::databricks

## Description

Databricks inference provider for running models on Databricks' unified analytics platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | | The URL for the Databricks model serving endpoint |
| `api_token` | `<class 'str'>` | No | | The Databricks API token |

## Sample Configuration

```yaml
url: ${env.DATABRICKS_URL}
api_token: ${env.DATABRICKS_API_TOKEN}

```

docs/source/providers/inference/remote_fireworks-openai-compat.md (new file, 21 lines; path inferred from the index above)
@@ -0,0 +1,21 @@
# remote::fireworks-openai-compat

## Description

Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Fireworks API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks API server |

## Sample Configuration

```yaml
openai_compat_api_base: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}

```

docs/source/providers/inference/remote_fireworks.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::fireworks

## Description

Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Fireworks.ai API Key |

## Sample Configuration

```yaml
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}

```

docs/source/providers/inference/remote_gemini.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# remote::gemini

## Description

Google Gemini inference provider for accessing Gemini models and Google's AI services.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | API key for Gemini models |

## Sample Configuration

```yaml
api_key: ${env.GEMINI_API_KEY}

```

docs/source/providers/inference/remote_groq-openai-compat.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# remote::groq-openai-compat

## Description

Groq OpenAI-compatible provider for using Groq models with OpenAI API format.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Groq API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.groq.com/openai/v1 | The URL for the Groq API server |

## Sample Configuration

```yaml
openai_compat_api_base: https://api.groq.com/openai/v1
|
||||||
|
api_key: ${env.GROQ_API_KEY}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
21
docs/source/providers/inference/remote_groq.md
Normal file
21
docs/source/providers/inference/remote_groq.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::groq
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Groq inference provider for ultra-fast inference using Groq's LPU technology.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `api_key` | `str \| None` | No | | The Groq API key |
|
||||||
|
| `url` | `<class 'str'>` | No | https://api.groq.com | The URL for the Groq AI server |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
url: https://api.groq.com
|
||||||
|
api_key: ${env.GROQ_API_KEY}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
21
docs/source/providers/inference/remote_hf_endpoint.md
Normal file
21
docs/source/providers/inference/remote_hf_endpoint.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::hf::endpoint
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
HuggingFace Inference Endpoints provider for dedicated model serving.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `endpoint_name` | `<class 'str'>` | No | PydanticUndefined | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. |
|
||||||
|
| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
endpoint_name: ${env.INFERENCE_ENDPOINT_NAME}
|
||||||
|
api_token: ${env.HF_API_TOKEN}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
21
docs/source/providers/inference/remote_hf_serverless.md
Normal file
21
docs/source/providers/inference/remote_hf_serverless.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::hf::serverless
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
HuggingFace Inference API serverless provider for on-demand model inference.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `huggingface_repo` | `<class 'str'>` | No | PydanticUndefined | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') |
|
||||||
|
| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
huggingface_repo: ${env.INFERENCE_MODEL}
|
||||||
|
api_token: ${env.HF_API_TOKEN}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::llama-openai-compat
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `api_key` | `str \| None` | No | | The Llama API key |
|
||||||
|
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
openai_compat_api_base: https://api.llama.com/compat/v1/
|
||||||
|
api_key: ${env.LLAMA_API_KEY}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
24
docs/source/providers/inference/remote_nvidia.md
Normal file
24
docs/source/providers/inference/remote_nvidia.md
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
# remote::nvidia
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
|
||||||
|
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed of using the hosted service |
|
||||||
|
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
|
||||||
|
| `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
|
||||||
|
api_key: ${env.NVIDIA_API_KEY:+}
|
||||||
|
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
21
docs/source/providers/inference/remote_ollama.md
Normal file
21
docs/source/providers/inference/remote_ollama.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::ollama
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Ollama inference provider for running local models through the Ollama runtime.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `url` | `<class 'str'>` | No | http://localhost:11434 | |
|
||||||
|
| `raise_on_connect_error` | `<class 'bool'>` | No | True | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
url: ${env.OLLAMA_URL:=http://localhost:11434}
|
||||||
|
raise_on_connect_error: true
|
||||||
|
|
||||||
|
```
|
||||||
|
|
19
docs/source/providers/inference/remote_openai.md
Normal file
19
docs/source/providers/inference/remote_openai.md
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
# remote::openai
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
OpenAI inference provider for accessing GPT models and other OpenAI services.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `api_key` | `str \| None` | No | | API key for OpenAI models |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
api_key: ${env.OPENAI_API_KEY}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
21
docs/source/providers/inference/remote_passthrough.md
Normal file
21
docs/source/providers/inference/remote_passthrough.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::passthrough
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Passthrough inference provider for connecting to any external inference service not directly supported.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint |
|
||||||
|
| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
url: ${env.PASSTHROUGH_URL}
|
||||||
|
api_key: ${env.PASSTHROUGH_API_KEY}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
21
docs/source/providers/inference/remote_runpod.md
Normal file
21
docs/source/providers/inference/remote_runpod.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::runpod
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
RunPod inference provider for running models on RunPod's cloud GPU platform.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
|
||||||
|
| `api_token` | `str \| None` | No | | The API token |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
url: ${env.RUNPOD_URL:+}
|
||||||
|
api_token: ${env.RUNPOD_API_TOKEN:+}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::sambanova-openai-compat
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `api_key` | `str \| None` | No | | The SambaNova API key |
|
||||||
|
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova API server |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
openai_compat_api_base: https://api.sambanova.ai/v1
|
||||||
|
api_key: ${env.SAMBANOVA_API_KEY}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
21
docs/source/providers/inference/remote_sambanova.md
Normal file
21
docs/source/providers/inference/remote_sambanova.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::sambanova
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
SambaNova inference provider for running models on SambaNova's dataflow architecture.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
|
||||||
|
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
url: https://api.sambanova.ai/v1
|
||||||
|
api_key: ${env.SAMBANOVA_API_KEY}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
19
docs/source/providers/inference/remote_tgi.md
Normal file
19
docs/source/providers/inference/remote_tgi.md
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
# remote::tgi
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Text Generation Inference (TGI) provider for HuggingFace model serving.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `url` | `<class 'str'>` | No | PydanticUndefined | The URL for the TGI serving endpoint |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
url: ${env.TGI_URL}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::together-openai-compat
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Together AI OpenAI-compatible provider for using Together models with OpenAI API format.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `api_key` | `str \| None` | No | | The Together API key |
|
||||||
|
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together API server |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
openai_compat_api_base: https://api.together.xyz/v1
|
||||||
|
api_key: ${env.TOGETHER_API_KEY}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
21
docs/source/providers/inference/remote_together.md
Normal file
21
docs/source/providers/inference/remote_together.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::together
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Together AI inference provider for open-source models and collaborative AI development.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `url` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
|
||||||
|
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Together AI API Key |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
url: https://api.together.xyz/v1
|
||||||
|
api_key: ${env.TOGETHER_API_KEY:+}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
25
docs/source/providers/inference/remote_vllm.md
Normal file
25
docs/source/providers/inference/remote_vllm.md
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
# remote::vllm
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Remote vLLM inference provider for connecting to vLLM servers.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
|
||||||
|
| `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. |
|
||||||
|
| `api_token` | `str \| None` | No | fake | The API token |
|
||||||
|
| `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
url: ${env.VLLM_URL}
|
||||||
|
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||||
|
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||||
|
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
24
docs/source/providers/inference/remote_watsonx.md
Normal file
24
docs/source/providers/inference/remote_watsonx.md
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
# remote::watsonx
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
|
||||||
|
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key, only needed of using the hosted service |
|
||||||
|
| `project_id` | `str \| None` | No | | The Project ID key, only needed of using the hosted service |
|
||||||
|
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
|
||||||
|
api_key: ${env.WATSONX_API_KEY:+}
|
||||||
|
project_id: ${env.WATSONX_PROJECT_ID:+}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
7
docs/source/providers/post_training/index.md
Normal file
7
docs/source/providers/post_training/index.md
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# Post_Training Providers
|
||||||
|
|
||||||
|
This section contains documentation for all available providers for the **post_training** API.
|
||||||
|
|
||||||
|
- [inline::huggingface](inline_huggingface.md)
|
||||||
|
- [inline::torchtune](inline_torchtune.md)
|
||||||
|
- [remote::nvidia](remote_nvidia.md)
|
36
docs/source/providers/post_training/inline_huggingface.md
Normal file
36
docs/source/providers/post_training/inline_huggingface.md
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
# inline::huggingface
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `device` | `<class 'str'>` | No | cuda | |
|
||||||
|
| `distributed_backend` | `Literal['fsdp', 'deepspeed'` | No | | |
|
||||||
|
| `checkpoint_format` | `Literal['full_state', 'huggingface'` | No | huggingface | |
|
||||||
|
| `chat_template` | `<class 'str'>` | No | <|user|>
|
||||||
|
{input}
|
||||||
|
<|assistant|>
|
||||||
|
{output} | |
|
||||||
|
| `model_specific_config` | `<class 'dict'>` | No | {'trust_remote_code': True, 'attn_implementation': 'sdpa'} | |
|
||||||
|
| `max_seq_length` | `<class 'int'>` | No | 2048 | |
|
||||||
|
| `gradient_checkpointing` | `<class 'bool'>` | No | False | |
|
||||||
|
| `save_total_limit` | `<class 'int'>` | No | 3 | |
|
||||||
|
| `logging_steps` | `<class 'int'>` | No | 10 | |
|
||||||
|
| `warmup_ratio` | `<class 'float'>` | No | 0.1 | |
|
||||||
|
| `weight_decay` | `<class 'float'>` | No | 0.01 | |
|
||||||
|
| `dataloader_num_workers` | `<class 'int'>` | No | 4 | |
|
||||||
|
| `dataloader_pin_memory` | `<class 'bool'>` | No | True | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
checkpoint_format: huggingface
|
||||||
|
distributed_backend: null
|
||||||
|
device: cpu
|
||||||
|
|
||||||
|
```
|
||||||
|
|
20
docs/source/providers/post_training/inline_torchtune.md
Normal file
20
docs/source/providers/post_training/inline_torchtune.md
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
# inline::torchtune
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `torch_seed` | `int \| None` | No | | |
|
||||||
|
| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
checkpoint_format: meta
|
||||||
|
|
||||||
|
```
|
||||||
|
|
28
docs/source/providers/post_training/remote_nvidia.md
Normal file
28
docs/source/providers/post_training/remote_nvidia.md
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
# remote::nvidia
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `api_key` | `str \| None` | No | | The NVIDIA API key. |
|
||||||
|
| `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. |
|
||||||
|
| `project_id` | `str \| None` | No | test-example-model@v1 | The NVIDIA project ID. |
|
||||||
|
| `customizer_url` | `str \| None` | No | | Base URL for the NeMo Customizer API |
|
||||||
|
| `timeout` | `<class 'int'>` | No | 300 | Timeout for the NVIDIA Post Training API |
|
||||||
|
| `max_retries` | `<class 'int'>` | No | 3 | Maximum number of retries for the NVIDIA Post Training API |
|
||||||
|
| `output_model_dir` | `<class 'str'>` | No | test-example-model@v1 | Directory to save the output model |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
api_key: ${env.NVIDIA_API_KEY:+}
|
||||||
|
dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
|
||||||
|
project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
|
||||||
|
customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
10
docs/source/providers/safety/index.md
Normal file
10
docs/source/providers/safety/index.md
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
# Safety Providers
|
||||||
|
|
||||||
|
This section contains documentation for all available providers for the **safety** API.
|
||||||
|
|
||||||
|
- [inline::code-scanner](inline_code-scanner.md)
|
||||||
|
- [inline::llama-guard](inline_llama-guard.md)
|
||||||
|
- [inline::prompt-guard](inline_prompt-guard.md)
|
||||||
|
- [remote::bedrock](remote_bedrock.md)
|
||||||
|
- [remote::nvidia](remote_nvidia.md)
|
||||||
|
- [remote::sambanova](remote_sambanova.md)
|
13
docs/source/providers/safety/inline_code-scanner.md
Normal file
13
docs/source/providers/safety/inline_code-scanner.md
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# inline::code-scanner
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
{}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
19
docs/source/providers/safety/inline_llama-guard.md
Normal file
19
docs/source/providers/safety/inline_llama-guard.md
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
# inline::llama-guard
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `excluded_categories` | `list[str` | No | [] | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
excluded_categories: []
|
||||||
|
|
||||||
|
```
|
||||||
|
|
19
docs/source/providers/safety/inline_prompt-guard.md
Normal file
19
docs/source/providers/safety/inline_prompt-guard.md
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
# inline::prompt-guard
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Prompt Guard safety provider for detecting and filtering unsafe prompts and content.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `guard_type` | `<class 'str'>` | No | injection | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
guard_type: injection
|
||||||
|
|
||||||
|
```
|
||||||
|
|
28
docs/source/providers/safety/remote_bedrock.md
Normal file
28
docs/source/providers/safety/remote_bedrock.md
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
# remote::bedrock
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
AWS Bedrock safety provider for content moderation using AWS's safety services.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
|
||||||
|
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
|
||||||
|
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
|
||||||
|
| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION |
|
||||||
|
| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE |
|
||||||
|
| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS |
|
||||||
|
| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE |
|
||||||
|
| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |
|
||||||
|
| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. |
|
||||||
|
| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
{}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
21
docs/source/providers/safety/remote_nvidia.md
Normal file
21
docs/source/providers/safety/remote_nvidia.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::nvidia
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
NVIDIA's safety provider for content moderation and safety filtering.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `guardrails_service_url` | `<class 'str'>` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service |
|
||||||
|
| `config_id` | `str \| None` | No | self-check | Guardrails configuration ID to use from the Guardrails configuration store |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}
|
||||||
|
config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
21
docs/source/providers/safety/remote_sambanova.md
Normal file
21
docs/source/providers/safety/remote_sambanova.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::sambanova
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
SambaNova's safety provider for content moderation and safety filtering.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
|
||||||
|
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
url: https://api.sambanova.ai/v1
|
||||||
|
api_key: ${env.SAMBANOVA_API_KEY}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
7
docs/source/providers/scoring/index.md
Normal file
7
docs/source/providers/scoring/index.md
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# Scoring Providers
|
||||||
|
|
||||||
|
This section contains documentation for all available providers for the **scoring** API.
|
||||||
|
|
||||||
|
- [inline::basic](inline_basic.md)
|
||||||
|
- [inline::braintrust](inline_braintrust.md)
|
||||||
|
- [inline::llm-as-judge](inline_llm-as-judge.md)
|
13
docs/source/providers/scoring/inline_basic.md
Normal file
13
docs/source/providers/scoring/inline_basic.md
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# inline::basic
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Basic scoring provider for simple evaluation metrics and scoring functions.
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
{}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
19
docs/source/providers/scoring/inline_braintrust.md
Normal file
19
docs/source/providers/scoring/inline_braintrust.md
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
# inline::braintrust
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Braintrust scoring provider for evaluation and scoring using the Braintrust platform.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `openai_api_key` | `str \| None` | No | | The OpenAI API Key |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
openai_api_key: ${env.OPENAI_API_KEY:+}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
13
docs/source/providers/scoring/inline_llm-as-judge.md
Normal file
13
docs/source/providers/scoring/inline_llm-as-judge.md
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# inline::llm-as-judge
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
LLM-as-judge scoring provider that uses language models to evaluate and score responses.
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
{}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
5
docs/source/providers/telemetry/index.md
Normal file
5
docs/source/providers/telemetry/index.md
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
# Telemetry Providers
|
||||||
|
|
||||||
|
This section contains documentation for all available providers for the **telemetry** API.
|
||||||
|
|
||||||
|
- [inline::meta-reference](inline_meta-reference.md)
|
25
docs/source/providers/telemetry/inline_meta-reference.md
Normal file
25
docs/source/providers/telemetry/inline_meta-reference.md
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
# inline::meta-reference
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Meta's reference implementation of telemetry and observability using OpenTelemetry.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `otel_trace_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL for traces |
|
||||||
|
| `otel_metric_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL for metrics |
|
||||||
|
| `service_name` | `<class 'str'>` | No | | The service name to use for telemetry |
|
||||||
|
| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink` | No | [<TelemetrySink.CONSOLE: 'console'>, <TelemetrySink.SQLITE: 'sqlite'>] | List of telemetry sinks to enable (possible values: otel, sqlite, console) |
|
||||||
|
| `sqlite_db_path` | `<class 'str'>` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||||
|
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
|
||||||
|
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/trace_store.db
|
||||||
|
|
||||||
|
```
|
||||||
|
|
10
docs/source/providers/tool_runtime/index.md
Normal file
10
docs/source/providers/tool_runtime/index.md
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
# Tool_Runtime Providers
|
||||||
|
|
||||||
|
This section contains documentation for all available providers for the **tool_runtime** API.
|
||||||
|
|
||||||
|
- [inline::rag-runtime](inline_rag-runtime.md)
|
||||||
|
- [remote::bing-search](remote_bing-search.md)
|
||||||
|
- [remote::brave-search](remote_brave-search.md)
|
||||||
|
- [remote::model-context-protocol](remote_model-context-protocol.md)
|
||||||
|
- [remote::tavily-search](remote_tavily-search.md)
|
||||||
|
- [remote::wolfram-alpha](remote_wolfram-alpha.md)
|
13
docs/source/providers/tool_runtime/inline_rag-runtime.md
Normal file
13
docs/source/providers/tool_runtime/inline_rag-runtime.md
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# inline::rag-runtime
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
{}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
20
docs/source/providers/tool_runtime/remote_bing-search.md
Normal file
20
docs/source/providers/tool_runtime/remote_bing-search.md
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
# remote::bing-search
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Bing Search tool for web search capabilities using Microsoft's search engine.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `api_key` | `str \| None` | No | | |
|
||||||
|
| `top_k` | `<class 'int'>` | No | 3 | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
api_key: ${env.BING_API_KEY:}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
21
docs/source/providers/tool_runtime/remote_brave-search.md
Normal file
21
docs/source/providers/tool_runtime/remote_brave-search.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::brave-search
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Brave Search tool for web search capabilities with privacy-focused results.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `api_key` | `str \| None` | No | | The Brave Search API Key |
|
||||||
|
| `max_results` | `<class 'int'>` | No | 3 | The maximum number of results to return |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
api_key: ${env.BRAVE_SEARCH_API_KEY:+}
|
||||||
|
max_results: 3
|
||||||
|
|
||||||
|
```
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
# remote::model-context-protocol
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Model Context Protocol (MCP) tool for standardized tool calling and context management.
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
{}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
21
docs/source/providers/tool_runtime/remote_tavily-search.md
Normal file
21
docs/source/providers/tool_runtime/remote_tavily-search.md
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# remote::tavily-search
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Tavily Search tool for AI-optimized web search with structured results.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `api_key` | `str \| None` | No | | The Tavily Search API Key |
|
||||||
|
| `max_results` | `<class 'int'>` | No | 3 | The maximum number of results to return |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
api_key: ${env.TAVILY_SEARCH_API_KEY:+}
|
||||||
|
max_results: 3
|
||||||
|
|
||||||
|
```
|
||||||
|
|
19
docs/source/providers/tool_runtime/remote_wolfram-alpha.md
Normal file
19
docs/source/providers/tool_runtime/remote_wolfram-alpha.md
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
# remote::wolfram-alpha
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Wolfram Alpha tool for computational knowledge and mathematical calculations.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `api_key` | `str \| None` | No | | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
api_key: ${env.WOLFRAM_ALPHA_API_KEY:+}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
16
docs/source/providers/vector_io/index.md
Normal file
16
docs/source/providers/vector_io/index.md
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
# Vector_Io Providers
|
||||||
|
|
||||||
|
This section contains documentation for all available providers for the **vector_io** API.
|
||||||
|
|
||||||
|
- [inline::chromadb](inline_chromadb.md)
|
||||||
|
- [inline::faiss](inline_faiss.md)
|
||||||
|
- [inline::meta-reference](inline_meta-reference.md)
|
||||||
|
- [inline::milvus](inline_milvus.md)
|
||||||
|
- [inline::qdrant](inline_qdrant.md)
|
||||||
|
- [inline::sqlite-vec](inline_sqlite-vec.md)
|
||||||
|
- [inline::sqlite_vec](inline_sqlite_vec.md)
|
||||||
|
- [remote::chromadb](remote_chromadb.md)
|
||||||
|
- [remote::milvus](remote_milvus.md)
|
||||||
|
- [remote::pgvector](remote_pgvector.md)
|
||||||
|
- [remote::qdrant](remote_qdrant.md)
|
||||||
|
- [remote::weaviate](remote_weaviate.md)
|
52
docs/source/providers/vector_io/inline_chromadb.md
Normal file
52
docs/source/providers/vector_io/inline_chromadb.md
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
# inline::chromadb
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
|
||||||
|
[Chroma](https://www.trychroma.com/) is an inline and remote vector
|
||||||
|
database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
|
||||||
|
That means you're not limited to storing vectors in memory or in a separate service.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
Chroma supports:
|
||||||
|
- Store embeddings and their metadata
|
||||||
|
- Vector search
|
||||||
|
- Full-text search
|
||||||
|
- Document storage
|
||||||
|
- Metadata filtering
|
||||||
|
- Multi-modal retrieval
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
To use Chrome in your Llama Stack project, follow these steps:
|
||||||
|
|
||||||
|
1. Install the necessary dependencies.
|
||||||
|
2. Configure your Llama Stack project to use chroma.
|
||||||
|
3. Start storing and querying vectors.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
You can install chroma using pip:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install chromadb
|
||||||
|
```
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
db_path: ${env.CHROMADB_PATH}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
---
|
# inline::faiss
|
||||||
orphan: true
|
|
||||||
---
|
## Description
|
||||||
# Faiss
|
|
||||||
|
|
||||||
[Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It
|
[Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It
|
||||||
allows you to store and query vectors directly in memory.
|
allows you to store and query vectors directly in memory.
|
||||||
|
@ -31,3 +31,21 @@ pip install faiss-cpu
|
||||||
## Documentation
|
## Documentation
|
||||||
See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for
|
See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for
|
||||||
more details about Faiss in general.
|
more details about Faiss in general.
|
||||||
|
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
kvstore:
|
||||||
|
type: sqlite
|
||||||
|
namespace: null
|
||||||
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
|
||||||
|
|
||||||
|
```
|
||||||
|
|
26
docs/source/providers/vector_io/inline_meta-reference.md
Normal file
26
docs/source/providers/vector_io/inline_meta-reference.md
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
# inline::meta-reference
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Meta's reference implementation of a vector database.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
kvstore:
|
||||||
|
type: sqlite
|
||||||
|
namespace: null
|
||||||
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deprecation Notice
|
||||||
|
|
||||||
|
⚠️ **Warning**: Please use the `inline::faiss` provider instead.
|
||||||
|
|
26
docs/source/providers/vector_io/inline_milvus.md
Normal file
26
docs/source/providers/vector_io/inline_milvus.md
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
# inline::milvus
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
|
||||||
|
Please refer to the remote provider documentation.
|
||||||
|
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
|
||||||
|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy/milvus.db}
|
||||||
|
kvstore:
|
||||||
|
type: sqlite
|
||||||
|
namespace: null
|
||||||
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/${env.MILVUS_KVSTORE_DB_PATH:=~/.llama/dummy/milvus_registry.db}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
---
|
# inline::qdrant
|
||||||
orphan: true
|
|
||||||
---
|
## Description
|
||||||
# Qdrant
|
|
||||||
|
|
||||||
[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
|
[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
|
||||||
allows you to store and query vectors directly in memory.
|
allows you to store and query vectors directly in memory.
|
||||||
|
@ -44,3 +44,18 @@ docker pull qdrant/qdrant
|
||||||
```
|
```
|
||||||
## Documentation
|
## Documentation
|
||||||
See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general.
|
See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general.
|
||||||
|
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `path` | `<class 'str'>` | No | PydanticUndefined | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db
|
||||||
|
|
||||||
|
```
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
---
|
# inline::sqlite-vec
|
||||||
orphan: true
|
|
||||||
---
|
## Description
|
||||||
# SQLite-Vec
|
|
||||||
|
|
||||||
[SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It
|
[SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It
|
||||||
allows you to store and query vectors directly within an SQLite database.
|
allows you to store and query vectors directly within an SQLite database.
|
||||||
|
@ -199,3 +199,18 @@ pip install sqlite-vec
|
||||||
See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general.
|
See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general.
|
||||||
|
|
||||||
[^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759).
|
[^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759).
|
||||||
|
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
|
||||||
|
|
||||||
|
```
|
||||||
|
|
25
docs/source/providers/vector_io/inline_sqlite_vec.md
Normal file
25
docs/source/providers/vector_io/inline_sqlite_vec.md
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
# inline::sqlite_vec
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
|
||||||
|
Please refer to the sqlite-vec provider documentation.
|
||||||
|
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deprecation Notice
|
||||||
|
|
||||||
|
⚠️ **Warning**: Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
---
|
# remote::chromadb
|
||||||
orphan: true
|
|
||||||
---
|
## Description
|
||||||
# Chroma
|
|
||||||
|
|
||||||
[Chroma](https://www.trychroma.com/) is an inline and remote vector
|
[Chroma](https://www.trychroma.com/) is an inline and remote vector
|
||||||
database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
|
database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
|
||||||
|
@ -34,3 +34,18 @@ pip install chromadb
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
|
See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
|
||||||
|
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `url` | `str \| None` | No | PydanticUndefined | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
url: ${env.CHROMADB_URL}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
---
|
# remote::milvus
|
||||||
orphan: true
|
|
||||||
---
|
## Description
|
||||||
# Milvus
|
|
||||||
|
|
||||||
[Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It
|
[Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It
|
||||||
allows you to store and query vectors directly within a Milvus database.
|
allows you to store and query vectors directly within a Milvus database.
|
||||||
|
@ -96,7 +96,7 @@ vector_io:
|
||||||
#### Key Parameters for TLS Configuration
|
#### Key Parameters for TLS Configuration
|
||||||
|
|
||||||
- **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`.
|
- **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`.
|
||||||
- **`server_pem_path`**: Path to the **server certificate** for verifying the server’s identity (used in one-way TLS).
|
- **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS).
|
||||||
- **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS).
|
- **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS).
|
||||||
- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
|
- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
|
||||||
- **`client_key_path`**: Path to the **client private key** file (required for mTLS).
|
- **`client_key_path`**: Path to the **client private key** file (required for mTLS).
|
||||||
|
@ -105,3 +105,24 @@ vector_io:
|
||||||
See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
|
See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
|
||||||
|
|
||||||
For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md).
|
For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md).
|
||||||
|
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `uri` | `<class 'str'>` | No | PydanticUndefined | The URI of the Milvus server |
|
||||||
|
| `token` | `str \| None` | No | PydanticUndefined | The token of the Milvus server |
|
||||||
|
| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
|
||||||
|
| `config` | `dict` | No | {} | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |
|
||||||
|
|
||||||
|
> **Note**: This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider.
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
uri: ${env.MILVUS_ENDPOINT}
|
||||||
|
token: ${env.MILVUS_TOKEN}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
---
|
# remote::pgvector
|
||||||
orphan: true
|
|
||||||
---
|
## Description
|
||||||
# Postgres PGVector
|
|
||||||
|
|
||||||
[PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It
|
[PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It
|
||||||
allows you to store and query vectors directly in memory.
|
allows you to store and query vectors directly in memory.
|
||||||
|
@ -29,3 +29,26 @@ docker pull pgvector/pgvector:pg17
|
||||||
```
|
```
|
||||||
## Documentation
|
## Documentation
|
||||||
See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
|
See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
|
||||||
|
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Field | Type | Required | Default | Description |
|
||||||
|
|-------|------|----------|---------|-------------|
|
||||||
|
| `host` | `str \| None` | No | localhost | |
|
||||||
|
| `port` | `int \| None` | No | 5432 | |
|
||||||
|
| `db` | `str \| None` | No | postgres | |
|
||||||
|
| `user` | `str \| None` | No | postgres | |
|
||||||
|
| `password` | `str \| None` | No | mysecretpassword | |
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
host: ${env.PGVECTOR_HOST:=localhost}
|
||||||
|
port: ${env.PGVECTOR_PORT:=5432}
|
||||||
|
db: ${env.PGVECTOR_DB}
|
||||||
|
user: ${env.PGVECTOR_USER}
|
||||||
|
password: ${env.PGVECTOR_PASSWORD}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
docs/source/providers/vector_io/remote_qdrant.md (new file, 30 lines added)

@@ -0,0 +1,30 @@
# remote::qdrant

## Description

Please refer to the inline provider documentation.

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `location` | `str \| None` | No | | |
| `url` | `str \| None` | No | | |
| `port` | `int \| None` | No | 6333 | |
| `grpc_port` | `<class 'int'>` | No | 6334 | |
| `prefer_grpc` | `<class 'bool'>` | No | False | |
| `https` | `bool \| None` | No | | |
| `api_key` | `str \| None` | No | | |
| `prefix` | `str \| None` | No | | |
| `timeout` | `int \| None` | No | | |
| `host` | `str \| None` | No | | |

## Sample Configuration

```yaml
api_key: ${env.QDRANT_API_KEY}

```
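For orientation, the fields in the table above correspond to standard Qdrant client connection options. A hedged sketch (the provider builds its client internally; the URL and values here are placeholders):

```python
# Illustrative only: maps the documented fields onto qdrant_client options.
from qdrant_client import QdrantClient

client = QdrantClient(
    url="http://localhost:6333",  # `url` / `port`
    api_key=None,                 # `api_key`, e.g. ${env.QDRANT_API_KEY}
    prefer_grpc=False,            # `prefer_grpc`; gRPC traffic would use `grpc_port` (6334)
    timeout=30,                   # `timeout`
)
print(client.get_collections())
```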
@@ -1,7 +1,7 @@
----
-orphan: true
----
-# Weaviate
+# remote::weaviate
+
+## Description
+
 
 [Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack.
 It allows you to store and query vectors directly within a Weaviate database.
@@ -31,3 +31,12 @@ To install Weaviate see the [Weaviate quickstart documentation](https://weaviate
 
 ## Documentation
 See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general.
+
+## Sample Configuration
+
+```yaml
+{}
+
+```
@@ -32,6 +32,7 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
 from .openai_responses import (
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
+    OpenAIDeleteResponseObject,
     OpenAIResponseInput,
     OpenAIResponseInputTool,
     OpenAIResponseObject,
@@ -647,3 +648,12 @@ class Agents(Protocol):
         :returns: An ListOpenAIResponseInputItem.
         """
         ...
+
+    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE")
+    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
+        """Delete an OpenAI response by its ID.
+
+        :param response_id: The ID of the OpenAI response to delete.
+        :returns: An OpenAIDeleteResponseObject
+        """
+        ...
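The new webmethod above exposes a plain REST-style delete. As a hedged illustration of calling it over HTTP (the server address and response id are placeholders, and the exact mounting of the route may differ by deployment):

```python
# Hypothetical client-side call against the DELETE route declared above.
import requests

base_url = "http://localhost:8321"  # assumed Llama Stack server address
response_id = "resp_123"            # placeholder response id

resp = requests.delete(f"{base_url}/openai/v1/responses/{response_id}", timeout=30)
resp.raise_for_status()
print(resp.json())  # expected shape: {"id": "...", "object": "response", "deleted": true}
```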
@@ -229,6 +229,13 @@ class OpenAIResponseObject(BaseModel):
     user: str | None = None
 
 
+@json_schema_type
+class OpenAIDeleteResponseObject(BaseModel):
+    id: str
+    object: Literal["response"] = "response"
+    deleted: bool = True
+
+
 @json_schema_type
 class OpenAIResponseObjectStreamResponseCreated(BaseModel):
     response: OpenAIResponseObject
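For reference, the schema added above serializes to a minimal OpenAI-style delete acknowledgement. A self-contained mirror of the model (illustrative; the real class lives in the stack's `openai_responses` module):

```python
# Stand-in copy of OpenAIDeleteResponseObject, for illustration only.
from typing import Literal

from pydantic import BaseModel


class OpenAIDeleteResponseObject(BaseModel):
    id: str
    object: Literal["response"] = "response"
    deleted: bool = True


print(OpenAIDeleteResponseObject(id="resp_123").model_dump_json())
# -> {"id":"resp_123","object":"response","deleted":true}
```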
@@ -141,6 +141,12 @@ Fully-qualified name of the module to import. The module is expected to have:
     provider_data_validator: str | None = Field(
         default=None,
     )
+    description: str | None = Field(
+        default=None,
+        description="""
+A description of the provider. This is used to display in the documentation.
+""",
+    )
 
 
 @json_schema_type
@@ -167,6 +173,12 @@ Fully-qualified name of the module to import. The module is expected to have:
     provider_data_validator: str | None = Field(
         default=None,
     )
+    description: str | None = Field(
+        default=None,
+        description="""
+A description of the provider. This is used to display in the documentation.
+""",
+    )
 
 
 class RemoteProviderConfig(BaseModel):
@@ -359,3 +359,6 @@ class MetaReferenceAgentsImpl(Agents):
         return await self.openai_responses_impl.list_openai_response_input_items(
             response_id, after, before, include, limit, order
         )
+
+    async def delete_openai_response(self, response_id: str) -> None:
+        return await self.openai_responses_impl.delete_openai_response(response_id)
@@ -19,6 +19,7 @@ from llama_stack.apis.agents.openai_responses import (
     AllowedToolsFilter,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
+    OpenAIDeleteResponseObject,
     OpenAIResponseInput,
     OpenAIResponseInputFunctionToolCallOutput,
     OpenAIResponseInputMessageContent,
@@ -574,6 +575,9 @@ class OpenAIResponsesImpl:
             input=input,
         )
 
+    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
+        return await self.responses_store.delete_response_object(response_id)
+
     async def _convert_response_tools_to_chat_tools(
         self, tools: list[OpenAIResponseInputTool]
     ) -> tuple[
@@ -38,7 +38,7 @@ class TelemetryConfig(BaseModel):
         description="List of telemetry sinks to enable (possible values: otel, sqlite, console)",
     )
     sqlite_db_path: str = Field(
-        default=(RUNTIME_BASE_DIR / "trace_store.db").as_posix(),
+        default_factory=lambda: (RUNTIME_BASE_DIR / "trace_store.db").as_posix(),
         description="The path to the SQLite database to use for storing traces",
     )
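The change above swaps an eagerly computed `default=` for a `default_factory=`, so the SQLite path is resolved when a `TelemetryConfig` is instantiated rather than once at import time. A small self-contained sketch of the difference (names are illustrative, not the stack's own):

```python
# Illustrative only: shows why default_factory evaluates lazily while
# default= is computed once at class-definition (import) time.
from pathlib import Path
from pydantic import BaseModel, Field

RUNTIME_BASE_DIR = Path("/tmp/llama-runtime")  # placeholder for the real constant

class EagerConfig(BaseModel):
    # Evaluated once, when this module is imported.
    db_path: str = Field(default=(RUNTIME_BASE_DIR / "trace_store.db").as_posix())

class LazyConfig(BaseModel):
    # Evaluated each time a LazyConfig instance is created.
    db_path: str = Field(default_factory=lambda: (RUNTIME_BASE_DIR / "trace_store.db").as_posix())

RUNTIME_BASE_DIR = Path("/srv/llama-runtime")  # pretend the base dir changed later
print(EagerConfig().db_path)  # still /tmp/llama-runtime/trace_store.db
print(LazyConfig().db_path)   # /srv/llama-runtime/trace_store.db
```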
@@ -35,5 +35,6 @@ def available_providers() -> list[ProviderSpec]:
                 Api.tool_runtime,
                 Api.tool_groups,
             ],
+            description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
         ),
     ]
@@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
             module="llama_stack.providers.inline.datasetio.localfs",
             config_class="llama_stack.providers.inline.datasetio.localfs.LocalFSDatasetIOConfig",
             api_dependencies=[],
+            description="Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.",
         ),
         remote_provider_spec(
             api=Api.datasetio,
@@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]:
                 ],
                 module="llama_stack.providers.remote.datasetio.huggingface",
                 config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig",
+                description="HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.",
             ),
         ),
         remote_provider_spec(
@@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]:
                 ],
                 module="llama_stack.providers.remote.datasetio.nvidia",
                 config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig",
+                description="NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.",
             ),
         ),
     ]
@@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
                 Api.inference,
                 Api.agents,
             ],
+            description="Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.",
        ),
         remote_provider_spec(
             api=Api.eval,
@@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]:
                 ],
                 module="llama_stack.providers.remote.eval.nvidia",
                 config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig",
+                description="NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.",
             ),
             api_dependencies=[
                 Api.datasetio,
@@ -21,5 +21,6 @@ def available_providers() -> list[ProviderSpec]:
             pip_packages=sql_store_pip_packages,
             module="llama_stack.providers.inline.files.localfs",
             config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig",
+            description="Local filesystem-based file storage provider for managing files and documents locally.",
         ),
     ]
@@ -35,6 +35,7 @@ def available_providers() -> list[ProviderSpec]:
             pip_packages=META_REFERENCE_DEPS,
             module="llama_stack.providers.inline.inference.meta_reference",
             config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
+            description="Meta's reference implementation of inference with support for various model formats and optimization techniques.",
         ),
         InlineProviderSpec(
             api=Api.inference,
@@ -44,6 +45,7 @@ def available_providers() -> list[ProviderSpec]:
             ],
             module="llama_stack.providers.inline.inference.vllm",
             config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig",
+            description="vLLM inference provider for high-performance model serving with PagedAttention and continuous batching.",
         ),
         InlineProviderSpec(
             api=Api.inference,
@@ -54,6 +56,7 @@ def available_providers() -> list[ProviderSpec]:
             ],
             module="llama_stack.providers.inline.inference.sentence_transformers",
             config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
+            description="Sentence Transformers inference provider for text embeddings and similarity search.",
         ),
         remote_provider_spec(
             api=Api.inference,
@@ -64,6 +67,7 @@ def available_providers() -> list[ProviderSpec]:
                 ],
                 module="llama_stack.providers.remote.inference.cerebras",
                 config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
+                description="Cerebras inference provider for running models on Cerebras Cloud platform.",
             ),
         ),
         remote_provider_spec(
@@ -73,6 +77,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
                 config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
                 module="llama_stack.providers.remote.inference.ollama",
+                description="Ollama inference provider for running local models through the Ollama runtime.",
             ),
         ),
         remote_provider_spec(
@@ -82,6 +87,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["openai"],
                 module="llama_stack.providers.remote.inference.vllm",
                 config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
+                description="Remote vLLM inference provider for connecting to vLLM servers.",
             ),
         ),
         remote_provider_spec(
@@ -91,6 +97,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["huggingface_hub", "aiohttp"],
                 module="llama_stack.providers.remote.inference.tgi",
                 config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
+                description="Text Generation Inference (TGI) provider for HuggingFace model serving.",
             ),
         ),
         remote_provider_spec(
@@ -100,6 +107,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["huggingface_hub", "aiohttp"],
                 module="llama_stack.providers.remote.inference.tgi",
                 config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
+                description="HuggingFace Inference API serverless provider for on-demand model inference.",
             ),
         ),
         remote_provider_spec(
@@ -109,6 +117,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["huggingface_hub", "aiohttp"],
                 module="llama_stack.providers.remote.inference.tgi",
                 config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
+                description="HuggingFace Inference Endpoints provider for dedicated model serving.",
             ),
         ),
         remote_provider_spec(
@@ -121,6 +130,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.fireworks",
                 config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator",
+                description="Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.",
             ),
         ),
         remote_provider_spec(
@@ -133,6 +143,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.together",
                 config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator",
+                description="Together AI inference provider for open-source models and collaborative AI development.",
             ),
         ),
         remote_provider_spec(
@@ -142,6 +153,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["boto3"],
                 module="llama_stack.providers.remote.inference.bedrock",
                 config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
+                description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.",
             ),
         ),
         remote_provider_spec(
@@ -153,6 +165,7 @@ def available_providers() -> list[ProviderSpec]:
                 ],
                 module="llama_stack.providers.remote.inference.databricks",
                 config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
+                description="Databricks inference provider for running models on Databricks' unified analytics platform.",
             ),
         ),
         remote_provider_spec(
@@ -164,6 +177,7 @@ def available_providers() -> list[ProviderSpec]:
                 ],
                 module="llama_stack.providers.remote.inference.nvidia",
                 config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
+                description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
             ),
         ),
         remote_provider_spec(
@@ -173,6 +187,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["openai"],
                 module="llama_stack.providers.remote.inference.runpod",
                 config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
+                description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
             ),
         ),
         remote_provider_spec(
@@ -183,6 +198,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.openai",
                 config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
+                description="OpenAI inference provider for accessing GPT models and other OpenAI services.",
             ),
         ),
         remote_provider_spec(
@@ -193,6 +209,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.anthropic",
                 config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
+                description="Anthropic inference provider for accessing Claude models and Anthropic's AI services.",
             ),
         ),
         remote_provider_spec(
@@ -203,6 +220,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.gemini",
                 config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
+                description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
             ),
         ),
         remote_provider_spec(
@@ -213,6 +231,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.groq",
                 config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
+                description="Groq inference provider for ultra-fast inference using Groq's LPU technology.",
             ),
         ),
         remote_provider_spec(
@@ -223,6 +242,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.fireworks_openai_compat",
                 config_class="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksCompatConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator",
+                description="Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.",
             ),
         ),
         remote_provider_spec(
@@ -233,6 +253,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.llama_openai_compat",
                 config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
+                description="Llama OpenAI-compatible provider for using Llama models with OpenAI API format.",
             ),
         ),
         remote_provider_spec(
@@ -243,6 +264,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.together_openai_compat",
                 config_class="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherCompatConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherProviderDataValidator",
+                description="Together AI OpenAI-compatible provider for using Together models with OpenAI API format.",
             ),
         ),
         remote_provider_spec(
@@ -253,6 +275,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.groq_openai_compat",
                 config_class="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqCompatConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqProviderDataValidator",
+                description="Groq OpenAI-compatible provider for using Groq models with OpenAI API format.",
             ),
         ),
         remote_provider_spec(
@@ -263,6 +286,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.sambanova_openai_compat",
                 config_class="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaCompatConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaProviderDataValidator",
+                description="SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format.",
             ),
         ),
         remote_provider_spec(
@@ -273,6 +297,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.cerebras_openai_compat",
                 config_class="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasCompatConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasProviderDataValidator",
+                description="Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.",
             ),
         ),
         remote_provider_spec(
@@ -283,6 +308,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.sambanova",
                 config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
+                description="SambaNova inference provider for running models on SambaNova's dataflow architecture.",
             ),
         ),
         remote_provider_spec(
@@ -293,6 +319,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.passthrough",
                 config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator",
+                description="Passthrough inference provider for connecting to any external inference service not directly supported.",
             ),
         ),
         remote_provider_spec(
@@ -303,6 +330,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.inference.watsonx",
                 config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
                 provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",
+                description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.",
             ),
         ),
     ]
@@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]:
                 Api.datasetio,
                 Api.datasets,
             ],
+            description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.",
         ),
         InlineProviderSpec(
             api=Api.post_training,
@@ -31,6 +32,7 @@ def available_providers() -> list[ProviderSpec]:
                 Api.datasetio,
                 Api.datasets,
             ],
+            description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
         ),
         remote_provider_spec(
             api=Api.post_training,
@@ -39,6 +41,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["requests", "aiohttp"],
                 module="llama_stack.providers.remote.post_training.nvidia",
                 config_class="llama_stack.providers.remote.post_training.nvidia.NvidiaPostTrainingConfig",
+                description="NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.",
             ),
         ),
     ]
@@ -25,6 +25,7 @@ def available_providers() -> list[ProviderSpec]:
             ],
             module="llama_stack.providers.inline.safety.prompt_guard",
             config_class="llama_stack.providers.inline.safety.prompt_guard.PromptGuardConfig",
+            description="Prompt Guard safety provider for detecting and filtering unsafe prompts and content.",
         ),
         InlineProviderSpec(
             api=Api.safety,
@@ -35,6 +36,7 @@ def available_providers() -> list[ProviderSpec]:
             api_dependencies=[
                 Api.inference,
             ],
+            description="Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.",
         ),
         InlineProviderSpec(
             api=Api.safety,
@@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]:
             ],
             module="llama_stack.providers.inline.safety.code_scanner",
             config_class="llama_stack.providers.inline.safety.code_scanner.CodeScannerConfig",
+            description="Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.",
         ),
         remote_provider_spec(
             api=Api.safety,
@@ -52,6 +55,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["boto3"],
                 module="llama_stack.providers.remote.safety.bedrock",
                 config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig",
+                description="AWS Bedrock safety provider for content moderation using AWS's safety services.",
             ),
         ),
         remote_provider_spec(
@@ -61,6 +65,7 @@ def available_providers() -> list[ProviderSpec]:
                 pip_packages=["requests"],
                 module="llama_stack.providers.remote.safety.nvidia",
                 config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig",
+                description="NVIDIA's safety provider for content moderation and safety filtering.",
             ),
         ),
         remote_provider_spec(
@@ -71,6 +76,7 @@ def available_providers() -> list[ProviderSpec]:
                 module="llama_stack.providers.remote.safety.sambanova",
                 config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig",
                 provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator",
+                description="SambaNova's safety provider for content moderation and safety filtering.",
             ),
         ),
     ]
Some files were not shown because too many files have changed in this diff.