This commit is contained in:
Sébastien Han 2025-07-25 17:05:58 +02:00 committed by GitHub
commit 7a5be8d0c0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 607 additions and 319 deletions

View file

@ -186,6 +186,35 @@ docker run \
--port $LLAMA_STACK_PORT
```
To run the distribution with a Postgres store, you can use the following command to override the entrypoint:
```bash
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-e OPENAI_API_KEY=your_openai_key \
-e FIREWORKS_API_KEY=your_fireworks_key \
-e TOGETHER_API_KEY=your_together_key \
-e POSTGRES_HOST=your_postgres_host \
-e POSTGRES_PORT=your_postgres_port \
-e POSTGRES_DB=your_postgres_db \
-e POSTGRES_USER=your_postgres_user \
-e POSTGRES_PASSWORD=your_postgres_password \
--entrypoint python \
llamastack/distribution-starter \
-m llama_stack.distribution.server.server \
--config run-with-postgres-store.yaml
```
Postgres environment variables:
- `POSTGRES_HOST`: Postgres host (default: `localhost`)
- `POSTGRES_PORT`: Postgres port (default: `5432`)
- `POSTGRES_DB`: Postgres database name (default: `llamastack`)
- `POSTGRES_USER`: Postgres username (default: `llamastack`)
- `POSTGRES_PASSWORD`: Postgres password (default: `llamastack`)
### Via Conda or venv
Ensure you have configured the starter distribution using the environment variables explained above.

View file

@ -27,6 +27,9 @@ RUN_CONFIG_PATH=/app/run.yaml
BUILD_CONTEXT_DIR=$(pwd)
# Placeholder for template files
TEMPLATE_FILES=()
set -euo pipefail
# Define color codes
@ -272,6 +275,19 @@ EOF
add_to_container << EOF
COPY run.yaml $RUN_CONFIG_PATH
EOF
# This is a template, we don't need to copy the run config but we still want to include potential
# other run files like -with-postgres-store.yaml or -with-safety.yaml
else
template_files=$(find "$BUILD_CONTEXT_DIR"/llama_stack/templates/"$template_or_config" -name "*.yaml" -not -name "run.yaml" -not -name "build.yaml")
echo "Copying template files: $template_files"
for file in $template_files; do
template_file_name=$(basename "$file")
TEMPLATE_FILES+=("$template_file_name")
cp "$file" "$BUILD_CONTEXT_DIR/$template_file_name"
add_to_container << EOF
COPY $template_file_name /app/$template_file_name
EOF
done
fi
stack_mount="/app/llama-stack-source"
@ -333,6 +349,11 @@ fi
RUN pip uninstall -y uv
EOF
# We can have a custom entrypoint.sh that figures out which server command to run based on something
# simple, like an env var. So if someone wants to use Postgres, the script will kick off the server
# with the right run YAML. Or, if they prefer, they can just override the entrypoint themselves and
# point it to whatever run YAML they want.
#
# If a run config is provided, we use the --config flag
if [[ -n "$run_config" ]]; then
add_to_container << EOF
@ -414,7 +435,8 @@ $CONTAINER_BINARY build \
"$BUILD_CONTEXT_DIR"
# clean up tmp/configs
rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR"
rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR" "${TEMPLATE_FILES[@]}"
set +x
echo "Success!"

View file

@ -98,4 +98,5 @@ image_name: ci-tests
additional_pip_packages:
- aiosqlite
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]

View file

@ -0,0 +1,260 @@
version: 2
image_name: ci-tests
apis:
- agents
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ${env.ENABLE_CEREBRAS:=__disabled__}
provider_type: remote::cerebras
config:
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY}
- provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
- provider_id: ${env.ENABLE_VLLM:=__disabled__}
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.ENABLE_TGI:=__disabled__}
provider_type: remote::tgi
config:
url: ${env.TGI_URL}
- provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__}
provider_type: remote::hf::serverless
config:
huggingface_repo: ${env.INFERENCE_MODEL}
api_token: ${env.HF_API_TOKEN}
- provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__}
provider_type: remote::hf::endpoint
config:
endpoint_name: ${env.INFERENCE_ENDPOINT_NAME}
api_token: ${env.HF_API_TOKEN}
- provider_id: ${env.ENABLE_FIREWORKS:=__disabled__}
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
- provider_id: ${env.ENABLE_TOGETHER:=__disabled__}
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY}
- provider_id: ${env.ENABLE_BEDROCK:=__disabled__}
provider_type: remote::bedrock
- provider_id: ${env.ENABLE_DATABRICKS:=__disabled__}
provider_type: remote::databricks
config:
url: ${env.DATABRICKS_URL}
api_token: ${env.DATABRICKS_API_TOKEN}
- provider_id: ${env.ENABLE_NVIDIA:=__disabled__}
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: ${env.ENABLE_RUNPOD:=__disabled__}
provider_type: remote::runpod
config:
url: ${env.RUNPOD_URL:=}
api_token: ${env.RUNPOD_API_TOKEN}
- provider_id: ${env.ENABLE_OPENAI:=__disabled__}
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY}
- provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__}
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY}
- provider_id: ${env.ENABLE_GEMINI:=__disabled__}
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY}
- provider_id: ${env.ENABLE_GROQ:=__disabled__}
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY}
- provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__}
provider_type: remote::llama-openai-compat
config:
openai_compat_api_base: https://api.llama.com/compat/v1/
api_key: ${env.LLAMA_API_KEY}
- provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__}
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
- provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__}
provider_type: remote::passthrough
config:
url: ${env.PASSTHROUGH_URL}
api_key: ${env.PASSTHROUGH_API_KEY}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: ${env.ENABLE_FAISS:=faiss}
provider_type: inline::faiss
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db
- provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__}
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db
- provider_id: ${env.ENABLE_MILVUS:=__disabled__}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db
- provider_id: ${env.ENABLE_CHROMADB:=__disabled__}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/chroma_remote_registry.db
- provider_id: ${env.ENABLE_PGVECTOR:=__disabled__}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB}
user: ${env.PGVECTOR_USER}
password: ${env.PGVECTOR_PASSWORD}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db
files:
- provider_id: localfs
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
post_training:
- provider_id: huggingface
provider_type: inline::huggingface
config:
checkpoint_format: huggingface
distributed_backend: null
device: cpu
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
metadata_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
inference_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups: []
server:
port: 8321

View file

@ -112,39 +112,39 @@ providers:
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db
- provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__}
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db
- provider_id: ${env.ENABLE_MILVUS:=__disabled__}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db
- provider_id: ${env.ENABLE_CHROMADB:=__disabled__}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
url: ${env.CHROMADB_URL}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/chroma_remote_registry.db
- provider_id: ${env.ENABLE_PGVECTOR:=__disabled__}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
db: ${env.PGVECTOR_DB}
user: ${env.PGVECTOR_USER}
password: ${env.PGVECTOR_PASSWORD}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db
files:
- provider_id: meta-reference-files
provider_type: inline::localfs

View file

@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .postgres_demo import get_distribution_template # noqa: F401

View file

@ -1,36 +0,0 @@
version: 2
distribution_spec:
description: Quick start template for running Llama Stack with several popular providers
providers:
inference:
- provider_id: vllm-inference
provider_type: remote::vllm
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: chromadb
provider_type: remote::chromadb
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
- provider_id: tavily-search
provider_type: remote::tavily-search
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
image_type: conda
image_name: postgres-demo
additional_pip_packages:
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]

View file

@ -1,148 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.models import ModelType
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
)
def get_distribution_template() -> DistributionTemplate:
inference_providers = [
Provider(
provider_id="vllm-inference",
provider_type="remote::vllm",
config=VLLMInferenceAdapterConfig.sample_run_config(
url="${env.VLLM_URL:=http://localhost:8000/v1}",
),
),
]
providers = {
"inference": inference_providers
+ [
Provider(provider_id="sentence-transformers", provider_type="inline::sentence-transformers"),
],
"vector_io": [
Provider(provider_id="chromadb", provider_type="remote::chromadb"),
],
"safety": [Provider(provider_id="llama-guard", provider_type="inline::llama-guard")],
"agents": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
"telemetry": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
"tool_runtime": [
Provider(provider_id="brave-search", provider_type="remote::brave-search"),
Provider(provider_id="tavily-search", provider_type="remote::tavily-search"),
Provider(provider_id="rag-runtime", provider_type="inline::rag-runtime"),
Provider(
provider_id="model-context-protocol",
provider_type="remote::model-context-protocol",
),
],
}
name = "postgres-demo"
vector_io_providers = [
Provider(
provider_id="${env.ENABLE_CHROMADB:+chromadb}",
provider_type="remote::chromadb",
config=ChromaVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
url="${env.CHROMADB_URL:=}",
),
),
]
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::rag",
provider_id="rag-runtime",
),
]
default_models = [
ModelInput(
model_id="${env.INFERENCE_MODEL}",
provider_id="vllm-inference",
)
]
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id=embedding_provider.provider_id,
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)
postgres_config = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate(
name=name,
distro_type="self_hosted",
description="Quick start template for running Llama Stack with several popular providers",
container_image=None,
template_path=None,
providers=providers,
available_models_by_provider={},
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers,
"agents": [
Provider(
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
persistence_store=postgres_config,
responses_store=postgres_config,
),
)
],
"telemetry": [
Provider(
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
service_name="${env.OTEL_SERVICE_NAME:=\u200b}",
sinks="${env.TELEMETRY_SINKS:=console,otel_trace}",
otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}",
),
)
],
},
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
metadata_store=PostgresKVStoreConfig.sample_run_config(),
inference_store=postgres_config,
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (
"8321",
"Port for the Llama Stack distribution server",
),
},
)

View file

@ -1,111 +0,0 @@
version: 2
image_name: postgres-demo
apis:
- agents
- inference
- safety
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: vllm-inference
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=http://localhost:8000/v1}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/chroma_remote_registry.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,otel_trace}
otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
metadata_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
inference_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321

View file

@ -98,4 +98,5 @@ image_name: starter
additional_pip_packages:
- aiosqlite
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]

View file

@ -0,0 +1,260 @@
version: 2
image_name: starter
apis:
- agents
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ${env.ENABLE_CEREBRAS:=__disabled__}
provider_type: remote::cerebras
config:
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY}
- provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
- provider_id: ${env.ENABLE_VLLM:=__disabled__}
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.ENABLE_TGI:=__disabled__}
provider_type: remote::tgi
config:
url: ${env.TGI_URL}
- provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__}
provider_type: remote::hf::serverless
config:
huggingface_repo: ${env.INFERENCE_MODEL}
api_token: ${env.HF_API_TOKEN}
- provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__}
provider_type: remote::hf::endpoint
config:
endpoint_name: ${env.INFERENCE_ENDPOINT_NAME}
api_token: ${env.HF_API_TOKEN}
- provider_id: ${env.ENABLE_FIREWORKS:=__disabled__}
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
- provider_id: ${env.ENABLE_TOGETHER:=__disabled__}
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY}
- provider_id: ${env.ENABLE_BEDROCK:=__disabled__}
provider_type: remote::bedrock
- provider_id: ${env.ENABLE_DATABRICKS:=__disabled__}
provider_type: remote::databricks
config:
url: ${env.DATABRICKS_URL}
api_token: ${env.DATABRICKS_API_TOKEN}
- provider_id: ${env.ENABLE_NVIDIA:=__disabled__}
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: ${env.ENABLE_RUNPOD:=__disabled__}
provider_type: remote::runpod
config:
url: ${env.RUNPOD_URL:=}
api_token: ${env.RUNPOD_API_TOKEN}
- provider_id: ${env.ENABLE_OPENAI:=__disabled__}
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY}
- provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__}
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY}
- provider_id: ${env.ENABLE_GEMINI:=__disabled__}
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY}
- provider_id: ${env.ENABLE_GROQ:=__disabled__}
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY}
- provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__}
provider_type: remote::llama-openai-compat
config:
openai_compat_api_base: https://api.llama.com/compat/v1/
api_key: ${env.LLAMA_API_KEY}
- provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__}
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY}
- provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__}
provider_type: remote::passthrough
config:
url: ${env.PASSTHROUGH_URL}
api_key: ${env.PASSTHROUGH_API_KEY}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: ${env.ENABLE_FAISS:=faiss}
provider_type: inline::faiss
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
- provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__}
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
- provider_id: ${env.ENABLE_MILVUS:=__disabled__}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
- provider_id: ${env.ENABLE_CHROMADB:=__disabled__}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/chroma_remote_registry.db
- provider_id: ${env.ENABLE_PGVECTOR:=__disabled__}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB}
user: ${env.PGVECTOR_USER}
password: ${env.PGVECTOR_PASSWORD}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
files:
- provider_id: localfs
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
post_training:
- provider_id: huggingface
provider_type: inline::huggingface
config:
checkpoint_format: huggingface
distributed_backend: null
device: cpu
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
metadata_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
inference_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups: []
server:
port: 8321

View file

@ -130,18 +130,18 @@ providers:
- provider_id: ${env.ENABLE_CHROMADB:=__disabled__}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
url: ${env.CHROMADB_URL}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/chroma_remote_registry.db
- provider_id: ${env.ENABLE_PGVECTOR:=__disabled__}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
db: ${env.PGVECTOR_DB}
user: ${env.PGVECTOR_USER}
password: ${env.PGVECTOR_PASSWORD}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db

View file

@ -379,6 +379,7 @@ def get_distribution_template() -> DistributionTemplate:
available_safety_models = get_safety_models_for_providers(remote_inference_providers)
shields = get_shield_registry(available_safety_models, ids_conflict_in_models)
postgres_config = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate(
name=name,
distro_type="self_hosted",
@ -401,6 +402,22 @@ def get_distribution_template() -> DistributionTemplate:
# TODO: add a way to enable/disable shields on the fly
default_shields=shields,
),
"run-with-postgres-store.yaml": RunConfigSettings(
provider_overrides={
"agents": [
Provider(
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
persistence_store=postgres_config,
responses_store=postgres_config,
),
)
],
},
inference_store=postgres_config,
metadata_store=postgres_config,
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (