mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
feat: ability to use postgres as store for starter distro
The starter distribution now comes with all the required packages to support persistent stores—like the agent store, metadata, and inference—using PostgreSQL. We’ve added a new run YAML file, run-with-postgres-store.yaml, to make this setup easy. The file is included in the container image, so users can simply override the entrypoint to use the Postgres-specific config. The documentation has been updated with instructions on how to do that. Closes: #2619 Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
parent
b6cb817897
commit
046d85fa79
15 changed files with 426 additions and 336 deletions
4
.github/workflows/providers-build.yml
vendored
4
.github/workflows/providers-build.yml
vendored
|
@ -112,7 +112,7 @@ jobs:
|
|||
fi
|
||||
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
|
||||
echo "Entrypoint: $entrypoint"
|
||||
if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
|
||||
if [ "$entrypoint" != "[/app/entrypoint.sh /app/run.yaml]" ]; then
|
||||
echo "Entrypoint is not correct"
|
||||
exit 1
|
||||
fi
|
||||
|
@ -150,7 +150,7 @@ jobs:
|
|||
fi
|
||||
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
|
||||
echo "Entrypoint: $entrypoint"
|
||||
if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
|
||||
if [ "$entrypoint" != "[/app/entrypoint.sh /app/run.yaml]" ]; then
|
||||
echo "Entrypoint is not correct"
|
||||
exit 1
|
||||
fi
|
||||
|
|
|
@ -164,7 +164,41 @@ docker run \
|
|||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
### Via venv
|
||||
The container will run the distribution with a SQLite store by default. This store is used for the following components:
|
||||
|
||||
- Metadata store: store metadata about the models, providers, etc.
|
||||
- Inference store: collect of responses from the inference provider
|
||||
- Agents store: store agent configurations (sessions, turns, etc.)
|
||||
- Agents Responses store: store responses from the agents
|
||||
|
||||
However, you can override the default behavior by setting the `ENABLE_POSTGRES_STORE` environment variable to `1`.
|
||||
To run the distribution with a Postgres store, you can use the following command:
|
||||
|
||||
```bash
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-e OPENAI_API_KEY=your_openai_key \
|
||||
-e FIREWORKS_API_KEY=your_fireworks_key \
|
||||
-e TOGETHER_API_KEY=your_together_key \
|
||||
-e POSTGRES_HOST=your_postgres_host \
|
||||
-e POSTGRES_PORT=your_postgres_port \
|
||||
-e POSTGRES_DB=your_postgres_db \
|
||||
-e POSTGRES_USER=your_postgres_user \
|
||||
-e POSTGRES_PASSWORD=your_postgres_password \
|
||||
-e ENABLE_POSTGRES_STORE=1
|
||||
```
|
||||
|
||||
Postgres environment variables:
|
||||
|
||||
- `POSTGRES_HOST`: Postgres host (default: `localhost`)
|
||||
- `POSTGRES_PORT`: Postgres port (default: `5432`)
|
||||
- `POSTGRES_DB`: Postgres database name (default: `llamastack`)
|
||||
- `POSTGRES_USER`: Postgres username (default: `llamastack`)
|
||||
- `POSTGRES_PASSWORD`: Postgres password (default: `llamastack`)
|
||||
|
||||
### Via Conda or venv
|
||||
|
||||
Ensure you have configured the starter distribution using the environment variables explained above.
|
||||
|
||||
|
|
|
@ -23,6 +23,9 @@ RUN_CONFIG_PATH=/app/run.yaml
|
|||
|
||||
BUILD_CONTEXT_DIR=$(pwd)
|
||||
|
||||
# Placeholder for template files
|
||||
TEMPLATE_FILES=()
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Define color codes
|
||||
|
@ -324,14 +327,19 @@ fi
|
|||
RUN pip uninstall -y uv
|
||||
EOF
|
||||
|
||||
# If a run config is provided, we use the --config flag
|
||||
# Copy the entrypoint script to build context and then into the container
|
||||
cp llama_stack/core/entrypoint.sh "$BUILD_CONTEXT_DIR/entrypoint.sh"
|
||||
add_to_container << EOF
|
||||
COPY entrypoint.sh /app/entrypoint.sh
|
||||
RUN chmod +x /app/entrypoint.sh
|
||||
EOF
|
||||
if [[ -n "$run_config" ]]; then
|
||||
add_to_container << EOF
|
||||
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$RUN_CONFIG_PATH"]
|
||||
ENTRYPOINT ["/app/entrypoint.sh", "$RUN_CONFIG_PATH"]
|
||||
EOF
|
||||
elif [[ "$distro_or_config" != *.yaml ]]; then
|
||||
add_to_container << EOF
|
||||
ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$distro_or_config"]
|
||||
ENTRYPOINT ["/app/entrypoint.sh", "$distro_or_config"]
|
||||
EOF
|
||||
fi
|
||||
|
||||
|
@ -404,7 +412,8 @@ $CONTAINER_BINARY build \
|
|||
"$BUILD_CONTEXT_DIR"
|
||||
|
||||
# clean up tmp/configs
|
||||
rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR"
|
||||
rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$BUILD_CONTEXT_DIR/entrypoint.sh" "$TEMP_DIR" "${TEMPLATE_FILES[@]}"
|
||||
|
||||
set +x
|
||||
|
||||
echo "Success!"
|
||||
|
|
38
llama_stack/core/entrypoint.sh
Executable file
38
llama_stack/core/entrypoint.sh
Executable file
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
determine_server_command() {
|
||||
local server_args=()
|
||||
|
||||
# The env variable will take precedence over the config file
|
||||
if [ -n "${ENABLE_POSTGRES_STORE:-}" ]; then
|
||||
server_args=("python3" "-m" "llama_stack.core.server.server" "starter::run-with-postgres-store")
|
||||
elif [ -n "$1" ]; then
|
||||
server_args=("python3" "-m" "llama_stack.core.server.server" "$1")
|
||||
fi
|
||||
echo "${server_args[@]}"
|
||||
}
|
||||
|
||||
main() {
|
||||
echo "Starting Llama Stack server..."
|
||||
|
||||
local server_command
|
||||
server_command=$(determine_server_command "$@")
|
||||
|
||||
if [[ -z "$server_command" ]]; then
|
||||
echo "Error: Could not determine server command"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
printf "Executing: %s\n" "$server_command"
|
||||
exec $server_command
|
||||
}
|
||||
|
||||
main "$@"
|
|
@ -52,7 +52,17 @@ def resolve_config_or_distro(
|
|||
logger.info(f"Using distribution: {distro_config}")
|
||||
return distro_config
|
||||
|
||||
# Strategy 3: Try as built distribution name
|
||||
# Strategy 3: Try as distro config path (if no .yaml extension and contains a slash)
|
||||
# eg: starter::run-with-postgres-store.yaml
|
||||
# Use :: to avoid slash and confusion with a filesystem path
|
||||
if "::" in config_or_distro:
|
||||
distro_name, config_name = config_or_distro.split("::")
|
||||
distro_config = _get_distro_config_path(distro_name, config_name)
|
||||
if distro_config.exists():
|
||||
logger.info(f"Using distribution: {distro_config}")
|
||||
return distro_config
|
||||
|
||||
# Strategy 4: Try as built distribution name
|
||||
distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
|
||||
if distrib_config.exists():
|
||||
logger.info(f"Using built distribution: {distrib_config}")
|
||||
|
@ -63,12 +73,15 @@ def resolve_config_or_distro(
|
|||
logger.info(f"Using built distribution: {distrib_config}")
|
||||
return distrib_config
|
||||
|
||||
# Strategy 4: Failed - provide helpful error
|
||||
# Strategy 5: Failed - provide helpful error
|
||||
raise ValueError(_format_resolution_error(config_or_distro, mode))
|
||||
|
||||
|
||||
def _get_distro_config_path(distro_name: str, mode: Mode) -> Path:
|
||||
def _get_distro_config_path(distro_name: str, mode: str) -> Path:
|
||||
"""Get the config file path for a distro."""
|
||||
if mode.endswith(".yaml"):
|
||||
return DISTRO_DIR / distro_name / mode
|
||||
|
||||
return DISTRO_DIR / distro_name / f"{mode}.yaml"
|
||||
|
||||
|
||||
|
|
|
@ -84,6 +84,14 @@ def run_command(command: list[str]) -> int:
|
|||
text=True,
|
||||
check=False,
|
||||
)
|
||||
|
||||
# Print stdout and stderr if command failed
|
||||
if result.returncode != 0:
|
||||
if result.stdout:
|
||||
log.error(f"STDOUT: {result.stdout}")
|
||||
if result.stderr:
|
||||
log.error(f"STDERR: {result.stderr}")
|
||||
|
||||
return result.returncode
|
||||
except subprocess.SubprocessError as e:
|
||||
log.error(f"Subprocess error: {e}")
|
||||
|
|
|
@ -13,5 +13,6 @@ from ..starter.starter import get_distribution_template as get_starter_distribut
|
|||
def get_distribution_template() -> DistributionTemplate:
|
||||
template = get_starter_distribution_template(name="ci-tests")
|
||||
template.description = "CI tests for Llama Stack"
|
||||
template.run_configs.pop("run-with-postgres-store.yaml", None)
|
||||
|
||||
return template
|
||||
|
|
|
@ -1,7 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .postgres_demo import get_distribution_template # noqa: F401
|
|
@ -1,25 +0,0 @@
|
|||
version: 2
|
||||
distribution_spec:
|
||||
description: Quick start template for running Llama Stack with several popular providers
|
||||
providers:
|
||||
inference:
|
||||
- provider_type: remote::vllm
|
||||
- provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- provider_type: remote::chromadb
|
||||
safety:
|
||||
- provider_type: inline::llama-guard
|
||||
agents:
|
||||
- provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- provider_type: inline::meta-reference
|
||||
tool_runtime:
|
||||
- provider_type: remote::brave-search
|
||||
- provider_type: remote::tavily-search
|
||||
- provider_type: inline::rag-runtime
|
||||
- provider_type: remote::model-context-protocol
|
||||
image_type: venv
|
||||
additional_pip_packages:
|
||||
- asyncpg
|
||||
- psycopg2-binary
|
||||
- sqlalchemy[asyncio]
|
|
@ -1,144 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildProvider,
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.distributions.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
|
||||
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
|
||||
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
|
||||
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
inference_providers = [
|
||||
Provider(
|
||||
provider_id="vllm-inference",
|
||||
provider_type="remote::vllm",
|
||||
config=VLLMInferenceAdapterConfig.sample_run_config(
|
||||
url="${env.VLLM_URL:=http://localhost:8000/v1}",
|
||||
),
|
||||
),
|
||||
]
|
||||
providers = {
|
||||
"inference": [
|
||||
BuildProvider(provider_type="remote::vllm"),
|
||||
BuildProvider(provider_type="inline::sentence-transformers"),
|
||||
],
|
||||
"vector_io": [BuildProvider(provider_type="remote::chromadb")],
|
||||
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
||||
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"tool_runtime": [
|
||||
BuildProvider(provider_type="remote::brave-search"),
|
||||
BuildProvider(provider_type="remote::tavily-search"),
|
||||
BuildProvider(provider_type="inline::rag-runtime"),
|
||||
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||
],
|
||||
}
|
||||
name = "postgres-demo"
|
||||
|
||||
vector_io_providers = [
|
||||
Provider(
|
||||
provider_id="${env.ENABLE_CHROMADB:+chromadb}",
|
||||
provider_type="remote::chromadb",
|
||||
config=ChromaVectorIOConfig.sample_run_config(
|
||||
f"~/.llama/distributions/{name}",
|
||||
url="${env.CHROMADB_URL:=}",
|
||||
),
|
||||
),
|
||||
]
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
|
||||
default_models = [
|
||||
ModelInput(
|
||||
model_id="${env.INFERENCE_MODEL}",
|
||||
provider_id="vllm-inference",
|
||||
)
|
||||
]
|
||||
embedding_provider = Provider(
|
||||
provider_id="sentence-transformers",
|
||||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
embedding_model = ModelInput(
|
||||
model_id="all-MiniLM-L6-v2",
|
||||
provider_id=embedding_provider.provider_id,
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
postgres_config = PostgresSqlStoreConfig.sample_run_config()
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
distro_type="self_hosted",
|
||||
description="Quick start template for running Llama Stack with several popular providers",
|
||||
container_image=None,
|
||||
template_path=None,
|
||||
providers=providers,
|
||||
available_models_by_provider={},
|
||||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": inference_providers + [embedding_provider],
|
||||
"vector_io": vector_io_providers,
|
||||
"agents": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
config=dict(
|
||||
persistence_store=postgres_config,
|
||||
responses_store=postgres_config,
|
||||
),
|
||||
)
|
||||
],
|
||||
"telemetry": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
config=dict(
|
||||
service_name="${env.OTEL_SERVICE_NAME:=\u200b}",
|
||||
sinks="${env.TELEMETRY_SINKS:=console,otel_trace}",
|
||||
otel_exporter_otlp_endpoint="${env.OTEL_EXPORTER_OTLP_ENDPOINT:=http://localhost:4318/v1/traces}",
|
||||
),
|
||||
)
|
||||
],
|
||||
},
|
||||
default_models=default_models + [embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||
metadata_store=PostgresKVStoreConfig.sample_run_config(),
|
||||
inference_store=postgres_config,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
},
|
||||
)
|
|
@ -1,111 +0,0 @@
|
|||
version: 2
|
||||
image_name: postgres-demo
|
||||
apis:
|
||||
- agents
|
||||
- inference
|
||||
- safety
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: vllm-inference
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: ${env.VLLM_URL:=http://localhost:8000/v1}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
|
||||
provider_type: remote::chromadb
|
||||
config:
|
||||
url: ${env.CHROMADB_URL:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/chroma_remote_registry.db
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
config:
|
||||
excluded_categories: []
|
||||
agents:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
responses_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console,otel_trace}
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=http://localhost:4318/v1/traces}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
metadata_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
|
||||
inference_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: vllm-inference
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
server:
|
||||
port: 8321
|
|
@ -14,6 +14,9 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
template = get_starter_distribution_template(name="starter-gpu")
|
||||
template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."
|
||||
|
||||
# remove "run-with-postgres-store.yaml" from run_configs
|
||||
template.run_configs.pop("run-with-postgres-store.yaml", None)
|
||||
|
||||
template.providers["post_training"] = [
|
||||
BuildProvider(provider_type="inline::huggingface-gpu"),
|
||||
]
|
||||
|
|
|
@ -57,4 +57,5 @@ image_type: venv
|
|||
additional_pip_packages:
|
||||
- aiosqlite
|
||||
- asyncpg
|
||||
- psycopg2-binary
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
251
llama_stack/distributions/starter/run-with-postgres-store.yaml
Normal file
251
llama_stack/distributions/starter/run-with-postgres-store.yaml
Normal file
|
@ -0,0 +1,251 @@
|
|||
version: 2
|
||||
image_name: starter
|
||||
apis:
|
||||
- agents
|
||||
- batches
|
||||
- datasetio
|
||||
- eval
|
||||
- files
|
||||
- inference
|
||||
- post_training
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
|
||||
provider_type: remote::cerebras
|
||||
config:
|
||||
base_url: https://api.cerebras.ai
|
||||
api_key: ${env.CEREBRAS_API_KEY:=}
|
||||
- provider_id: ${env.OLLAMA_URL:+ollama}
|
||||
provider_type: remote::ollama
|
||||
config:
|
||||
url: ${env.OLLAMA_URL:=http://localhost:11434}
|
||||
- provider_id: ${env.VLLM_URL:+vllm}
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: ${env.VLLM_URL:=}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||
- provider_id: ${env.TGI_URL:+tgi}
|
||||
provider_type: remote::tgi
|
||||
config:
|
||||
url: ${env.TGI_URL:=}
|
||||
- provider_id: fireworks
|
||||
provider_type: remote::fireworks
|
||||
config:
|
||||
url: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY:=}
|
||||
- provider_id: together
|
||||
provider_type: remote::together
|
||||
config:
|
||||
url: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY:=}
|
||||
- provider_id: bedrock
|
||||
provider_type: remote::bedrock
|
||||
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
|
||||
provider_type: remote::nvidia
|
||||
config:
|
||||
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
|
||||
api_key: ${env.NVIDIA_API_KEY:=}
|
||||
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
||||
- provider_id: openai
|
||||
provider_type: remote::openai
|
||||
config:
|
||||
api_key: ${env.OPENAI_API_KEY:=}
|
||||
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
|
||||
- provider_id: anthropic
|
||||
provider_type: remote::anthropic
|
||||
config:
|
||||
api_key: ${env.ANTHROPIC_API_KEY:=}
|
||||
- provider_id: gemini
|
||||
provider_type: remote::gemini
|
||||
config:
|
||||
api_key: ${env.GEMINI_API_KEY:=}
|
||||
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
|
||||
provider_type: remote::vertexai
|
||||
config:
|
||||
project: ${env.VERTEX_AI_PROJECT:=}
|
||||
location: ${env.VERTEX_AI_LOCATION:=us-central1}
|
||||
- provider_id: groq
|
||||
provider_type: remote::groq
|
||||
config:
|
||||
url: https://api.groq.com
|
||||
api_key: ${env.GROQ_API_KEY:=}
|
||||
- provider_id: sambanova
|
||||
provider_type: remote::sambanova
|
||||
config:
|
||||
url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
- provider_id: ${env.AZURE_API_KEY:+azure}
|
||||
provider_type: remote::azure
|
||||
config:
|
||||
api_key: ${env.AZURE_API_KEY:=}
|
||||
api_base: ${env.AZURE_API_BASE:=}
|
||||
api_version: ${env.AZURE_API_VERSION:=}
|
||||
api_type: ${env.AZURE_API_TYPE:=}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
|
||||
- provider_id: sqlite-vec
|
||||
provider_type: inline::sqlite-vec
|
||||
config:
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
|
||||
- provider_id: ${env.MILVUS_URL:+milvus}
|
||||
provider_type: inline::milvus
|
||||
config:
|
||||
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
|
||||
- provider_id: ${env.CHROMADB_URL:+chromadb}
|
||||
provider_type: remote::chromadb
|
||||
config:
|
||||
url: ${env.CHROMADB_URL:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
|
||||
- provider_id: ${env.PGVECTOR_DB:+pgvector}
|
||||
provider_type: remote::pgvector
|
||||
config:
|
||||
host: ${env.PGVECTOR_HOST:=localhost}
|
||||
port: ${env.PGVECTOR_PORT:=5432}
|
||||
db: ${env.PGVECTOR_DB:=}
|
||||
user: ${env.PGVECTOR_USER:=}
|
||||
password: ${env.PGVECTOR_PASSWORD:=}
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
|
||||
files:
|
||||
- provider_id: meta-reference-files
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
config:
|
||||
excluded_categories: []
|
||||
- provider_id: code-scanner
|
||||
provider_type: inline::code-scanner
|
||||
agents:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
responses_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
post_training:
|
||||
- provider_id: torchtune-cpu
|
||||
provider_type: inline::torchtune-cpu
|
||||
config:
|
||||
checkpoint_format: meta
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:=}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
||||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
batches:
|
||||
- provider_id: reference
|
||||
provider_type: inline::reference
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/batches.db
|
||||
metadata_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
inference_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
port: ${env.POSTGRES_PORT:=5432}
|
||||
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
models: []
|
||||
shields: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups: []
|
||||
server:
|
||||
port: 8321
|
|
@ -175,6 +175,46 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|||
provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
|
||||
),
|
||||
]
|
||||
postgres_config = PostgresSqlStoreConfig.sample_run_config()
|
||||
default_overrides = {
|
||||
"inference": remote_inference_providers + [embedding_provider],
|
||||
"vector_io": [
|
||||
Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
),
|
||||
Provider(
|
||||
provider_id="sqlite-vec",
|
||||
provider_type="inline::sqlite-vec",
|
||||
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
),
|
||||
Provider(
|
||||
provider_id="${env.MILVUS_URL:+milvus}",
|
||||
provider_type="inline::milvus",
|
||||
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
),
|
||||
Provider(
|
||||
provider_id="${env.CHROMADB_URL:+chromadb}",
|
||||
provider_type="remote::chromadb",
|
||||
config=ChromaVectorIOConfig.sample_run_config(
|
||||
f"~/.llama/distributions/{name}/",
|
||||
url="${env.CHROMADB_URL:=}",
|
||||
),
|
||||
),
|
||||
Provider(
|
||||
provider_id="${env.PGVECTOR_DB:+pgvector}",
|
||||
provider_type="remote::pgvector",
|
||||
config=PGVectorVectorIOConfig.sample_run_config(
|
||||
f"~/.llama/distributions/{name}",
|
||||
db="${env.PGVECTOR_DB:=}",
|
||||
user="${env.PGVECTOR_USER:=}",
|
||||
password="${env.PGVECTOR_PASSWORD:=}",
|
||||
),
|
||||
),
|
||||
],
|
||||
"files": [files_provider],
|
||||
}
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
|
@ -186,49 +226,28 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|||
additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
|
||||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": remote_inference_providers + [embedding_provider],
|
||||
"vector_io": [
|
||||
Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
),
|
||||
Provider(
|
||||
provider_id="sqlite-vec",
|
||||
provider_type="inline::sqlite-vec",
|
||||
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
),
|
||||
Provider(
|
||||
provider_id="${env.MILVUS_URL:+milvus}",
|
||||
provider_type="inline::milvus",
|
||||
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
),
|
||||
Provider(
|
||||
provider_id="${env.CHROMADB_URL:+chromadb}",
|
||||
provider_type="remote::chromadb",
|
||||
config=ChromaVectorIOConfig.sample_run_config(
|
||||
f"~/.llama/distributions/{name}/",
|
||||
url="${env.CHROMADB_URL:=}",
|
||||
),
|
||||
),
|
||||
Provider(
|
||||
provider_id="${env.PGVECTOR_DB:+pgvector}",
|
||||
provider_type="remote::pgvector",
|
||||
config=PGVectorVectorIOConfig.sample_run_config(
|
||||
f"~/.llama/distributions/{name}",
|
||||
db="${env.PGVECTOR_DB:=}",
|
||||
user="${env.PGVECTOR_USER:=}",
|
||||
password="${env.PGVECTOR_PASSWORD:=}",
|
||||
),
|
||||
),
|
||||
],
|
||||
"files": [files_provider],
|
||||
},
|
||||
provider_overrides=default_overrides,
|
||||
default_models=[],
|
||||
default_tool_groups=default_tool_groups,
|
||||
default_shields=default_shields,
|
||||
),
|
||||
"run-with-postgres-store.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
**default_overrides,
|
||||
"agents": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
config=dict(
|
||||
persistence_store=postgres_config,
|
||||
responses_store=postgres_config,
|
||||
),
|
||||
)
|
||||
],
|
||||
},
|
||||
inference_store=postgres_config,
|
||||
metadata_store=postgres_config,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue