From f18361c76dab06705e9d6272d78437e4d5614684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Tue, 22 Jul 2025 11:31:30 +0200 Subject: [PATCH] feat: ability to use postgres as store for starter distro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The starter distribution now comes with all the required packages to support persistent stores—like the agent store, metadata, and inference—using PostgreSQL. We’ve added a new run YAML file, run-with-postgres-store.yaml, to make this setup easy. The file is included in the container image, so users can simply override the entrypoint to use the Postgres-specific config. The documentation has been updated with instructions on how to do that. Closes: #2619 Signed-off-by: Sébastien Han --- .../self_hosted_distro/starter.md | 29 ++ llama_stack/distribution/build_container.sh | 24 +- llama_stack/templates/ci-tests/build.yaml | 1 + .../ci-tests/run-with-postgres-store.yaml | 260 ++++++++++++++++++ llama_stack/templates/ci-tests/run.yaml | 22 +- .../templates/postgres-demo/__init__.py | 7 - .../templates/postgres-demo/build.yaml | 36 --- .../templates/postgres-demo/postgres_demo.py | 148 ---------- llama_stack/templates/postgres-demo/run.yaml | 111 -------- llama_stack/templates/starter/build.yaml | 1 + .../starter/run-with-postgres-store.yaml | 260 ++++++++++++++++++ llama_stack/templates/starter/run.yaml | 10 +- llama_stack/templates/starter/starter.py | 17 ++ 13 files changed, 607 insertions(+), 319 deletions(-) create mode 100644 llama_stack/templates/ci-tests/run-with-postgres-store.yaml delete mode 100644 llama_stack/templates/postgres-demo/__init__.py delete mode 100644 llama_stack/templates/postgres-demo/build.yaml delete mode 100644 llama_stack/templates/postgres-demo/postgres_demo.py delete mode 100644 llama_stack/templates/postgres-demo/run.yaml create mode 100644 llama_stack/templates/starter/run-with-postgres-store.yaml diff --git a/docs/source/distributions/self_hosted_distro/starter.md b/docs/source/distributions/self_hosted_distro/starter.md index 56cdd5e73..79462f2ff 100644 --- a/docs/source/distributions/self_hosted_distro/starter.md +++ b/docs/source/distributions/self_hosted_distro/starter.md @@ -186,6 +186,35 @@ docker run \ --port $LLAMA_STACK_PORT ``` +To run the distribution with a Postgres store, you can use the following command to override the entrypoint: + +```bash +docker run \ + -it \ + --pull always \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -e OPENAI_API_KEY=your_openai_key \ + -e FIREWORKS_API_KEY=your_fireworks_key \ + -e TOGETHER_API_KEY=your_together_key \ + -e POSTGRES_HOST=your_postgres_host \ + -e POSTGRES_PORT=your_postgres_port \ + -e POSTGRES_DB=your_postgres_db \ + -e POSTGRES_USER=your_postgres_user \ + -e POSTGRES_PASSWORD=your_postgres_password \ + --entrypoint python \ + llamastack/distribution-starter \ + -m llama_stack.distribution.server.server \ + --config run-with-postgres-store.yaml +``` + +Postgres environment variables: + +- `POSTGRES_HOST`: Postgres host (default: `localhost`) +- `POSTGRES_PORT`: Postgres port (default: `5432`) +- `POSTGRES_DB`: Postgres database name (default: `llamastack`) +- `POSTGRES_USER`: Postgres username (default: `llamastack`) +- `POSTGRES_PASSWORD`: Postgres password (default: `llamastack`) + ### Via Conda or venv Ensure you have configured the starter distribution using the environment variables explained above. diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 7c406d3e7..4126ca13f 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -27,6 +27,9 @@ RUN_CONFIG_PATH=/app/run.yaml BUILD_CONTEXT_DIR=$(pwd) +# Placeholder for template files +TEMPLATE_FILES=() + set -euo pipefail # Define color codes @@ -272,6 +275,19 @@ EOF add_to_container << EOF COPY run.yaml $RUN_CONFIG_PATH EOF +# This is a template, we don't need to copy the run config but we still want to include potential +# other run files like -with-postgres-store.yaml or -with-safety.yaml +else + template_files=$(find "$BUILD_CONTEXT_DIR"/llama_stack/templates/"$template_or_config" -name "*.yaml" -not -name "run.yaml" -not -name "build.yaml") + echo "Copying template files: $template_files" + for file in $template_files; do + template_file_name=$(basename "$file") + TEMPLATE_FILES+=("$template_file_name") + cp "$file" "$BUILD_CONTEXT_DIR/$template_file_name" + add_to_container << EOF +COPY $template_file_name /app/$template_file_name +EOF + done fi stack_mount="/app/llama-stack-source" @@ -333,6 +349,11 @@ fi RUN pip uninstall -y uv EOF +# We can have a custom entrypoint.sh that figures out which server command to run based on something +# simple, like an env var. So if someone wants to use Postgres, the script will kick off the server +# with the right run YAML. Or, if they prefer, they can just override the entrypoint themselves and +# point it to whatever run YAML they want. +# # If a run config is provided, we use the --config flag if [[ -n "$run_config" ]]; then add_to_container << EOF @@ -414,7 +435,8 @@ $CONTAINER_BINARY build \ "$BUILD_CONTEXT_DIR" # clean up tmp/configs -rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR" +rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR" "${TEMPLATE_FILES[@]}" + set +x echo "Success!" diff --git a/llama_stack/templates/ci-tests/build.yaml b/llama_stack/templates/ci-tests/build.yaml index 2f18e5d26..10c4d99c3 100644 --- a/llama_stack/templates/ci-tests/build.yaml +++ b/llama_stack/templates/ci-tests/build.yaml @@ -98,4 +98,5 @@ image_name: ci-tests additional_pip_packages: - aiosqlite - asyncpg +- psycopg2-binary - sqlalchemy[asyncio] diff --git a/llama_stack/templates/ci-tests/run-with-postgres-store.yaml b/llama_stack/templates/ci-tests/run-with-postgres-store.yaml new file mode 100644 index 000000000..fac6762fa --- /dev/null +++ b/llama_stack/templates/ci-tests/run-with-postgres-store.yaml @@ -0,0 +1,260 @@ +version: 2 +image_name: ci-tests +apis: +- agents +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai + api_key: ${env.CEREBRAS_API_KEY} + - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + - provider_id: ${env.ENABLE_VLLM:=__disabled__} + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.ENABLE_TGI:=__disabled__} + provider_type: remote::tgi + config: + url: ${env.TGI_URL} + - provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__} + provider_type: remote::hf::serverless + config: + huggingface_repo: ${env.INFERENCE_MODEL} + api_token: ${env.HF_API_TOKEN} + - provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__} + provider_type: remote::hf::endpoint + config: + endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} + api_token: ${env.HF_API_TOKEN} + - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY} + - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY} + - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} + provider_type: remote::bedrock + - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} + provider_type: remote::databricks + config: + url: ${env.DATABRICKS_URL} + api_token: ${env.DATABRICKS_API_TOKEN} + - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} + provider_type: remote::nvidia + config: + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:=} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} + - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} + provider_type: remote::runpod + config: + url: ${env.RUNPOD_URL:=} + api_token: ${env.RUNPOD_API_TOKEN} + - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} + - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY} + - provider_id: ${env.ENABLE_GEMINI:=__disabled__} + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY} + - provider_id: ${env.ENABLE_GROQ:=__disabled__} + provider_type: remote::groq + config: + url: https://api.groq.com + api_key: ${env.GROQ_API_KEY} + - provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__} + provider_type: remote::llama-openai-compat + config: + openai_compat_api_base: https://api.llama.com/compat/v1/ + api_key: ${env.LLAMA_API_KEY} + - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + provider_type: remote::sambanova + config: + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY} + - provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__} + provider_type: remote::passthrough + config: + url: ${env.PASSTHROUGH_URL} + api_key: ${env.PASSTHROUGH_API_KEY} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: ${env.ENABLE_FAISS:=faiss} + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db + - provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__} + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db + - provider_id: ${env.ENABLE_MILVUS:=__disabled__} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db + - provider_id: ${env.ENABLE_CHROMADB:=__disabled__} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL} + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/chroma_remote_registry.db + - provider_id: ${env.ENABLE_PGVECTOR:=__disabled__} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB} + user: ${env.PGVECTOR_USER} + password: ${env.PGVECTOR_PASSWORD} + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db + files: + - provider_id: localfs + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + responses_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db + otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=} + post_training: + - provider_id: huggingface + provider_type: inline::huggingface + config: + checkpoint_format: huggingface + distributed_backend: null + device: cpu + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol +metadata_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} +inference_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} +models: [] +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: [] +server: + port: 8321 diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 6f8a192ee..603b82a89 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -112,39 +112,39 @@ providers: config: kvstore: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db - provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__} provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db kvstore: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db - provider_id: ${env.ENABLE_MILVUS:=__disabled__} provider_type: inline::milvus config: - db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db kvstore: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db - provider_id: ${env.ENABLE_CHROMADB:=__disabled__} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:=} + url: ${env.CHROMADB_URL} kvstore: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/chroma_remote_registry.db - provider_id: ${env.ENABLE_PGVECTOR:=__disabled__} provider_type: remote::pgvector config: host: ${env.PGVECTOR_HOST:=localhost} port: ${env.PGVECTOR_PORT:=5432} - db: ${env.PGVECTOR_DB:=} - user: ${env.PGVECTOR_USER:=} - password: ${env.PGVECTOR_PASSWORD:=} + db: ${env.PGVECTOR_DB} + user: ${env.PGVECTOR_USER} + password: ${env.PGVECTOR_PASSWORD} kvstore: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db files: - provider_id: meta-reference-files provider_type: inline::localfs diff --git a/llama_stack/templates/postgres-demo/__init__.py b/llama_stack/templates/postgres-demo/__init__.py deleted file mode 100644 index 81473cb73..000000000 --- a/llama_stack/templates/postgres-demo/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .postgres_demo import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/postgres-demo/build.yaml b/llama_stack/templates/postgres-demo/build.yaml deleted file mode 100644 index d5e816a54..000000000 --- a/llama_stack/templates/postgres-demo/build.yaml +++ /dev/null @@ -1,36 +0,0 @@ -version: 2 -distribution_spec: - description: Quick start template for running Llama Stack with several popular providers - providers: - inference: - - provider_id: vllm-inference - provider_type: remote::vllm - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: chromadb - provider_type: remote::chromadb - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - - provider_id: tavily-search - provider_type: remote::tavily-search - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol -image_type: conda -image_name: postgres-demo -additional_pip_packages: -- asyncpg -- psycopg2-binary -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py deleted file mode 100644 index 24e3f6f27..000000000 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig -from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig -from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig -from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig -from llama_stack.templates.template import ( - DistributionTemplate, - RunConfigSettings, -) - - -def get_distribution_template() -> DistributionTemplate: - inference_providers = [ - Provider( - provider_id="vllm-inference", - provider_type="remote::vllm", - config=VLLMInferenceAdapterConfig.sample_run_config( - url="${env.VLLM_URL:=http://localhost:8000/v1}", - ), - ), - ] - providers = { - "inference": inference_providers - + [ - Provider(provider_id="sentence-transformers", provider_type="inline::sentence-transformers"), - ], - "vector_io": [ - Provider(provider_id="chromadb", provider_type="remote::chromadb"), - ], - "safety": [Provider(provider_id="llama-guard", provider_type="inline::llama-guard")], - "agents": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")], - "telemetry": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")], - "tool_runtime": [ - Provider(provider_id="brave-search", provider_type="remote::brave-search"), - Provider(provider_id="tavily-search", provider_type="remote::tavily-search"), - Provider(provider_id="rag-runtime", provider_type="inline::rag-runtime"), - Provider( - provider_id="model-context-protocol", - provider_type="remote::model-context-protocol", - ), - ], - } - name = "postgres-demo" - - vector_io_providers = [ - Provider( - provider_id="${env.ENABLE_CHROMADB:+chromadb}", - provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}", - url="${env.CHROMADB_URL:=}", - ), - ), - ] - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - default_models = [ - ModelInput( - model_id="${env.INFERENCE_MODEL}", - provider_id="vllm-inference", - ) - ] - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id=embedding_provider.provider_id, - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - postgres_config = PostgresSqlStoreConfig.sample_run_config() - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Quick start template for running Llama Stack with several popular providers", - container_image=None, - template_path=None, - providers=providers, - available_models_by_provider={}, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": inference_providers + [embedding_provider], - "vector_io": vector_io_providers, - "agents": [ - Provider( - provider_id="meta-reference", - provider_type="inline::meta-reference", - config=dict( - persistence_store=postgres_config, - responses_store=postgres_config, - ), - ) - ], - "telemetry": [ - Provider( - provider_id="meta-reference", - provider_type="inline::meta-reference", - config=dict( - service_name="${env.OTEL_SERVICE_NAME:=\u200b}", - sinks="${env.TELEMETRY_SINKS:=console,otel_trace}", - otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}", - ), - ) - ], - }, - default_models=default_models + [embedding_model], - default_tool_groups=default_tool_groups, - default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], - metadata_store=PostgresKVStoreConfig.sample_run_config(), - inference_store=postgres_config, - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - }, - ) diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml deleted file mode 100644 index 747b7dc53..000000000 --- a/llama_stack/templates/postgres-demo/run.yaml +++ /dev/null @@ -1,111 +0,0 @@ -version: 2 -image_name: postgres-demo -apis: -- agents -- inference -- safety -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: vllm-inference - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - vector_io: - - provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/chroma_remote_registry.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,otel_trace} - otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:=} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: vllm-inference - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml index 9b540ab62..139037a7a 100644 --- a/llama_stack/templates/starter/build.yaml +++ b/llama_stack/templates/starter/build.yaml @@ -98,4 +98,5 @@ image_name: starter additional_pip_packages: - aiosqlite - asyncpg +- psycopg2-binary - sqlalchemy[asyncio] diff --git a/llama_stack/templates/starter/run-with-postgres-store.yaml b/llama_stack/templates/starter/run-with-postgres-store.yaml new file mode 100644 index 000000000..b1f910174 --- /dev/null +++ b/llama_stack/templates/starter/run-with-postgres-store.yaml @@ -0,0 +1,260 @@ +version: 2 +image_name: starter +apis: +- agents +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai + api_key: ${env.CEREBRAS_API_KEY} + - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + - provider_id: ${env.ENABLE_VLLM:=__disabled__} + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.ENABLE_TGI:=__disabled__} + provider_type: remote::tgi + config: + url: ${env.TGI_URL} + - provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__} + provider_type: remote::hf::serverless + config: + huggingface_repo: ${env.INFERENCE_MODEL} + api_token: ${env.HF_API_TOKEN} + - provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__} + provider_type: remote::hf::endpoint + config: + endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} + api_token: ${env.HF_API_TOKEN} + - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY} + - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY} + - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} + provider_type: remote::bedrock + - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__} + provider_type: remote::databricks + config: + url: ${env.DATABRICKS_URL} + api_token: ${env.DATABRICKS_API_TOKEN} + - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} + provider_type: remote::nvidia + config: + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:=} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} + - provider_id: ${env.ENABLE_RUNPOD:=__disabled__} + provider_type: remote::runpod + config: + url: ${env.RUNPOD_URL:=} + api_token: ${env.RUNPOD_API_TOKEN} + - provider_id: ${env.ENABLE_OPENAI:=__disabled__} + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} + - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY} + - provider_id: ${env.ENABLE_GEMINI:=__disabled__} + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY} + - provider_id: ${env.ENABLE_GROQ:=__disabled__} + provider_type: remote::groq + config: + url: https://api.groq.com + api_key: ${env.GROQ_API_KEY} + - provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__} + provider_type: remote::llama-openai-compat + config: + openai_compat_api_base: https://api.llama.com/compat/v1/ + api_key: ${env.LLAMA_API_KEY} + - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} + provider_type: remote::sambanova + config: + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY} + - provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__} + provider_type: remote::passthrough + config: + url: ${env.PASSTHROUGH_URL} + api_key: ${env.PASSTHROUGH_API_KEY} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: ${env.ENABLE_FAISS:=faiss} + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db + - provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__} + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db + - provider_id: ${env.ENABLE_MILVUS:=__disabled__} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db + - provider_id: ${env.ENABLE_CHROMADB:=__disabled__} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL} + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/chroma_remote_registry.db + - provider_id: ${env.ENABLE_PGVECTOR:=__disabled__} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB} + user: ${env.PGVECTOR_USER} + password: ${env.PGVECTOR_PASSWORD} + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db + files: + - provider_id: localfs + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + responses_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db + otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=} + post_training: + - provider_id: huggingface + provider_type: inline::huggingface + config: + checkpoint_format: huggingface + distributed_backend: null + device: cpu + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol +metadata_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} +inference_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} +models: [] +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: [] +server: + port: 8321 diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index d60800ebb..c81bc39ec 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -130,18 +130,18 @@ providers: - provider_id: ${env.ENABLE_CHROMADB:=__disabled__} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:=} + url: ${env.CHROMADB_URL} kvstore: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/chroma_remote_registry.db - provider_id: ${env.ENABLE_PGVECTOR:=__disabled__} provider_type: remote::pgvector config: host: ${env.PGVECTOR_HOST:=localhost} port: ${env.PGVECTOR_PORT:=5432} - db: ${env.PGVECTOR_DB:=} - user: ${env.PGVECTOR_USER:=} - password: ${env.PGVECTOR_PASSWORD:=} + db: ${env.PGVECTOR_DB} + user: ${env.PGVECTOR_USER} + password: ${env.PGVECTOR_PASSWORD} kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 489117702..93d39191a 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -379,6 +379,7 @@ def get_distribution_template() -> DistributionTemplate: available_safety_models = get_safety_models_for_providers(remote_inference_providers) shields = get_shield_registry(available_safety_models, ids_conflict_in_models) + postgres_config = PostgresSqlStoreConfig.sample_run_config() return DistributionTemplate( name=name, distro_type="self_hosted", @@ -401,6 +402,22 @@ def get_distribution_template() -> DistributionTemplate: # TODO: add a way to enable/disable shields on the fly default_shields=shields, ), + "run-with-postgres-store.yaml": RunConfigSettings( + provider_overrides={ + "agents": [ + Provider( + provider_id="meta-reference", + provider_type="inline::meta-reference", + config=dict( + persistence_store=postgres_config, + responses_store=postgres_config, + ), + ) + ], + }, + inference_store=postgres_config, + metadata_store=postgres_config, + ), }, run_config_env_vars={ "LLAMA_STACK_PORT": (