diff --git a/docs/source/distributions/self_hosted_distro/starter.md b/docs/source/distributions/self_hosted_distro/starter.md
index 56cdd5e73..79462f2ff 100644
--- a/docs/source/distributions/self_hosted_distro/starter.md
+++ b/docs/source/distributions/self_hosted_distro/starter.md
@@ -186,6 +186,35 @@ docker run \
   --port $LLAMA_STACK_PORT
 ```
 
+To run the distribution with a Postgres store, override the container entrypoint so the server starts with the Postgres run config:
+
+```bash
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -e OPENAI_API_KEY=your_openai_key \
+  -e FIREWORKS_API_KEY=your_fireworks_key \
+  -e TOGETHER_API_KEY=your_together_key \
+  -e POSTGRES_HOST=your_postgres_host \
+  -e POSTGRES_PORT=your_postgres_port \
+  -e POSTGRES_DB=your_postgres_db \
+  -e POSTGRES_USER=your_postgres_user \
+  -e POSTGRES_PASSWORD=your_postgres_password \
+  --entrypoint python \
+  llamastack/distribution-starter \
+  -m llama_stack.distribution.server.server \
+  --config run-with-postgres-store.yaml
+```
+
+Postgres environment variables:
+
+- `POSTGRES_HOST`: Postgres host (default: `localhost`)
+- `POSTGRES_PORT`: Postgres port (default: `5432`)
+- `POSTGRES_DB`: Postgres database name (default: `llamastack`)
+- `POSTGRES_USER`: Postgres username (default: `llamastack`)
+- `POSTGRES_PASSWORD`: Postgres password (default: `llamastack`)
+
 ### Via Conda or venv
 
 Ensure you have configured the starter distribution using the environment variables explained above.
diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh
index 6985c1cd0..ae09da719 100755
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/distribution/build_container.sh
@@ -27,6 +27,9 @@ RUN_CONFIG_PATH=/app/run.yaml
 
 BUILD_CONTEXT_DIR=$(pwd)
 
+# Template files copied into the build context; tracked so they can be cleaned up after the build
+TEMPLATE_FILES=()
+
 if [ "$#" -lt 4 ]; then
   # This only works for templates
   echo "Usage: $0 <template_or_config> <image_name> <container_base> <pip_dependencies> [<run_config>] [<special_pip_deps>]" >&2
@@ -198,6 +201,19 @@ EOF
 add_to_container << EOF
 COPY run.yaml $RUN_CONFIG_PATH
 EOF
+# This is a template; we don't need to copy the run config, but we still want to include any other
+# run files, like run-with-postgres-store.yaml or run-with-safety.yaml
+else
+  template_files=$(find "$BUILD_CONTEXT_DIR"/llama_stack/templates/"$template_or_config" -name "*.yaml" -not -name "run.yaml" -not -name "build.yaml")
+  echo "Copying template files: $template_files"
+  for file in $template_files; do
+    template_file_name=$(basename "$file")
+    TEMPLATE_FILES+=("$template_file_name")
+    cp "$file" "$BUILD_CONTEXT_DIR/$template_file_name"
+    add_to_container << EOF
+COPY $template_file_name /app/$template_file_name
+EOF
+  done
 fi
 
 stack_mount="/app/llama-stack-source"
@@ -259,6 +275,11 @@ fi
 RUN pip uninstall -y uv
 EOF
 
+# We could ship a custom entrypoint.sh that picks the server command based on something simple,
+# like an env var: if someone wants Postgres, the script would start the server with the matching
+# run YAML. Alternatively, users can override the entrypoint themselves and point it at whichever
+# run YAML they want.
+#
 # If a run config is provided, we use the --config flag
 if [[ -n "$run_config" ]]; then
   add_to_container << EOF
@@ -340,7 +361,8 @@ $CONTAINER_BINARY build \
   "$BUILD_CONTEXT_DIR"
 
 # clean up tmp/configs
-rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR"
+rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR" "${TEMPLATE_FILES[@]}"
+
 set +x
 echo "Success!"
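For a quick end-to-end check of the Postgres path documented above, a disposable database alongside the server works well. This is a sketch, not part of the patch: the `postgres:16` image, the `llamastack-pg` container name, and `host.docker.internal` are assumptions (on Linux, add `--add-host=host.docker.internal:host-gateway`); everything else reuses the defaults from the run YAML.

```bash
# Throwaway Postgres matching the run YAML defaults (db/user/password all "llamastack")
docker run -d --name llamastack-pg \
  -e POSTGRES_DB=llamastack \
  -e POSTGRES_USER=llamastack \
  -e POSTGRES_PASSWORD=llamastack \
  -p 5432:5432 \
  postgres:16

# Launch the starter distribution against it; only POSTGRES_HOST needs overriding,
# the remaining POSTGRES_* variables fall back to their defaults.
docker run -it --pull always \
  -p 8321:8321 \
  -e POSTGRES_HOST=host.docker.internal \
  --entrypoint python \
  llamastack/distribution-starter \
  -m llama_stack.distribution.server.server \
  --config run-with-postgres-store.yaml
```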
diff --git a/llama_stack/templates/ci-tests/build.yaml b/llama_stack/templates/ci-tests/build.yaml
index 625e36e4f..7f27eabdf 100644
--- a/llama_stack/templates/ci-tests/build.yaml
+++ b/llama_stack/templates/ci-tests/build.yaml
@@ -57,4 +57,5 @@ image_type: conda
 additional_pip_packages:
 - aiosqlite
 - asyncpg
+- psycopg2-binary
 - sqlalchemy[asyncio]
diff --git a/llama_stack/templates/ci-tests/run-with-postgres-store.yaml b/llama_stack/templates/ci-tests/run-with-postgres-store.yaml
new file mode 100644
index 000000000..f46a5520d
--- /dev/null
+++ b/llama_stack/templates/ci-tests/run-with-postgres-store.yaml
@@ -0,0 +1,266 @@
+version: 2
+image_name: ci-tests
+apis:
+- agents
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: cerebras
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY}
+  - provider_id: ollama
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: vllm
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: tgi
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL}
+  - provider_id: serverless
+    provider_type: remote::hf::serverless
+    config:
+      huggingface_repo: ${env.INFERENCE_MODEL}
+      api_token: ${env.HF_API_TOKEN}
+  - provider_id: endpoint
+    provider_type: remote::hf::endpoint
+    config:
+      endpoint_name: ${env.INFERENCE_ENDPOINT_NAME}
+      api_token: ${env.HF_API_TOKEN}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+    config: {}
+  - provider_id: databricks
+    provider_type: remote::databricks
+    config:
+      url: ${env.DATABRICKS_URL}
+      api_token: ${env.DATABRICKS_API_TOKEN}
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: runpod
+    provider_type: remote::runpod
+    config:
+      url: ${env.RUNPOD_URL:=}
+      api_token: ${env.RUNPOD_API_TOKEN}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY}
+  - provider_id: llama-openai-compat
+    provider_type: remote::llama-openai-compat
+    config:
+      openai_compat_api_base: https://api.llama.com/compat/v1/
+      api_key: ${env.LLAMA_API_KEY}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY}
+  - provider_id: passthrough
+    provider_type: remote::passthrough
+    config:
+      url: ${env.PASSTHROUGH_URL}
+      api_key: ${env.PASSTHROUGH_API_KEY}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db
+  - provider_id: milvus
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL}
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/chroma_remote_registry.db
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB}
+      user: ${env.PGVECTOR_USER}
+      password: ${env.PGVECTOR_PASSWORD}
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db
+  files:
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: postgres
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      responses_store:
+        type: postgres
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db
+      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
+  post_training:
+  - provider_id: huggingface
+    provider_type: inline::huggingface
+    config:
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+metadata_store:
+  type: postgres
+  host: ${env.POSTGRES_HOST:=localhost}
+  port: ${env.POSTGRES_PORT:=5432}
+  db: ${env.POSTGRES_DB:=llamastack}
+  user: ${env.POSTGRES_USER:=llamastack}
+  password: ${env.POSTGRES_PASSWORD:=llamastack}
+inference_store:
+  type: postgres
+  host: ${env.POSTGRES_HOST:=localhost}
+  port: ${env.POSTGRES_PORT:=5432}
+  db: ${env.POSTGRES_DB:=llamastack}
+  user: ${env.POSTGRES_USER:=llamastack}
+  password: ${env.POSTGRES_PASSWORD:=llamastack}
+models: []
+shields: []
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups: []
+server:
+  port: 8321
diff --git a/llama_stack/templates/postgres-demo/__init__.py b/llama_stack/templates/postgres-demo/__init__.py
deleted file mode 100644
index 81473cb73..000000000
--- a/llama_stack/templates/postgres-demo/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .postgres_demo import get_distribution_template  # noqa: F401
diff --git a/llama_stack/templates/postgres-demo/build.yaml b/llama_stack/templates/postgres-demo/build.yaml
deleted file mode 100644
index 645b59613..000000000
--- a/llama_stack/templates/postgres-demo/build.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-version: 2
-distribution_spec:
-  description: Quick start template for running Llama Stack with several popular providers
-  providers:
-    inference:
-    - remote::vllm
-    - inline::sentence-transformers
-    vector_io:
-    - remote::chromadb
-    safety:
-    - inline::llama-guard
-    agents:
-    - inline::meta-reference
-    telemetry:
-    - inline::meta-reference
-    tool_runtime:
-    - remote::brave-search
-    - remote::tavily-search
-    - inline::rag-runtime
-    - remote::model-context-protocol
-image_type: conda
-additional_pip_packages:
-- asyncpg
-- psycopg2-binary
-- sqlalchemy[asyncio]
diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py
deleted file mode 100644
index c7ab222ec..000000000
--- a/llama_stack/templates/postgres-demo/postgres_demo.py
+++ /dev/null
@@ -1,140 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-
-from llama_stack.apis.models import ModelType
-from llama_stack.distribution.datatypes import (
-    ModelInput,
-    Provider,
-    ShieldInput,
-    ToolGroupInput,
-)
-from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
-from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
-from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
-from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
-from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
-from llama_stack.templates.template import (
-    DistributionTemplate,
-    RunConfigSettings,
-)
-
-
-def get_distribution_template() -> DistributionTemplate:
-    inference_providers = [
-        Provider(
-            provider_id="vllm-inference",
-            provider_type="remote::vllm",
-            config=VLLMInferenceAdapterConfig.sample_run_config(
-                url="${env.VLLM_URL:=http://localhost:8000/v1}",
-            ),
-        ),
-    ]
-    providers = {
-        "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
-        "vector_io": ["remote::chromadb"],
-        "safety": ["inline::llama-guard"],
-        "agents": ["inline::meta-reference"],
-        "telemetry": ["inline::meta-reference"],
-        "tool_runtime": [
-            "remote::brave-search",
-            "remote::tavily-search",
-            "inline::rag-runtime",
-            "remote::model-context-protocol",
-        ],
-    }
-    name = "postgres-demo"
-
-    vector_io_providers = [
-        Provider(
-            provider_id="${env.ENABLE_CHROMADB:+chromadb}",
-            provider_type="remote::chromadb",
-            config=ChromaVectorIOConfig.sample_run_config(
-                f"~/.llama/distributions/{name}",
-                url="${env.CHROMADB_URL:=}",
-            ),
-        ),
-    ]
-    default_tool_groups = [
-        ToolGroupInput(
-            toolgroup_id="builtin::websearch",
-            provider_id="tavily-search",
-        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::rag",
-            provider_id="rag-runtime",
-        ),
-    ]
-
-    default_models = [
-        ModelInput(
-            model_id="${env.INFERENCE_MODEL}",
-            provider_id="vllm-inference",
-        )
-    ]
-    embedding_provider = Provider(
-        provider_id="sentence-transformers",
-        provider_type="inline::sentence-transformers",
-        config=SentenceTransformersInferenceConfig.sample_run_config(),
-    )
-    embedding_model = ModelInput(
-        model_id="all-MiniLM-L6-v2",
-        provider_id=embedding_provider.provider_id,
-        model_type=ModelType.embedding,
-        metadata={
-            "embedding_dimension": 384,
-        },
-    )
-    postgres_config = PostgresSqlStoreConfig.sample_run_config()
-    return DistributionTemplate(
-        name=name,
-        distro_type="self_hosted",
-        description="Quick start template for running Llama Stack with several popular providers",
-        container_image=None,
-        template_path=None,
-        providers=providers,
-        available_models_by_provider={},
-        run_configs={
-            "run.yaml": RunConfigSettings(
-                provider_overrides={
-                    "inference": inference_providers + [embedding_provider],
-                    "vector_io": vector_io_providers,
-                    "agents": [
-                        Provider(
-                            provider_id="meta-reference",
-                            provider_type="inline::meta-reference",
-                            config=dict(
-                                persistence_store=postgres_config,
-                                responses_store=postgres_config,
-                            ),
-                        )
-                    ],
-                    "telemetry": [
-                        Provider(
-                            provider_id="meta-reference",
-                            provider_type="inline::meta-reference",
-                            config=dict(
-                                service_name="${env.OTEL_SERVICE_NAME:=\u200b}",
-                                sinks="${env.TELEMETRY_SINKS:=console,otel_trace}",
-                                otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}",
-                            ),
-                        )
-                    ],
-                },
-                default_models=default_models + [embedding_model],
-                default_tool_groups=default_tool_groups,
-                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
-                metadata_store=PostgresKVStoreConfig.sample_run_config(),
-                inference_store=postgres_config,
-            ),
-        },
-        run_config_env_vars={
-            "LLAMA_STACK_PORT": (
-                "8321",
-                "Port for the Llama Stack distribution server",
-            ),
-        },
-    )
diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml
deleted file mode 100644
index feb85e316..000000000
--- a/llama_stack/templates/postgres-demo/run.yaml
+++ /dev/null
@@ -1,114 +0,0 @@
-version: 2
-image_name: postgres-demo
-apis:
-- agents
-- inference
-- safety
-- telemetry
-- tool_runtime
-- vector_io
-providers:
-  inference:
-  - provider_id: vllm-inference
-    provider_type: remote::vllm
-    config:
-      url: ${env.VLLM_URL:=http://localhost:8000/v1}
-      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
-      api_token: ${env.VLLM_API_TOKEN:=fake}
-      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
-  vector_io:
-  - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
-    provider_type: remote::chromadb
-    config:
-      url: ${env.CHROMADB_URL:=}
-      kvstore:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/chroma_remote_registry.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config:
-      excluded_categories: []
-  agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      persistence_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
-      responses_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
-      sinks: ${env.TELEMETRY_SINKS:=console,otel_trace}
-      otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}
-  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
-      max_results: 3
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-    config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-metadata_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
-  table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
-inference_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: vllm-inference
-  model_type: llm
-- metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
-  provider_id: sentence-transformers
-  model_type: embedding
-shields:
-- shield_id: meta-llama/Llama-Guard-3-8B
-vector_dbs: []
-datasets: []
-scoring_fns: []
-benchmarks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::rag
-  provider_id: rag-runtime
-server:
-  port: 8321
diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml
index 8180124f6..e84d11478 100644
--- a/llama_stack/templates/starter/build.yaml
+++ b/llama_stack/templates/starter/build.yaml
@@ -57,4 +57,5 @@ image_type: conda
 additional_pip_packages:
 - aiosqlite
 - asyncpg
+- psycopg2-binary
 - sqlalchemy[asyncio]
diff --git a/llama_stack/templates/starter/run-with-postgres-store.yaml b/llama_stack/templates/starter/run-with-postgres-store.yaml
new file mode 100644
index 000000000..65328ee8a
--- /dev/null
+++ b/llama_stack/templates/starter/run-with-postgres-store.yaml
@@ -0,0 +1,266 @@
+version: 2
+image_name: starter
+apis:
+- agents
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: cerebras
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY}
+  - provider_id: ollama
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: vllm
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: tgi
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL}
+  - provider_id: serverless
+    provider_type: remote::hf::serverless
+    config:
+      huggingface_repo: ${env.INFERENCE_MODEL}
+      api_token: ${env.HF_API_TOKEN}
+  - provider_id: endpoint
+    provider_type: remote::hf::endpoint
+    config:
+      endpoint_name: ${env.INFERENCE_ENDPOINT_NAME}
+      api_token: ${env.HF_API_TOKEN}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+    config: {}
+  - provider_id: databricks
+    provider_type: remote::databricks
+    config:
+      url: ${env.DATABRICKS_URL}
+      api_token: ${env.DATABRICKS_API_TOKEN}
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: runpod
+    provider_type: remote::runpod
+    config:
+      url: ${env.RUNPOD_URL:=}
+      api_token: ${env.RUNPOD_API_TOKEN}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY}
+  - provider_id: llama-openai-compat
+    provider_type: remote::llama-openai-compat
+    config:
+      openai_compat_api_base: https://api.llama.com/compat/v1/
+      api_key: ${env.LLAMA_API_KEY}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY}
+  - provider_id: passthrough
+    provider_type: remote::passthrough
+    config:
+      url: ${env.PASSTHROUGH_URL}
+      api_key: ${env.PASSTHROUGH_API_KEY}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
+  - provider_id: milvus
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL}
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/chroma_remote_registry.db
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB}
+      user: ${env.PGVECTOR_USER}
+      password: ${env.PGVECTOR_PASSWORD}
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
+  files:
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: postgres
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      responses_store:
+        type: postgres
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db
+      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
+  post_training:
+  - provider_id: huggingface
+    provider_type: inline::huggingface
+    config:
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+metadata_store:
+  type: postgres
+  host: ${env.POSTGRES_HOST:=localhost}
+  port: ${env.POSTGRES_PORT:=5432}
+  db: ${env.POSTGRES_DB:=llamastack}
+  user: ${env.POSTGRES_USER:=llamastack}
+  password: ${env.POSTGRES_PASSWORD:=llamastack}
+inference_store:
+  type: postgres
+  host: ${env.POSTGRES_HOST:=localhost}
+  port: ${env.POSTGRES_PORT:=5432}
+  db: ${env.POSTGRES_DB:=llamastack}
+  user: ${env.POSTGRES_USER:=llamastack}
+  password: ${env.POSTGRES_PASSWORD:=llamastack}
+models: []
+shields: []
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups: []
+server:
+  port: 8321
diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py
index cee1094db..885f4e56e 100644
--- a/llama_stack/templates/starter/starter.py
+++ b/llama_stack/templates/starter/starter.py
@@ -309,6 +309,7 @@ def get_distribution_template() -> DistributionTemplate:
     available_safety_models = get_safety_models_for_providers(remote_inference_providers)
     shields = get_shield_registry(available_safety_models, ids_conflict_in_models)
 
+    postgres_config = PostgresSqlStoreConfig.sample_run_config()
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
@@ -331,6 +332,22 @@ def get_distribution_template() -> DistributionTemplate:
                 # TODO: add a way to enable/disable shields on the fly
                 default_shields=shields,
             ),
+            "run-with-postgres-store.yaml": RunConfigSettings(
+                provider_overrides={
+                    "agents": [
+                        Provider(
+                            provider_id="meta-reference",
+                            provider_type="inline::meta-reference",
+                            config=dict(
+                                persistence_store=postgres_config,
+                                responses_store=postgres_config,
+                            ),
+                        )
+                    ],
+                },
+                inference_store=postgres_config,
+                metadata_store=postgres_config,
+            ),
         },
         run_config_env_vars={
             "LLAMA_STACK_PORT": (
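With `starter.py` emitting the extra run config and `build_container.sh` copying every non-`run.yaml`/`build.yaml` template YAML into `/app`, the wiring can be checked end to end by rebuilding the image and listing its contents. A sketch, assuming the standard `llama stack build` CLI and that the locally built image is tagged `llamastack/distribution-starter` (the tag may differ on your machine):

```bash
# Rebuild the starter container image from the template
llama stack build --template starter --image-type container

# The extra template YAML should now sit under /app inside the image
docker run --rm --entrypoint ls llamastack/distribution-starter /app
# expect run-with-postgres-store.yaml in the listing
```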