Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 18:00:36 +00:00)
chore(package): migrate to src/ layout (#3920)
Migrates package structure to src/ layout following Python packaging best practices. All code moved from `llama_stack/` to `src/llama_stack/`. Public API unchanged - imports remain `import llama_stack.*`. Updated build configs, pre-commit hooks, scripts, and GitHub workflows accordingly. All hooks pass, package builds cleanly. **Developer note**: Reinstall after pulling: `pip install -e .`
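Since the developer note says the public import surface is unchanged, a quick way to confirm that an editable reinstall picked up the new src/ layout is a short Python check. This is a minimal sketch; `DistributionTemplate` is just one example of an existing public symbol used elsewhere in the repository.

```python
# Minimal sanity check after `pip install -e .` on the new src/ layout.
# Imports are still `llama_stack.*`; only the on-disk location moved.
import llama_stack
from llama_stack.distributions.template import DistributionTemplate

print(llama_stack.__file__)           # should now resolve under src/llama_stack/ in the checkout
print(DistributionTemplate.__name__)  # public symbols import exactly as before
```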
This commit is contained in: parent 98a5047f9d, commit 471b1b248b
791 changed files with 2983 additions and 456 deletions
@@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .ci_tests import get_distribution_template # noqa: F401
@@ -1,17 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from llama_stack.distributions.template import DistributionTemplate
|
||||
|
||||
from ..starter.starter import get_distribution_template as get_starter_distribution_template
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
template = get_starter_distribution_template(name="ci-tests")
|
||||
template.description = "CI tests for Llama Stack"
|
||||
|
||||
return template
@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .dell import get_distribution_template # noqa: F401
@@ -1,158 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildProvider,
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.remote.vector_io.chroma import ChromaVectorIOConfig
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": [
|
||||
BuildProvider(provider_type="remote::tgi"),
|
||||
BuildProvider(provider_type="inline::sentence-transformers"),
|
||||
],
|
||||
"vector_io": [
|
||||
BuildProvider(provider_type="inline::faiss"),
|
||||
BuildProvider(provider_type="remote::chromadb"),
|
||||
BuildProvider(provider_type="remote::pgvector"),
|
||||
],
|
||||
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
||||
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"eval": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"datasetio": [
|
||||
BuildProvider(provider_type="remote::huggingface"),
|
||||
BuildProvider(provider_type="inline::localfs"),
|
||||
],
|
||||
"scoring": [
|
||||
BuildProvider(provider_type="inline::basic"),
|
||||
BuildProvider(provider_type="inline::llm-as-judge"),
|
||||
BuildProvider(provider_type="inline::braintrust"),
|
||||
],
|
||||
"tool_runtime": [
|
||||
BuildProvider(provider_type="remote::brave-search"),
|
||||
BuildProvider(provider_type="remote::tavily-search"),
|
||||
BuildProvider(provider_type="inline::rag-runtime"),
|
||||
],
|
||||
}
|
||||
name = "dell"
|
||||
inference_provider = Provider(
|
||||
provider_id="tgi0",
|
||||
provider_type="remote::tgi",
|
||||
config={
|
||||
"url": "${env.DEH_URL}",
|
||||
},
|
||||
)
|
||||
safety_inference_provider = Provider(
|
||||
provider_id="tgi1",
|
||||
provider_type="remote::tgi",
|
||||
config={
|
||||
"url": "${env.DEH_SAFETY_URL}",
|
||||
},
|
||||
)
|
||||
embedding_provider = Provider(
|
||||
provider_id="sentence-transformers",
|
||||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
chromadb_provider = Provider(
|
||||
provider_id="chromadb",
|
||||
provider_type="remote::chromadb",
|
||||
config=ChromaVectorIOConfig.sample_run_config(
|
||||
f"~/.llama/distributions/{name}/",
|
||||
url="${env.CHROMADB_URL:=}",
|
||||
),
|
||||
)
|
||||
|
||||
inference_model = ModelInput(
|
||||
model_id="${env.INFERENCE_MODEL}",
|
||||
provider_id="tgi0",
|
||||
)
|
||||
safety_model = ModelInput(
|
||||
model_id="${env.SAFETY_MODEL}",
|
||||
provider_id="tgi1",
|
||||
)
|
||||
embedding_model = ModelInput(
|
||||
model_id="nomic-embed-text-v1.5",
|
||||
provider_id="sentence-transformers",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 768,
|
||||
},
|
||||
)
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="brave-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
distro_type="self_hosted",
|
||||
description="Dell's distribution of Llama Stack. TGI inference via Dell's custom container",
|
||||
container_image=None,
|
||||
providers=providers,
|
||||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"vector_io": [chromadb_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
"run-with-safety.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [
|
||||
inference_provider,
|
||||
safety_inference_provider,
|
||||
embedding_provider,
|
||||
],
|
||||
"vector_io": [chromadb_provider],
|
||||
},
|
||||
default_models=[inference_model, safety_model, embedding_model],
|
||||
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
"DEH_URL": (
|
||||
"http://0.0.0.0:8181",
|
||||
"URL for the Dell inference server",
|
||||
),
|
||||
"DEH_SAFETY_URL": (
|
||||
"http://0.0.0.0:8282",
|
||||
"URL for the Dell safety inference server",
|
||||
),
|
||||
"CHROMA_URL": (
|
||||
"http://localhost:6601",
|
||||
"URL for the Chroma server",
|
||||
),
|
||||
"INFERENCE_MODEL": (
|
||||
"meta-llama/Llama-3.2-3B-Instruct",
|
||||
"Inference model loaded into the TGI server",
|
||||
),
|
||||
"SAFETY_MODEL": (
|
||||
"meta-llama/Llama-Guard-3-1B",
|
||||
"Name of the safety (Llama-Guard) model to use",
|
||||
),
|
||||
},
|
||||
)
@@ -1,178 +0,0 @@
---
|
||||
orphan: true
|
||||
---
|
||||
|
||||
# Dell Distribution of Llama Stack
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 2
|
||||
:hidden:
|
||||
|
||||
self
|
||||
```
|
||||
|
||||
The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
|
||||
|
||||
{{ providers_table }}
|
||||
|
||||
You can use this distribution if you have GPUs and want to run an independent TGI or Dell Enterprise Hub container for running inference.
|
||||
|
||||
{% if run_config_env_vars %}
|
||||
### Environment Variables
|
||||
|
||||
The following environment variables can be configured:
|
||||
|
||||
{% for var, (default_value, description) in run_config_env_vars.items() %}
|
||||
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
|
||||
## Setting up the inference server using Dell Enterprise Hub's custom TGI container
|
||||
|
||||
NOTE: This is a placeholder to run inference with TGI. This will be updated to use [Dell Enterprise Hub's containers](https://dell.huggingface.co/authenticated/models) once verified.
|
||||
|
||||
```bash
|
||||
export INFERENCE_PORT=8181
|
||||
export DEH_URL=http://0.0.0.0:$INFERENCE_PORT
|
||||
export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
|
||||
export CHROMADB_HOST=localhost
|
||||
export CHROMADB_PORT=6601
|
||||
export CHROMA_URL=http://$CHROMADB_HOST:$CHROMADB_PORT
|
||||
export CUDA_VISIBLE_DEVICES=0
|
||||
export LLAMA_STACK_PORT=8321
|
||||
|
||||
docker run --rm -it \
|
||||
--pull always \
|
||||
--network host \
|
||||
-v $HOME/.cache/huggingface:/data \
|
||||
-e HF_TOKEN=$HF_TOKEN \
|
||||
-p $INFERENCE_PORT:$INFERENCE_PORT \
|
||||
--gpus $CUDA_VISIBLE_DEVICES \
|
||||
ghcr.io/huggingface/text-generation-inference \
|
||||
--dtype bfloat16 \
|
||||
--usage-stats off \
|
||||
--sharded false \
|
||||
--cuda-memory-fraction 0.7 \
|
||||
--model-id $INFERENCE_MODEL \
|
||||
--port $INFERENCE_PORT --hostname 0.0.0.0
|
||||
```
|
||||
|
||||
If you are using the Llama Stack Safety / Shield APIs, you will also need to run a second TGI instance with a corresponding safety model such as `meta-llama/Llama-Guard-3-1B`, using a command like:
|
||||
|
||||
```bash
|
||||
export SAFETY_INFERENCE_PORT=8282
|
||||
export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT
|
||||
export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||
export CUDA_VISIBLE_DEVICES=1
|
||||
|
||||
docker run --rm -it \
|
||||
--pull always \
|
||||
--network host \
|
||||
-v $HOME/.cache/huggingface:/data \
|
||||
-e HF_TOKEN=$HF_TOKEN \
|
||||
-p $SAFETY_INFERENCE_PORT:$SAFETY_INFERENCE_PORT \
|
||||
--gpus $CUDA_VISIBLE_DEVICES \
|
||||
ghcr.io/huggingface/text-generation-inference \
|
||||
--dtype bfloat16 \
|
||||
--usage-stats off \
|
||||
--sharded false \
|
||||
--cuda-memory-fraction 0.7 \
|
||||
--model-id $SAFETY_MODEL \
|
||||
--hostname 0.0.0.0 \
|
||||
--port $SAFETY_INFERENCE_PORT
|
||||
```
|
||||
|
||||
## Setting up the ChromaDB vector database

The Dell distribution relies on ChromaDB as its vector database. You can start a ChromaDB instance easily with Docker or Podman (the example below uses Podman).
|
||||
```bash
|
||||
# This is where the indices are persisted
|
||||
mkdir -p $HOME/chromadb
|
||||
|
||||
podman run --rm -it \
|
||||
--network host \
|
||||
--name chromadb \
|
||||
-v $HOME/chromadb:/chroma/chroma \
|
||||
-e IS_PERSISTENT=TRUE \
|
||||
chromadb/chroma:latest \
|
||||
--port $CHROMADB_PORT \
|
||||
--host $CHROMADB_HOST
|
||||
```
|
||||
|
||||
## Running Llama Stack
|
||||
|
||||
Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via Conda (building from source) or via Docker, which has a pre-built image.
|
||||
|
||||
### Via Docker
|
||||
|
||||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
# NOTE: mount the llama-stack source directory only if testing local changes; otherwise the mount is not needed.
# Use localhost/distribution-dell:dev instead of the published image if building / testing locally.
docker run -it \
--pull always \
--network host \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v $HOME/.llama:/root/.llama \
-v $HOME/git/llama-stack:/app/llama-stack-source \
-e INFERENCE_MODEL=$INFERENCE_MODEL \
-e DEH_URL=$DEH_URL \
-e CHROMA_URL=$CHROMA_URL \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT
|
||||
|
||||
```
|
||||
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
||||
```bash
|
||||
# You need a local checkout of llama-stack to run this, get it using
|
||||
# git clone https://github.com/meta-llama/llama-stack.git
|
||||
cd /path/to/llama-stack
|
||||
|
||||
export SAFETY_INFERENCE_PORT=8282
|
||||
export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT
|
||||
export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v $HOME/.llama:/root/.llama \
|
||||
-v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
|
||||
-e INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
-e DEH_URL=$DEH_URL \
|
||||
-e SAFETY_MODEL=$SAFETY_MODEL \
|
||||
-e DEH_SAFETY_URL=$DEH_SAFETY_URL \
|
||||
-e CHROMA_URL=$CHROMA_URL \
|
||||
llamastack/distribution-{{ name }} \
|
||||
--config /root/my-run.yaml \
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
### Via Conda
|
||||
|
||||
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
|
||||
|
||||
```bash
|
||||
llama stack list-deps {{ name }} | xargs -L1 pip install
|
||||
INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
DEH_URL=$DEH_URL \
|
||||
CHROMA_URL=$CHROMA_URL \
|
||||
llama stack run {{ name }} \
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
||||
```bash
|
||||
INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
DEH_URL=$DEH_URL \
|
||||
SAFETY_MODEL=$SAFETY_MODEL \
|
||||
DEH_SAFETY_URL=$DEH_SAFETY_URL \
|
||||
CHROMA_URL=$CHROMA_URL \
|
||||
llama stack run ./run-with-safety.yaml \
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .meta_reference import get_distribution_template # noqa: F401
@@ -1,89 +0,0 @@
---
|
||||
orphan: true
|
||||
---
|
||||
# Meta Reference GPU Distribution
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 2
|
||||
:hidden:
|
||||
|
||||
self
|
||||
```
|
||||
|
||||
The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations:
|
||||
|
||||
{{ providers_table }}
|
||||
|
||||
Note that you need access to NVIDIA GPUs to run this distribution. It is not compatible with CPU-only machines or machines with AMD GPUs.
|
||||
|
||||
{% if run_config_env_vars %}
|
||||
### Environment Variables
|
||||
|
||||
The following environment variables can be configured:
|
||||
|
||||
{% for var, (default_value, description) in run_config_env_vars.items() %}
|
||||
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
|
||||
## Prerequisite: Downloading Models
|
||||
|
||||
Please check that you have Llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](../../references/llama_cli_reference/download_models.md) for how to download the models using the Hugging Face CLI.
|
||||
|
||||
## Running the Distribution
|
||||
|
||||
You can do this via venv, or via Docker, which has a pre-built image.
|
||||
|
||||
### Via Docker
|
||||
|
||||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
--gpus all \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ~/.llama:/root/.llama \
|
||||
-e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
llamastack/distribution-{{ name }} \
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
||||
```bash
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
--gpus all \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ~/.llama:/root/.llama \
|
||||
-e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
-e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
|
||||
llamastack/distribution-{{ name }} \
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
### Via venv
|
||||
|
||||
Make sure you have the Llama Stack CLI available.
|
||||
|
||||
```bash
|
||||
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
|
||||
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
llama stack run distributions/{{ name }}/run.yaml \
|
||||
--port 8321
|
||||
```
|
||||
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
||||
```bash
|
||||
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
|
||||
llama stack run distributions/{{ name }}/run-with-safety.yaml \
|
||||
--port 8321
|
||||
```
@@ -1,163 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildProvider,
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
|
||||
from llama_stack.providers.inline.inference.meta_reference import (
|
||||
MetaReferenceInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"vector_io": [
|
||||
BuildProvider(provider_type="inline::faiss"),
|
||||
BuildProvider(provider_type="remote::chromadb"),
|
||||
BuildProvider(provider_type="remote::pgvector"),
|
||||
],
|
||||
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
||||
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"eval": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"datasetio": [
|
||||
BuildProvider(provider_type="remote::huggingface"),
|
||||
BuildProvider(provider_type="inline::localfs"),
|
||||
],
|
||||
"scoring": [
|
||||
BuildProvider(provider_type="inline::basic"),
|
||||
BuildProvider(provider_type="inline::llm-as-judge"),
|
||||
BuildProvider(provider_type="inline::braintrust"),
|
||||
],
|
||||
"tool_runtime": [
|
||||
BuildProvider(provider_type="remote::brave-search"),
|
||||
BuildProvider(provider_type="remote::tavily-search"),
|
||||
BuildProvider(provider_type="inline::rag-runtime"),
|
||||
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||
],
|
||||
}
|
||||
name = "meta-reference-gpu"
|
||||
inference_provider = Provider(
|
||||
provider_id="meta-reference-inference",
|
||||
provider_type="inline::meta-reference",
|
||||
config=MetaReferenceInferenceConfig.sample_run_config(
|
||||
model="${env.INFERENCE_MODEL}",
|
||||
checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:=null}",
|
||||
),
|
||||
)
|
||||
embedding_provider = Provider(
|
||||
provider_id="sentence-transformers",
|
||||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
)
|
||||
|
||||
inference_model = ModelInput(
|
||||
model_id="${env.INFERENCE_MODEL}",
|
||||
provider_id="meta-reference-inference",
|
||||
)
|
||||
embedding_model = ModelInput(
|
||||
model_id="nomic-embed-text-v1.5",
|
||||
provider_id="sentence-transformers",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 768,
|
||||
},
|
||||
)
|
||||
safety_model = ModelInput(
|
||||
model_id="${env.SAFETY_MODEL}",
|
||||
provider_id="meta-reference-safety",
|
||||
)
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
distro_type="self_hosted",
|
||||
description="Use Meta Reference for running LLM inference",
|
||||
template_path=Path(__file__).parent / "doc_template.md",
|
||||
providers=providers,
|
||||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
"run-with-safety.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [
|
||||
inference_provider,
|
||||
embedding_provider,
|
||||
Provider(
|
||||
provider_id="meta-reference-safety",
|
||||
provider_type="inline::meta-reference",
|
||||
config=MetaReferenceInferenceConfig.sample_run_config(
|
||||
model="${env.SAFETY_MODEL}",
|
||||
checkpoint_dir="${env.SAFETY_CHECKPOINT_DIR:=null}",
|
||||
),
|
||||
),
|
||||
],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[
|
||||
inference_model,
|
||||
safety_model,
|
||||
embedding_model,
|
||||
],
|
||||
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"INFERENCE_MODEL": (
|
||||
"meta-llama/Llama-3.2-3B-Instruct",
|
||||
"Inference model loaded into the Meta Reference server",
|
||||
),
|
||||
"INFERENCE_CHECKPOINT_DIR": (
|
||||
"null",
|
||||
"Directory containing the Meta Reference model checkpoint",
|
||||
),
|
||||
"SAFETY_MODEL": (
|
||||
"meta-llama/Llama-Guard-3-1B",
|
||||
"Name of the safety (Llama-Guard) model to use",
|
||||
),
|
||||
"SAFETY_CHECKPOINT_DIR": (
|
||||
"null",
|
||||
"Directory containing the Llama-Guard model checkpoint",
|
||||
),
|
||||
},
|
||||
)
@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .nvidia import get_distribution_template # noqa: F401
@@ -1,141 +0,0 @@
---
|
||||
orphan: true
|
||||
---
|
||||
# NVIDIA Distribution
|
||||
|
||||
The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
|
||||
|
||||
{{ providers_table }}
|
||||
|
||||
{% if run_config_env_vars %}
|
||||
### Environment Variables
|
||||
|
||||
The following environment variables can be configured:
|
||||
|
||||
{% for var, (default_value, description) in run_config_env_vars.items() %}
|
||||
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
{% if default_models %}
|
||||
### Models
|
||||
|
||||
The following models are available by default:
|
||||
|
||||
{% for model in default_models %}
|
||||
- `{{ model.model_id }} {{ model.doc_string }}`
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
|
||||
## Prerequisites
|
||||
### NVIDIA API Keys
|
||||
|
||||
Make sure you have access to an NVIDIA API key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). Use this key for the `NVIDIA_API_KEY` environment variable.
|
||||
|
||||
### Deploy NeMo Microservices Platform
|
||||
The NVIDIA NeMo microservices platform supports end-to-end microservice deployment of a complete AI flywheel on your Kubernetes cluster through the NeMo Microservices Helm Chart. Please reference the [NVIDIA NeMo Microservices documentation](https://docs.nvidia.com/nemo/microservices/latest/about/index.html) for platform prerequisites and instructions to install and deploy the platform.
|
||||
|
||||
## Supported Services
|
||||
Each Llama Stack API corresponds to a specific NeMo microservice. The core microservices (Customizer, Evaluator, Guardrails) are exposed by the same endpoint. The platform components (Data Store) are each exposed by separate endpoints.
|
||||
|
||||
### Inference: NVIDIA NIM
|
||||
NVIDIA NIM is used for running inference with registered models. There are two ways to access NVIDIA NIMs:
|
||||
1. Hosted (default): Preview APIs hosted at https://integrate.api.nvidia.com (Requires an API key)
|
||||
2. Self-hosted: NVIDIA NIMs that run on your own infrastructure.
|
||||
|
||||
The deployed platform includes the NIM Proxy microservice, which is the service that provides access to your NIMs (for example, to run inference on a model). Set the `NVIDIA_BASE_URL` environment variable to point to your NVIDIA NIM Proxy deployment.
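As a quick connectivity check (a hedged sketch, not part of the distribution itself): NIMs expose an OpenAI-compatible API, so you can point the standard `openai` Python client at either the hosted endpoint or your NIM Proxy deployment. The exact base URL (including any `/v1` suffix) depends on your deployment.

```python
# Hedged sketch: verify that the endpoint behind NVIDIA_BASE_URL serves inference.
# Assumes the OpenAI-compatible route; adjust the /v1 suffix to match your deployment.
import os
from openai import OpenAI

client = OpenAI(
    base_url=os.environ.get("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1"),
    api_key=os.environ.get("NVIDIA_API_KEY", "not-needed-for-self-hosted"),
)
response = client.chat.completions.create(
    model="meta/llama-3.2-1b-instruct",  # model name from the deployment example below
    messages=[{"role": "user", "content": "Say hello."}],
)
print(response.choices[0].message.content)
```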
|
||||
|
||||
### Datasetio API: NeMo Data Store
|
||||
The NeMo Data Store microservice serves as the default file storage solution for the NeMo microservices platform. It exposes APIs compatible with the Hugging Face Hub client (`HfApi`), so you can use that client to interact with the Data Store. The `NVIDIA_DATASETS_URL` environment variable should point to your NeMo Data Store endpoint.
|
||||
|
||||
See the [NVIDIA Datasetio docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/datasetio/nvidia/README.md) for supported features and example usage.
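Because the Data Store speaks the Hugging Face Hub API, a minimal way to browse it from Python is to instantiate `HfApi` against `NVIDIA_DATASETS_URL`. This is a hedged sketch; authentication handling may differ in your deployment.

```python
# Hedged sketch: list datasets stored in NeMo Data Store via the HF Hub client.
import os
from huggingface_hub import HfApi

hf_api = HfApi(endpoint=os.environ["NVIDIA_DATASETS_URL"], token="")
for ds in hf_api.list_datasets():
    print(ds.id)
```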
|
||||
|
||||
### Eval API: NeMo Evaluator
|
||||
The NeMo Evaluator microservice supports evaluation of LLMs. Launching an Evaluation job with NeMo Evaluator requires an Evaluation Config (an object that contains metadata needed by the job). A Llama Stack Benchmark maps to an Evaluation Config, so registering a Benchmark creates an Evaluation Config in NeMo Evaluator. The `NVIDIA_EVALUATOR_URL` environment variable should point to your NeMo Microservices endpoint.
|
||||
|
||||
See the [NVIDIA Eval docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/eval/nvidia/README.md) for supported features and example usage.
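To make the Benchmark-to-Evaluation-Config mapping concrete, here is a hedged sketch using the `BenchmarkInput` type that appears elsewhere in this repository; registering an equivalent benchmark through the stack creates the corresponding Evaluation Config in NeMo Evaluator.

```python
# Hedged sketch: the fields of a Llama Stack benchmark that map onto a
# NeMo Evaluator Evaluation Config (ids and scoring functions are taken
# from the open-benchmark distribution in this repo).
from llama_stack.core.datatypes import BenchmarkInput

simpleqa = BenchmarkInput(
    benchmark_id="meta-reference-simpleqa",
    dataset_id="simpleqa",
    scoring_functions=["llm-as-judge::405b-simpleqa"],
)
```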
|
||||
|
||||
### Post-Training API: NeMo Customizer
|
||||
The NeMo Customizer microservice supports fine-tuning models. You can reference [this list of supported models](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/post_training/nvidia/models.py) that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint.
|
||||
|
||||
See the [NVIDIA Post-Training docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/post_training/nvidia/README.md) for supported features and example usage.
|
||||
|
||||
### Safety API: NeMo Guardrails
|
||||
The NeMo Guardrails microservice sits between your application and the LLM, and adds checks and content moderation to a model. The `GUARDRAILS_SERVICE_URL` environment variable should point to your NeMo Microservices endpoint.
|
||||
|
||||
See the [NVIDIA Safety docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/safety/nvidia/README.md) for supported features and example usage.
|
||||
|
||||
## Deploying models
|
||||
In order to use a registered model with the Llama Stack APIs, ensure the corresponding NIM is deployed to your environment. For example, you can use the NIM Proxy microservice to deploy `meta/llama-3.2-1b-instruct`.
|
||||
|
||||
Note: For improved inference speeds, we need to use NIM with the `fast_outlines` guided decoding system (specified in the request body). This is the default if you deployed the platform with the NeMo Microservices Helm Chart.
|
||||
```sh
|
||||
# URL to NeMo NIM Proxy service
|
||||
export NEMO_URL="http://nemo.test"
|
||||
|
||||
curl --location "$NEMO_URL/v1/deployment/model-deployments" \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"name": "llama-3.2-1b-instruct",
|
||||
"namespace": "meta",
|
||||
"config": {
|
||||
"model": "meta/llama-3.2-1b-instruct",
|
||||
"nim_deployment": {
|
||||
"image_name": "nvcr.io/nim/meta/llama-3.2-1b-instruct",
|
||||
"image_tag": "1.8.3",
|
||||
"pvc_size": "25Gi",
|
||||
"gpu": 1,
|
||||
"additional_envs": {
|
||||
"NIM_GUIDED_DECODING_BACKEND": "fast_outlines"
|
||||
}
|
||||
}
|
||||
}
|
||||
}'
|
||||
```
|
||||
This NIM deployment should take approximately 10 minutes to go live. [See the docs](https://docs.nvidia.com/nemo/microservices/latest/get-started/tutorials/deploy-nims.html) for more information on how to deploy a NIM and verify it's available for inference.
|
||||
|
||||
You can also remove a deployed NIM to free up GPU resources, if needed.
|
||||
```sh
|
||||
export NEMO_URL="http://nemo.test"
|
||||
|
||||
curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.1-8b-instruct"
|
||||
```
|
||||
|
||||
## Running Llama Stack with NVIDIA
|
||||
|
||||
You can do this via venv (building from source), or via Docker, which has a pre-built image.
|
||||
|
||||
### Via Docker
|
||||
|
||||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ./run.yaml:/root/my-run.yaml \
|
||||
-e NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
||||
llamastack/distribution-{{ name }} \
|
||||
--config /root/my-run.yaml \
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
### Via venv
|
||||
|
||||
If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
|
||||
|
||||
```bash
|
||||
INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
|
||||
llama stack list-deps nvidia | xargs -L1 uv pip install
|
||||
NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
||||
INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
llama stack run ./run.yaml \
|
||||
--port 8321
|
||||
```
|
||||
|
||||
## Example Notebooks
|
||||
For examples of how to use the NVIDIA Distribution to run inference, fine-tune, evaluate, and run safety checks on your LLMs, you can reference the example notebooks in [docs/notebooks/nvidia](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks/nvidia).
@@ -1,154 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ShieldInput, ToolGroupInput
|
||||
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
|
||||
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
|
||||
from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig
|
||||
from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig
|
||||
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
|
||||
from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
|
||||
|
||||
|
||||
def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": [BuildProvider(provider_type="remote::nvidia")],
|
||||
"vector_io": [BuildProvider(provider_type="inline::faiss")],
|
||||
"safety": [BuildProvider(provider_type="remote::nvidia")],
|
||||
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"eval": [BuildProvider(provider_type="remote::nvidia")],
|
||||
"post_training": [BuildProvider(provider_type="remote::nvidia")],
|
||||
"datasetio": [
|
||||
BuildProvider(provider_type="inline::localfs"),
|
||||
BuildProvider(provider_type="remote::nvidia"),
|
||||
],
|
||||
"scoring": [BuildProvider(provider_type="inline::basic")],
|
||||
"tool_runtime": [BuildProvider(provider_type="inline::rag-runtime")],
|
||||
"files": [BuildProvider(provider_type="inline::localfs")],
|
||||
}
|
||||
|
||||
inference_provider = Provider(
|
||||
provider_id="nvidia",
|
||||
provider_type="remote::nvidia",
|
||||
config=NVIDIAConfig.sample_run_config(),
|
||||
)
|
||||
safety_provider = Provider(
|
||||
provider_id="nvidia",
|
||||
provider_type="remote::nvidia",
|
||||
config=NVIDIASafetyConfig.sample_run_config(),
|
||||
)
|
||||
datasetio_provider = Provider(
|
||||
provider_id="nvidia",
|
||||
provider_type="remote::nvidia",
|
||||
config=NvidiaDatasetIOConfig.sample_run_config(),
|
||||
)
|
||||
eval_provider = Provider(
|
||||
provider_id="nvidia",
|
||||
provider_type="remote::nvidia",
|
||||
config=NVIDIAEvalConfig.sample_run_config(),
|
||||
)
|
||||
files_provider = Provider(
|
||||
provider_id="meta-reference-files",
|
||||
provider_type="inline::localfs",
|
||||
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
)
|
||||
inference_model = ModelInput(
|
||||
model_id="${env.INFERENCE_MODEL}",
|
||||
provider_id="nvidia",
|
||||
)
|
||||
safety_model = ModelInput(
|
||||
model_id="${env.SAFETY_MODEL}",
|
||||
provider_id="nvidia",
|
||||
)
|
||||
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
distro_type="self_hosted",
|
||||
description="Use NVIDIA NIM for running LLM inference, evaluation and safety",
|
||||
container_image=None,
|
||||
template_path=Path(__file__).parent / "doc_template.md",
|
||||
providers=providers,
|
||||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider],
|
||||
"datasetio": [datasetio_provider],
|
||||
"eval": [eval_provider],
|
||||
"files": [files_provider],
|
||||
},
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
"run-with-safety.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [
|
||||
inference_provider,
|
||||
safety_provider,
|
||||
],
|
||||
"eval": [eval_provider],
|
||||
"files": [files_provider],
|
||||
},
|
||||
default_models=[inference_model, safety_model],
|
||||
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")],
|
||||
default_tool_groups=default_tool_groups,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
"NVIDIA_API_KEY": (
|
||||
"",
|
||||
"NVIDIA API Key",
|
||||
),
|
||||
"NVIDIA_APPEND_API_VERSION": (
|
||||
"True",
|
||||
"Whether to append the API version to the base_url",
|
||||
),
|
||||
## Nemo Customizer related variables
|
||||
"NVIDIA_DATASET_NAMESPACE": (
|
||||
"default",
|
||||
"NVIDIA Dataset Namespace",
|
||||
),
|
||||
"NVIDIA_PROJECT_ID": (
|
||||
"test-project",
|
||||
"NVIDIA Project ID",
|
||||
),
|
||||
"NVIDIA_CUSTOMIZER_URL": (
|
||||
"https://customizer.api.nvidia.com",
|
||||
"NVIDIA Customizer URL",
|
||||
),
|
||||
"NVIDIA_OUTPUT_MODEL_DIR": (
|
||||
"test-example-model@v1",
|
||||
"NVIDIA Output Model Directory",
|
||||
),
|
||||
"GUARDRAILS_SERVICE_URL": (
|
||||
"http://0.0.0.0:7331",
|
||||
"URL for the NeMo Guardrails Service",
|
||||
),
|
||||
"NVIDIA_GUARDRAILS_CONFIG_ID": (
|
||||
"self-check",
|
||||
"NVIDIA Guardrail Configuration ID",
|
||||
),
|
||||
"NVIDIA_EVALUATOR_URL": (
|
||||
"http://0.0.0.0:7331",
|
||||
"URL for the NeMo Evaluator Service",
|
||||
),
|
||||
"INFERENCE_MODEL": (
|
||||
"Llama3.1-8B-Instruct",
|
||||
"Inference model",
|
||||
),
|
||||
"SAFETY_MODEL": (
|
||||
"meta/llama-3.1-8b-instruct",
|
||||
"Name of the model to use for safety",
|
||||
),
|
||||
},
|
||||
)
@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .open_benchmark import get_distribution_template # noqa: F401
@@ -1,303 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from llama_stack.apis.datasets import DatasetPurpose, URIDataSource
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.core.datatypes import (
|
||||
BenchmarkInput,
|
||||
BuildProvider,
|
||||
DatasetInput,
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.distributions.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
get_model_registry,
|
||||
)
|
||||
from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
|
||||
SQLiteVectorIOConfig,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.anthropic.config import AnthropicConfig
|
||||
from llama_stack.providers.remote.inference.gemini.config import GeminiConfig
|
||||
from llama_stack.providers.remote.inference.groq.config import GroqConfig
|
||||
from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
|
||||
from llama_stack.providers.remote.inference.together.config import TogetherImplConfig
|
||||
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
|
||||
from llama_stack.providers.remote.vector_io.pgvector.config import (
|
||||
PGVectorVectorIOConfig,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
|
||||
|
||||
|
||||
def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
|
||||
# in this template, we allow each API key to be optional
|
||||
providers = [
|
||||
(
|
||||
"openai",
|
||||
[
|
||||
ProviderModelEntry(
|
||||
provider_model_id="gpt-4o",
|
||||
model_type=ModelType.llm,
|
||||
)
|
||||
],
|
||||
OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:=}"),
|
||||
),
|
||||
(
|
||||
"anthropic",
|
||||
[
|
||||
ProviderModelEntry(
|
||||
provider_model_id="claude-3-5-sonnet-latest",
|
||||
model_type=ModelType.llm,
|
||||
)
|
||||
],
|
||||
AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:=}"),
|
||||
),
|
||||
(
|
||||
"gemini",
|
||||
[
|
||||
ProviderModelEntry(
|
||||
provider_model_id="gemini/gemini-1.5-flash",
|
||||
model_type=ModelType.llm,
|
||||
)
|
||||
],
|
||||
GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:=}"),
|
||||
),
|
||||
(
|
||||
"groq",
|
||||
[],
|
||||
GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:=}"),
|
||||
),
|
||||
(
|
||||
"together",
|
||||
[],
|
||||
TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:=}"),
|
||||
),
|
||||
]
|
||||
inference_providers = []
|
||||
available_models = {}
|
||||
for provider_id, model_entries, config in providers:
|
||||
inference_providers.append(
|
||||
Provider(
|
||||
provider_id=provider_id,
|
||||
provider_type=f"remote::{provider_id}",
|
||||
config=config,
|
||||
)
|
||||
)
|
||||
available_models[provider_id] = model_entries
|
||||
return inference_providers, available_models
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
inference_providers, available_models = get_inference_providers()
|
||||
providers = {
|
||||
"inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in inference_providers],
|
||||
"vector_io": [
|
||||
BuildProvider(provider_type="inline::sqlite-vec"),
|
||||
BuildProvider(provider_type="remote::chromadb"),
|
||||
BuildProvider(provider_type="remote::pgvector"),
|
||||
],
|
||||
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
||||
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"eval": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"datasetio": [
|
||||
BuildProvider(provider_type="remote::huggingface"),
|
||||
BuildProvider(provider_type="inline::localfs"),
|
||||
],
|
||||
"scoring": [
|
||||
BuildProvider(provider_type="inline::basic"),
|
||||
BuildProvider(provider_type="inline::llm-as-judge"),
|
||||
BuildProvider(provider_type="inline::braintrust"),
|
||||
],
|
||||
"tool_runtime": [
|
||||
BuildProvider(provider_type="remote::brave-search"),
|
||||
BuildProvider(provider_type="remote::tavily-search"),
|
||||
BuildProvider(provider_type="inline::rag-runtime"),
|
||||
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||
],
|
||||
}
|
||||
name = "open-benchmark"
|
||||
|
||||
vector_io_providers = [
|
||||
Provider(
|
||||
provider_id="sqlite-vec",
|
||||
provider_type="inline::sqlite-vec",
|
||||
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||
),
|
||||
Provider(
|
||||
provider_id="${env.ENABLE_CHROMADB:+chromadb}",
|
||||
provider_type="remote::chromadb",
|
||||
config=ChromaVectorIOConfig.sample_run_config(
|
||||
f"~/.llama/distributions/{name}", url="${env.CHROMADB_URL:=}"
|
||||
),
|
||||
),
|
||||
Provider(
|
||||
provider_id="${env.ENABLE_PGVECTOR:+pgvector}",
|
||||
provider_type="remote::pgvector",
|
||||
config=PGVectorVectorIOConfig.sample_run_config(
|
||||
f"~/.llama/distributions/{name}",
|
||||
db="${env.PGVECTOR_DB:=}",
|
||||
user="${env.PGVECTOR_USER:=}",
|
||||
password="${env.PGVECTOR_PASSWORD:=}",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
|
||||
models, _ = get_model_registry(available_models)
|
||||
default_models = models + [
|
||||
ModelInput(
|
||||
model_id="meta-llama/Llama-3.3-70B-Instruct",
|
||||
provider_id="groq",
|
||||
provider_model_id="groq/llama-3.3-70b-versatile",
|
||||
model_type=ModelType.llm,
|
||||
),
|
||||
ModelInput(
|
||||
model_id="meta-llama/Llama-3.1-405B-Instruct",
|
||||
provider_id="together",
|
||||
provider_model_id="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
|
||||
model_type=ModelType.llm,
|
||||
),
|
||||
]
|
||||
|
||||
default_datasets = [
|
||||
DatasetInput(
|
||||
dataset_id="simpleqa",
|
||||
purpose=DatasetPurpose.eval_messages_answer,
|
||||
source=URIDataSource(
|
||||
uri="huggingface://datasets/llamastack/simpleqa?split=train",
|
||||
),
|
||||
),
|
||||
DatasetInput(
|
||||
dataset_id="mmlu_cot",
|
||||
purpose=DatasetPurpose.eval_messages_answer,
|
||||
source=URIDataSource(
|
||||
uri="huggingface://datasets/llamastack/mmlu_cot?split=test&name=all",
|
||||
),
|
||||
),
|
||||
DatasetInput(
|
||||
dataset_id="gpqa_cot",
|
||||
purpose=DatasetPurpose.eval_messages_answer,
|
||||
source=URIDataSource(
|
||||
uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main",
|
||||
),
|
||||
),
|
||||
DatasetInput(
|
||||
dataset_id="math_500",
|
||||
purpose=DatasetPurpose.eval_messages_answer,
|
||||
source=URIDataSource(
|
||||
uri="huggingface://datasets/llamastack/math_500?split=test",
|
||||
),
|
||||
),
|
||||
DatasetInput(
|
||||
dataset_id="ifeval",
|
||||
purpose=DatasetPurpose.eval_messages_answer,
|
||||
source=URIDataSource(
|
||||
uri="huggingface://datasets/llamastack/IfEval?split=train",
|
||||
),
|
||||
),
|
||||
DatasetInput(
|
||||
dataset_id="docvqa",
|
||||
purpose=DatasetPurpose.eval_messages_answer,
|
||||
source=URIDataSource(
|
||||
uri="huggingface://datasets/llamastack/docvqa?split=val",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
default_benchmarks = [
|
||||
BenchmarkInput(
|
||||
benchmark_id="meta-reference-simpleqa",
|
||||
dataset_id="simpleqa",
|
||||
scoring_functions=["llm-as-judge::405b-simpleqa"],
|
||||
),
|
||||
BenchmarkInput(
|
||||
benchmark_id="meta-reference-mmlu-cot",
|
||||
dataset_id="mmlu_cot",
|
||||
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||
),
|
||||
BenchmarkInput(
|
||||
benchmark_id="meta-reference-gpqa-cot",
|
||||
dataset_id="gpqa_cot",
|
||||
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||
),
|
||||
BenchmarkInput(
|
||||
benchmark_id="meta-reference-math-500",
|
||||
dataset_id="math_500",
|
||||
scoring_functions=["basic::regex_parser_math_response"],
|
||||
),
|
||||
BenchmarkInput(
|
||||
benchmark_id="meta-reference-ifeval",
|
||||
dataset_id="ifeval",
|
||||
scoring_functions=["basic::ifeval"],
|
||||
),
|
||||
BenchmarkInput(
|
||||
benchmark_id="meta-reference-docvqa",
|
||||
dataset_id="docvqa",
|
||||
scoring_functions=["basic::docvqa"],
|
||||
),
|
||||
]
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
distro_type="self_hosted",
|
||||
description="Distribution for running open benchmarks",
|
||||
container_image=None,
|
||||
template_path=None,
|
||||
providers=providers,
|
||||
available_models_by_provider=available_models,
|
||||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": inference_providers,
|
||||
"vector_io": vector_io_providers,
|
||||
},
|
||||
default_models=default_models,
|
||||
default_tool_groups=default_tool_groups,
|
||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||
default_datasets=default_datasets,
|
||||
default_benchmarks=default_benchmarks,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"TOGETHER_API_KEY": (
|
||||
"",
|
||||
"Together API Key",
|
||||
),
|
||||
"OPENAI_API_KEY": (
|
||||
"",
|
||||
"OpenAI API Key",
|
||||
),
|
||||
"GEMINI_API_KEY": (
|
||||
"",
|
||||
"Gemini API Key",
|
||||
),
|
||||
"ANTHROPIC_API_KEY": (
|
||||
"",
|
||||
"Anthropic API Key",
|
||||
),
|
||||
"GROQ_API_KEY": (
|
||||
"",
|
||||
"Groq API Key",
|
||||
),
|
||||
},
|
||||
)
@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .postgres_demo import get_distribution_template # noqa: F401
@@ -1,125 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildProvider,
|
||||
ModelInput,
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.distributions.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
|
||||
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
|
||||
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
|
||||
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
inference_providers = [
|
||||
Provider(
|
||||
provider_id="vllm-inference",
|
||||
provider_type="remote::vllm",
|
||||
config=VLLMInferenceAdapterConfig.sample_run_config(
|
||||
url="${env.VLLM_URL:=http://localhost:8000/v1}",
|
||||
),
|
||||
),
|
||||
]
|
||||
providers = {
|
||||
"inference": [
|
||||
BuildProvider(provider_type="remote::vllm"),
|
||||
BuildProvider(provider_type="inline::sentence-transformers"),
|
||||
],
|
||||
"vector_io": [BuildProvider(provider_type="remote::chromadb")],
|
||||
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
||||
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||
"tool_runtime": [
|
||||
BuildProvider(provider_type="remote::brave-search"),
|
||||
BuildProvider(provider_type="remote::tavily-search"),
|
||||
BuildProvider(provider_type="inline::rag-runtime"),
|
||||
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||
],
|
||||
}
|
||||
name = "postgres-demo"
|
||||
|
||||
vector_io_providers = [
|
||||
Provider(
|
||||
provider_id="${env.ENABLE_CHROMADB:+chromadb}",
|
||||
provider_type="remote::chromadb",
|
||||
config=ChromaVectorIOConfig.sample_run_config(
|
||||
f"~/.llama/distributions/{name}",
|
||||
url="${env.CHROMADB_URL:=}",
|
||||
),
|
||||
),
|
||||
]
|
||||
default_tool_groups = [
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="tavily-search",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
]
|
||||
|
||||
default_models = [
|
||||
ModelInput(
|
||||
model_id="${env.INFERENCE_MODEL}",
|
||||
provider_id="vllm-inference",
|
||||
)
|
||||
]
|
||||
embedding_provider = Provider(
|
||||
provider_id="sentence-transformers",
|
||||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
embedding_model = ModelInput(
|
||||
model_id="nomic-embed-text-v1.5",
|
||||
provider_id=embedding_provider.provider_id,
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 768,
|
||||
},
|
||||
)
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
distro_type="self_hosted",
|
||||
description="Quick start template for running Llama Stack with several popular providers",
|
||||
container_image=None,
|
||||
template_path=None,
|
||||
providers=providers,
|
||||
available_models_by_provider={},
|
||||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": inference_providers + [embedding_provider],
|
||||
"vector_io": vector_io_providers,
|
||||
},
|
||||
default_models=default_models + [embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||
storage_backends={
|
||||
"kv_default": PostgresKVStoreConfig.sample_run_config(
|
||||
table_name="llamastack_kvstore",
|
||||
),
|
||||
"sql_default": PostgresSqlStoreConfig.sample_run_config(),
|
||||
},
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
},
|
||||
)
@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .starter_gpu import get_distribution_template # noqa: F401
@@ -1,20 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from llama_stack.distributions.template import BuildProvider, DistributionTemplate
|
||||
|
||||
from ..starter.starter import get_distribution_template as get_starter_distribution_template
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
template = get_starter_distribution_template(name="starter-gpu")
|
||||
template.description = "Quick start template for running Llama Stack with several popular providers. This distribution is intended for GPU-enabled environments."
|
||||
|
||||
template.providers["post_training"] = [
|
||||
BuildProvider(provider_type="inline::huggingface-gpu"),
|
||||
]
|
||||
return template
@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .starter import get_distribution_template # noqa: F401
@@ -1,331 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildProvider,
|
||||
Provider,
|
||||
ProviderSpec,
|
||||
    QualifiedModel,
    SafetyConfig,
    ShieldInput,
    ToolGroupInput,
    VectorStoresConfig,
)
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
from llama_stack.providers.datatypes import RemoteProviderSpec
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.inline.inference.sentence_transformers import (
    SentenceTransformersInferenceConfig,
)
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.inline.vector_io.milvus.config import MilvusVectorIOConfig
from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
    SQLiteVectorIOConfig,
)
from llama_stack.providers.registry.inference import available_providers
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.remote.vector_io.pgvector.config import (
    PGVectorVectorIOConfig,
)
from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig


def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]:
    """Get configuration for a provider using its adapter's config class."""
    config_class = instantiate_class_type(provider_spec.config_class)

    if hasattr(config_class, "sample_run_config"):
        config: dict[str, Any] = config_class.sample_run_config()
        return config
    return {}


ENABLED_INFERENCE_PROVIDERS = [
    "ollama",
    "vllm",
    "tgi",
    "fireworks",
    "together",
    "gemini",
    "vertexai",
    "groq",
    "sambanova",
    "anthropic",
    "openai",
    "cerebras",
    "nvidia",
    "bedrock",
    "azure",
]

INFERENCE_PROVIDER_IDS = {
    "ollama": "${env.OLLAMA_URL:+ollama}",
    "vllm": "${env.VLLM_URL:+vllm}",
    "tgi": "${env.TGI_URL:+tgi}",
    "cerebras": "${env.CEREBRAS_API_KEY:+cerebras}",
    "nvidia": "${env.NVIDIA_API_KEY:+nvidia}",
    "vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}",
    "azure": "${env.AZURE_API_KEY:+azure}",
}


def get_remote_inference_providers() -> list[Provider]:
    # Filter out inline providers and some others - the starter distro only exposes remote providers
    remote_providers = [
        provider
        for provider in available_providers()
        if isinstance(provider, RemoteProviderSpec) and provider.adapter_type in ENABLED_INFERENCE_PROVIDERS
    ]

    inference_providers = []
    for provider_spec in remote_providers:
        provider_type = provider_spec.adapter_type

        if provider_type in INFERENCE_PROVIDER_IDS:
            provider_id = INFERENCE_PROVIDER_IDS[provider_type]
        else:
            provider_id = provider_type.replace("-", "_").replace("::", "_")
        config = _get_config_for_provider(provider_spec)

        inference_providers.append(
            Provider(
                provider_id=provider_id,
                provider_type=f"remote::{provider_type}",
                config=config,
            )
        )
    return inference_providers


def get_distribution_template(name: str = "starter") -> DistributionTemplate:
    remote_inference_providers = get_remote_inference_providers()

    providers = {
        "inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in remote_inference_providers]
        + [BuildProvider(provider_type="inline::sentence-transformers")],
        "vector_io": [
            BuildProvider(provider_type="inline::faiss"),
            BuildProvider(provider_type="inline::sqlite-vec"),
            BuildProvider(provider_type="inline::milvus"),
            BuildProvider(provider_type="remote::chromadb"),
            BuildProvider(provider_type="remote::pgvector"),
            BuildProvider(provider_type="remote::qdrant"),
            BuildProvider(provider_type="remote::weaviate"),
        ],
        "files": [BuildProvider(provider_type="inline::localfs")],
        "safety": [
            BuildProvider(provider_type="inline::llama-guard"),
            BuildProvider(provider_type="inline::code-scanner"),
        ],
        "agents": [BuildProvider(provider_type="inline::meta-reference")],
        "post_training": [BuildProvider(provider_type="inline::torchtune-cpu")],
        "eval": [BuildProvider(provider_type="inline::meta-reference")],
        "datasetio": [
            BuildProvider(provider_type="remote::huggingface"),
            BuildProvider(provider_type="inline::localfs"),
        ],
        "scoring": [
            BuildProvider(provider_type="inline::basic"),
            BuildProvider(provider_type="inline::llm-as-judge"),
            BuildProvider(provider_type="inline::braintrust"),
        ],
        "tool_runtime": [
            BuildProvider(provider_type="remote::brave-search"),
            BuildProvider(provider_type="remote::tavily-search"),
            BuildProvider(provider_type="inline::rag-runtime"),
            BuildProvider(provider_type="remote::model-context-protocol"),
        ],
        "batches": [
            BuildProvider(provider_type="inline::reference"),
        ],
    }
    files_provider = Provider(
        provider_id="meta-reference-files",
        provider_type="inline::localfs",
        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
    )
    embedding_provider = Provider(
        provider_id="sentence-transformers",
        provider_type="inline::sentence-transformers",
        config=SentenceTransformersInferenceConfig.sample_run_config(),
    )
    default_tool_groups = [
        ToolGroupInput(
            toolgroup_id="builtin::websearch",
            provider_id="tavily-search",
        ),
        ToolGroupInput(
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
    ]
    default_shields = [
        # if the SAFETY_MODEL env var is set, register the llama-guard shield against that model
        ShieldInput(
            shield_id="llama-guard",
            provider_id="${env.SAFETY_MODEL:+llama-guard}",
            provider_shield_id="${env.SAFETY_MODEL:=}",
        ),
        ShieldInput(
            shield_id="code-scanner",
            provider_id="${env.CODE_SCANNER_MODEL:+code-scanner}",
            provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
        ),
    ]

    return DistributionTemplate(
        name=name,
        distro_type="self_hosted",
        description="Quick start template for running Llama Stack with several popular providers. This distribution is intended for CPU-only environments.",
        container_image=None,
        template_path=None,
        providers=providers,
        additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
        run_configs={
            "run.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": remote_inference_providers + [embedding_provider],
                    "vector_io": [
                        Provider(
                            provider_id="faiss",
                            provider_type="inline::faiss",
                            config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
                        ),
                        Provider(
                            provider_id="sqlite-vec",
                            provider_type="inline::sqlite-vec",
                            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
                        ),
                        Provider(
                            provider_id="${env.MILVUS_URL:+milvus}",
                            provider_type="inline::milvus",
                            config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
                        ),
                        Provider(
                            provider_id="${env.CHROMADB_URL:+chromadb}",
                            provider_type="remote::chromadb",
                            config=ChromaVectorIOConfig.sample_run_config(
                                f"~/.llama/distributions/{name}/",
                                url="${env.CHROMADB_URL:=}",
                            ),
                        ),
                        Provider(
                            provider_id="${env.PGVECTOR_DB:+pgvector}",
                            provider_type="remote::pgvector",
                            config=PGVectorVectorIOConfig.sample_run_config(
                                f"~/.llama/distributions/{name}",
                                db="${env.PGVECTOR_DB:=}",
                                user="${env.PGVECTOR_USER:=}",
                                password="${env.PGVECTOR_PASSWORD:=}",
                            ),
                        ),
                        Provider(
                            provider_id="${env.QDRANT_URL:+qdrant}",
                            provider_type="remote::qdrant",
                            config=QdrantVectorIOConfig.sample_run_config(
                                f"~/.llama/distributions/{name}",
                                url="${env.QDRANT_URL:=}",
                            ),
                        ),
                        Provider(
                            provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
                            provider_type="remote::weaviate",
                            config=WeaviateVectorIOConfig.sample_run_config(
                                f"~/.llama/distributions/{name}",
                                cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
                            ),
                        ),
                    ],
                    "files": [files_provider],
                },
                default_models=[],
                default_tool_groups=default_tool_groups,
                default_shields=default_shields,
                vector_stores_config=VectorStoresConfig(
                    default_provider_id="faiss",
                    default_embedding_model=QualifiedModel(
                        provider_id="sentence-transformers",
                        model_id="nomic-ai/nomic-embed-text-v1.5",
                    ),
                ),
                safety_config=SafetyConfig(
                    default_shield_id="llama-guard",
                ),
            ),
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "FIREWORKS_API_KEY": (
                "",
                "Fireworks API Key",
            ),
            "OPENAI_API_KEY": (
                "",
                "OpenAI API Key",
            ),
            "GROQ_API_KEY": (
                "",
                "Groq API Key",
            ),
            "ANTHROPIC_API_KEY": (
                "",
                "Anthropic API Key",
            ),
            "GEMINI_API_KEY": (
                "",
                "Gemini API Key",
            ),
            "VERTEX_AI_PROJECT": (
                "",
                "Google Cloud Project ID for Vertex AI",
            ),
            "VERTEX_AI_LOCATION": (
                "us-central1",
                "Google Cloud Location for Vertex AI",
            ),
            "SAMBANOVA_API_KEY": (
                "",
                "SambaNova API Key",
            ),
            "VLLM_URL": (
                "http://localhost:8000/v1",
                "vLLM URL",
            ),
            "VLLM_INFERENCE_MODEL": (
                "",
                "Optional vLLM Inference Model to register on startup",
            ),
            "OLLAMA_URL": (
                "http://localhost:11434",
                "Ollama URL",
            ),
            "AZURE_API_KEY": (
                "",
                "Azure API Key",
            ),
            "AZURE_API_BASE": (
                "",
                "Azure API Base",
            ),
            "AZURE_API_VERSION": (
                "",
                "Azure API Version",
            ),
            "AZURE_API_TYPE": (
                "azure",
                "Azure API Type",
            ),
        },
    )
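The file deleted above is the starter distribution generator (it defines get_distribution_template(name="starter")). A minimal usage sketch, assuming an installed package and that the module keeps the import path llama_stack.distributions.starter.starter; the print loop is illustrative only, not part of the diff:

from llama_stack.distributions.starter.starter import get_distribution_template

template = get_distribution_template()
settings = template.run_configs["run.yaml"]
run_config = settings.run_config(template.name, template.providers, template.container_image)

# Provider ids such as "${env.OLLAMA_URL:+ollama}" are emitted verbatim here;
# they only resolve to concrete ids (or drop out) when the stack loads the
# generated YAML and substitutes environment variables.
for provider in run_config["providers"]["inference"]:
    print(provider["provider_id"], provider["provider_type"])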
@ -1,465 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from pathlib import Path
from typing import Any, Literal

import jinja2
import rich
import yaml
from pydantic import BaseModel, Field

from llama_stack.apis.datasets import DatasetPurpose
from llama_stack.apis.models import ModelType
from llama_stack.core.datatypes import (
    LLAMA_STACK_RUN_CONFIG_VERSION,
    Api,
    BenchmarkInput,
    BuildConfig,
    BuildProvider,
    DatasetInput,
    DistributionSpec,
    ModelInput,
    Provider,
    SafetyConfig,
    ShieldInput,
    TelemetryConfig,
    ToolGroupInput,
    VectorStoresConfig,
)
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.storage.datatypes import (
    InferenceStoreReference,
    KVStoreReference,
    SqlStoreReference,
    StorageBackendType,
)
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages


def filter_empty_values(obj: Any) -> Any:
    """Recursively filter out specific empty values from a dictionary or list.

    This function removes:
    - Empty strings ('') only when they are the 'module' field
    - Empty dictionaries ({}) only when they are the 'config' field
    - None values (always excluded)
    """
    if obj is None:
        return None

    if isinstance(obj, dict):
        filtered = {}
        for key, value in obj.items():
            # Special handling for specific fields
            if key == "module" and isinstance(value, str) and value == "":
                # Skip empty module strings
                continue
            elif key == "config" and isinstance(value, dict) and not value:
                # Skip empty config dictionaries
                continue
            elif key == "container_image" and not value:
                # Skip empty container_image names
                continue
            else:
                # For all other fields, recursively filter but preserve empty values
                filtered_value = filter_empty_values(value)
                # if filtered_value is not None:
                filtered[key] = filtered_value
        return filtered

    elif isinstance(obj, list):
        filtered = []
        for item in obj:
            filtered_item = filter_empty_values(item)
            if filtered_item is not None:
                filtered.append(filtered_item)
        return filtered

    else:
        # For all other types (including empty strings and dicts that aren't module/config),
        # preserve them as-is
        return obj


def get_model_registry(
    available_models: dict[str, list[ProviderModelEntry]],
) -> tuple[list[ModelInput], bool]:
    models = []

    # check for conflicts in model ids
    all_ids = set()
    ids_conflict = False

    for _, entries in available_models.items():
        for entry in entries:
            ids = [entry.provider_model_id] + entry.aliases
            for model_id in ids:
                if model_id in all_ids:
                    ids_conflict = True
                    rich.print(
                        f"[yellow]Model id {model_id} conflicts; all model ids will be prefixed with provider id[/yellow]"
                    )
                    break
            all_ids.update(ids)
            if ids_conflict:
                break
        if ids_conflict:
            break

    for provider_id, entries in available_models.items():
        for entry in entries:
            ids = [entry.provider_model_id] + entry.aliases
            for model_id in ids:
                identifier = f"{provider_id}/{model_id}" if ids_conflict and provider_id not in model_id else model_id
                models.append(
                    ModelInput(
                        model_id=identifier,
                        provider_model_id=entry.provider_model_id,
                        provider_id=provider_id,
                        model_type=entry.model_type,
                        metadata=entry.metadata,
                    )
                )
    return models, ids_conflict


def get_shield_registry(
    available_safety_models: dict[str, list[ProviderModelEntry]],
    ids_conflict_in_models: bool,
) -> list[ShieldInput]:
    shields = []

    # check for conflicts in shield ids
    all_ids = set()
    ids_conflict = False

    for _, entries in available_safety_models.items():
        for entry in entries:
            ids = [entry.provider_model_id] + entry.aliases
            for model_id in ids:
                if model_id in all_ids:
                    ids_conflict = True
                    rich.print(
                        f"[yellow]Shield id {model_id} conflicts; all shield ids will be prefixed with provider id[/yellow]"
                    )
                    break
            all_ids.update(ids)
            if ids_conflict:
                break
        if ids_conflict:
            break

    for provider_id, entries in available_safety_models.items():
        for entry in entries:
            ids = [entry.provider_model_id] + entry.aliases
            for model_id in ids:
                identifier = f"{provider_id}/{model_id}" if ids_conflict and provider_id not in model_id else model_id
                shields.append(
                    ShieldInput(
                        shield_id=identifier,
                        provider_shield_id=f"{provider_id}/{entry.provider_model_id}"
                        if ids_conflict_in_models
                        else entry.provider_model_id,
                    )
                )

    return shields


class DefaultModel(BaseModel):
    model_id: str
    doc_string: str


class RunConfigSettings(BaseModel):
    provider_overrides: dict[str, list[Provider]] = Field(default_factory=dict)
    default_models: list[ModelInput] | None = None
    default_shields: list[ShieldInput] | None = None
    default_tool_groups: list[ToolGroupInput] | None = None
    default_datasets: list[DatasetInput] | None = None
    default_benchmarks: list[BenchmarkInput] | None = None
    vector_stores_config: VectorStoresConfig | None = None
    safety_config: SafetyConfig | None = None
    telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
    storage_backends: dict[str, Any] | None = None
    storage_stores: dict[str, Any] | None = None

    def run_config(
        self,
        name: str,
        providers: dict[str, list[BuildProvider]],
        container_image: str | None = None,
    ) -> dict:
        provider_registry = get_provider_registry()
        provider_configs = {}
        for api_str, provider_objs in providers.items():
            if api_providers := self.provider_overrides.get(api_str):
                # Convert Provider objects to dicts for YAML serialization
                provider_configs[api_str] = [p.model_dump(exclude_none=True) for p in api_providers]
                continue

            provider_configs[api_str] = []
            for provider in provider_objs:
                api = Api(api_str)
                if provider.provider_type not in provider_registry[api]:
                    raise ValueError(f"Unknown provider type: {provider.provider_type} for API: {api_str}")
                provider_id = provider.provider_type.split("::")[-1]
                config_class = provider_registry[api][provider.provider_type].config_class
                assert config_class is not None, (
                    f"No config class for provider type: {provider.provider_type} for API: {api_str}"
                )

                config_class = instantiate_class_type(config_class)
                if hasattr(config_class, "sample_run_config"):
                    config = config_class.sample_run_config(__distro_dir__=f"~/.llama/distributions/{name}")
                else:
                    config = {}
                # BuildProvider does not have a config attribute; skip assignment
                provider_configs[api_str].append(
                    Provider(
                        provider_id=provider_id,
                        provider_type=provider.provider_type,
                        config=config,
                    ).model_dump(exclude_none=True)
                )
        # Get unique set of APIs from providers
        apis = sorted(providers.keys())

        storage_backends = self.storage_backends or {
            "kv_default": SqliteKVStoreConfig.sample_run_config(
                __distro_dir__=f"~/.llama/distributions/{name}",
                db_name="kvstore.db",
            ),
            "sql_default": SqliteSqlStoreConfig.sample_run_config(
                __distro_dir__=f"~/.llama/distributions/{name}",
                db_name="sql_store.db",
            ),
        }

        storage_stores = self.storage_stores or {
            "metadata": KVStoreReference(
                backend="kv_default",
                namespace="registry",
            ).model_dump(exclude_none=True),
            "inference": InferenceStoreReference(
                backend="sql_default",
                table_name="inference_store",
            ).model_dump(exclude_none=True),
            "conversations": SqlStoreReference(
                backend="sql_default",
                table_name="openai_conversations",
            ).model_dump(exclude_none=True),
            "prompts": KVStoreReference(
                backend="kv_default",
                namespace="prompts",
            ).model_dump(exclude_none=True),
        }

        storage_config = dict(
            backends=storage_backends,
            stores=storage_stores,
        )

        # Return a dict that matches StackRunConfig structure
        config = {
            "version": LLAMA_STACK_RUN_CONFIG_VERSION,
            "image_name": name,
            "container_image": container_image,
            "apis": apis,
            "providers": provider_configs,
            "storage": storage_config,
            "registered_resources": {
                "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
                "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
                "vector_dbs": [],
                "datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])],
                "scoring_fns": [],
                "benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])],
                "tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])],
            },
            "server": {
                "port": 8321,
            },
            "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
        }

        if self.vector_stores_config:
            config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True)

        if self.safety_config:
            config["safety"] = self.safety_config.model_dump(exclude_none=True)

        return config


class DistributionTemplate(BaseModel):
    """
    Represents a Llama Stack distribution instance that can generate configuration
    and documentation files.
    """

    name: str
    description: str
    distro_type: Literal["self_hosted", "remote_hosted", "ondevice"]

    # Now uses BuildProvider for build config, not Provider
    providers: dict[str, list[BuildProvider]]
    run_configs: dict[str, RunConfigSettings]
    template_path: Path | None = None

    # Optional configuration
    run_config_env_vars: dict[str, tuple[str, str]] | None = None
    container_image: str | None = None

    available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None

    # we may want to specify additional pip packages without necessarily indicating a
    # specific "default" inference store (which is what typically used to dictate additional
    # pip packages)
    additional_pip_packages: list[str] | None = None

    def build_config(self) -> BuildConfig:
        additional_pip_packages: list[str] = []
        for run_config in self.run_configs.values():
            run_config_ = run_config.run_config(self.name, self.providers, self.container_image)

            # TODO: This is a hack to get the dependencies for internal APIs into build
            # We should have a better way to do this by formalizing the concept of "internal" APIs
            # and providers, with a way to specify dependencies for them.

            storage_cfg = run_config_.get("storage", {})
            for backend_cfg in storage_cfg.get("backends", {}).values():
                store_type = backend_cfg.get("type")
                if not store_type:
                    continue
                if str(store_type).startswith("kv_"):
                    additional_pip_packages.extend(get_kv_pip_packages(backend_cfg))
                elif str(store_type).startswith("sql_"):
                    additional_pip_packages.extend(get_sql_pip_packages(backend_cfg))

        if self.additional_pip_packages:
            additional_pip_packages.extend(self.additional_pip_packages)

        # Create minimal providers for build config (without runtime configs)
        build_providers = {}
        for api, providers in self.providers.items():
            build_providers[api] = []
            for provider in providers:
                # Create a minimal build provider object with only essential build information
                build_provider = BuildProvider(
                    provider_type=provider.provider_type,
                    module=provider.module,
                )
                build_providers[api].append(build_provider)

        return BuildConfig(
            distribution_spec=DistributionSpec(
                description=self.description,
                container_image=self.container_image,
                providers=build_providers,
            ),
            image_type=LlamaStackImageType.VENV.value,  # default to venv
            additional_pip_packages=sorted(set(additional_pip_packages)),
        )

    def generate_markdown_docs(self) -> str:
        providers_table = "| API | Provider(s) |\n"
        providers_table += "|-----|-------------|\n"

        for api, providers in sorted(self.providers.items()):
            providers_str = ", ".join(f"`{p.provider_type}`" for p in providers)
            providers_table += f"| {api} | {providers_str} |\n"

        if self.template_path is not None:
            template = self.template_path.read_text()
            comment = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
            orphantext = "---\norphan: true\n---\n"

            if template.startswith(orphantext):
                template = template.replace(orphantext, orphantext + comment)
            else:
                template = comment + template

            # Render template with rich-generated table
            env = jinja2.Environment(
                trim_blocks=True,
                lstrip_blocks=True,
                # NOTE: autoescape is required to prevent XSS attacks
                autoescape=True,
            )
            template = env.from_string(template)

            default_models = []
            if self.available_models_by_provider:
                has_multiple_providers = len(self.available_models_by_provider.keys()) > 1
                for provider_id, model_entries in self.available_models_by_provider.items():
                    for model_entry in model_entries:
                        doc_parts = []
                        if model_entry.aliases:
                            doc_parts.append(f"aliases: {', '.join(model_entry.aliases)}")
                        if has_multiple_providers:
                            doc_parts.append(f"provider: {provider_id}")

                        default_models.append(
                            DefaultModel(
                                model_id=model_entry.provider_model_id,
                                doc_string=(f"({' -- '.join(doc_parts)})" if doc_parts else ""),
                            )
                        )

            return template.render(
                name=self.name,
                description=self.description,
                providers=self.providers,
                providers_table=providers_table,
                run_config_env_vars=self.run_config_env_vars,
                default_models=default_models,
            )
        return ""

    def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None:
        def enum_representer(dumper, data):
            return dumper.represent_scalar("tag:yaml.org,2002:str", data.value)

        # Register YAML representer for enums
        yaml.add_representer(ModelType, enum_representer)
        yaml.add_representer(DatasetPurpose, enum_representer)
        yaml.add_representer(StorageBackendType, enum_representer)
        yaml.SafeDumper.add_representer(ModelType, enum_representer)
        yaml.SafeDumper.add_representer(DatasetPurpose, enum_representer)
        yaml.SafeDumper.add_representer(StorageBackendType, enum_representer)

        for output_dir in [yaml_output_dir, doc_output_dir]:
            output_dir.mkdir(parents=True, exist_ok=True)

        build_config = self.build_config()
        with open(yaml_output_dir / "build.yaml", "w") as f:
            yaml.safe_dump(
                filter_empty_values(build_config.model_dump(exclude_none=True)),
                f,
                sort_keys=False,
            )

        for yaml_pth, settings in self.run_configs.items():
            run_config = settings.run_config(self.name, self.providers, self.container_image)
            with open(yaml_output_dir / yaml_pth, "w") as f:
                yaml.safe_dump(
                    filter_empty_values(run_config),
                    f,
                    sort_keys=False,
                )

        if self.template_path:
            docs = self.generate_markdown_docs()
            with open(doc_output_dir / f"{self.name}.md", "w") as f:
                f.write(docs if docs.endswith("\n") else docs + "\n")
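The 465-line file deleted above defines RunConfigSettings and DistributionTemplate, the machinery that turns a template into build.yaml and run.yaml. A hedged sketch of typical use, assuming an installed package; the /tmp output directory is an arbitrary choice for illustration:

from pathlib import Path

from llama_stack.distributions.starter.starter import get_distribution_template

template = get_distribution_template()

# build_config() collects pip packages implied by the storage backends plus any
# additional_pip_packages declared on the template.
build = template.build_config()
print(build.additional_pip_packages)

# save_distribution() writes build.yaml and one YAML per entry in run_configs;
# markdown docs are only emitted when template_path is set.
out = Path("/tmp/starter-distro")
template.save_distribution(yaml_output_dir=out, doc_output_dir=out)
print(sorted(p.name for p in out.iterdir()))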
@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .watsonx import get_distribution_template  # noqa: F401
@ -1,95 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.remote.inference.watsonx import WatsonXConfig


def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
    providers = {
        "inference": [
            BuildProvider(provider_type="remote::watsonx"),
            BuildProvider(provider_type="inline::sentence-transformers"),
        ],
        "vector_io": [BuildProvider(provider_type="inline::faiss")],
        "safety": [BuildProvider(provider_type="inline::llama-guard")],
        "agents": [BuildProvider(provider_type="inline::meta-reference")],
        "eval": [BuildProvider(provider_type="inline::meta-reference")],
        "datasetio": [
            BuildProvider(provider_type="remote::huggingface"),
            BuildProvider(provider_type="inline::localfs"),
        ],
        "scoring": [
            BuildProvider(provider_type="inline::basic"),
            BuildProvider(provider_type="inline::llm-as-judge"),
            BuildProvider(provider_type="inline::braintrust"),
        ],
        "tool_runtime": [
            BuildProvider(provider_type="remote::brave-search"),
            BuildProvider(provider_type="remote::tavily-search"),
            BuildProvider(provider_type="inline::rag-runtime"),
            BuildProvider(provider_type="remote::model-context-protocol"),
        ],
        "files": [BuildProvider(provider_type="inline::localfs")],
    }

    inference_provider = Provider(
        provider_id="watsonx",
        provider_type="remote::watsonx",
        config=WatsonXConfig.sample_run_config(),
    )

    default_tool_groups = [
        ToolGroupInput(
            toolgroup_id="builtin::websearch",
            provider_id="tavily-search",
        ),
        ToolGroupInput(
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
    ]

    files_provider = Provider(
        provider_id="meta-reference-files",
        provider_type="inline::localfs",
        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
    )
    return DistributionTemplate(
        name=name,
        distro_type="remote_hosted",
        description="Use watsonx for running LLM inference",
        container_image=None,
        template_path=None,
        providers=providers,
        run_configs={
            "run.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider],
                    "files": [files_provider],
                },
                default_models=[],
                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={
            "LLAMASTACK_PORT": (
                "5001",
                "Port for the Llama Stack distribution server",
            ),
            "WATSONX_API_KEY": (
                "",
                "watsonx API Key",
            ),
            "WATSONX_PROJECT_ID": (
                "",
                "watsonx Project ID",
            ),
        },
    )
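The last file deleted in this hunk is the watsonx distribution generator. A small sketch, assuming an installed package and the import path llama_stack.distributions.watsonx.watsonx; it only inspects the template, so no real watsonx credentials are needed until the generated run.yaml is actually served:

from llama_stack.distributions.watsonx.watsonx import get_distribution_template

template = get_distribution_template()

# The template records which env vars its run config expects, each with a
# default value and a human-readable description (e.g. WATSONX_API_KEY).
for var, (default, description) in (template.run_config_env_vars or {}).items():
    print(f"{var:20s} default={default!r}  {description}")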