CLI Update: build -> configure -> run (#69)

* remove configure from build * remove config from build * configure to regenerate file * update memory providers * remove comments * update build script * add readme * update doc * rename getting started * update build cli * update docker build script * configure update * clean up configure * [tmp fix] hardware requirement tmp fix * clean up build * fix configure * add example build files for conda & docker * remove resolve_distribution_spec * remove available_distribution_specs * example build files * update example build files * more clean up on build * add name args to override name * move distribution to yaml files * generate distribution specs * getting started guide * getting started * add build yaml to Dockerfile * cleanup distribution_dependencies * configure from docker image name * build relative paths * minor comment * getting started * Update getting_started.md * Update getting_started.md * address comments, configure within docker file * remove distribution types! * update getting started * update documentation * remove listing distribution * minor heading * address nits, remove docker_image=null * gitignore
2025-10-04 04:04:14 +00:00 · 2024-09-16 11:02:26 -07:00 · 2024-09-16 11:02:26 -07:00 · d9147f3184
commit d9147f3184
parent 73b71d9689
27 changed files with 759 additions and 512 deletions
--- a/llama_toolchain/cli/stack/build.py
+++ b/llama_toolchain/cli/stack/build.py
@ -8,33 +8,11 @@ import argparse

 from llama_toolchain.cli.subcommand import Subcommand
 from llama_toolchain.core.datatypes import *  # noqa: F403
+from pathlib import Path
+
 import yaml


-def parse_api_provider_tuples(
-    tuples: str, parser: argparse.ArgumentParser
-) -> Dict[str, ProviderSpec]:
-    from llama_toolchain.core.distribution import api_providers
-
-    all_providers = api_providers()
-
-    deps = {}
-    for dep in tuples.split(","):
-        dep = dep.strip()
-        if not dep:
-            continue
-        api_str, provider = dep.split("=")
-        api = Api(api_str)
-
-        provider = provider.strip()
-        if provider not in all_providers[api]:
-            parser.error(f"Provider `{provider}` is not available for API `{api}`")
-            return
-        deps[api] = all_providers[api][provider]
-
-    return deps
-
-
 class StackBuild(Subcommand):
    def __init__(self, subparsers: argparse._SubParsersAction):
        super().__init__()
@ -48,16 +26,16 @@ class StackBuild(Subcommand):
        self.parser.set_defaults(func=self._run_stack_build_command)

    def _add_arguments(self):
-        from llama_toolchain.core.distribution_registry import (
-            available_distribution_specs,
-        )
-        from llama_toolchain.core.package import ImageType
-
-        allowed_ids = [d.distribution_type for d in available_distribution_specs()]
        self.parser.add_argument(
-            "--config",
+            "config",
            type=str,
-            help="Path to a config file to use for the build",
+            help="Path to a config file to use for the build. You may find example configs in llama_toolchain/configs/distributions",
+        )
+
+        self.parser.add_argument(
+            "--name",
+            type=str,
+            help="Name of the llama stack build to override from template config",
        )

    def _run_stack_build_command_from_build_config(
@ -68,69 +46,19 @@ class StackBuild(Subcommand):

        from llama_toolchain.common.config_dirs import DISTRIBS_BASE_DIR
        from llama_toolchain.common.serialize import EnumEncoder
-        from llama_toolchain.core.distribution_registry import resolve_distribution_spec
        from llama_toolchain.core.package import ApiInput, build_package, ImageType
        from termcolor import cprint

-        api_inputs = []
-        if build_config.distribution == "adhoc":
-            if not build_config.api_providers:
-                self.parser.error(
-                    "You must specify API providers with (api=provider,...) for building an adhoc distribution"
-                )
-                return
-
-            parsed = parse_api_provider_tuples(build_config.api_providers, self.parser)
-            for api, provider_spec in parsed.items():
-                for dep in provider_spec.api_dependencies:
-                    if dep not in parsed:
-                        self.parser.error(
-                            f"API {api} needs dependency {dep} provided also"
-                        )
-                        return
-
-                api_inputs.append(
-                    ApiInput(
-                        api=api,
-                        provider=provider_spec.provider_type,
-                    )
-                )
-            docker_image = None
-        else:
-            if build_config.api_providers:
-                self.parser.error(
-                    "You cannot specify API providers for pre-registered distributions"
-                )
-                return
-
-            dist = resolve_distribution_spec(build_config.distribution)
-            if dist is None:
-                self.parser.error(
-                    f"Could not find distribution {build_config.distribution}"
-                )
-                return
-
-            for api, provider_type in dist.providers.items():
-                api_inputs.append(
-                    ApiInput(
-                        api=api,
-                        provider=provider_type,
-                    )
-                )
-            docker_image = dist.docker_image
-
-        build_package(
-            api_inputs,
-            image_type=ImageType(build_config.image_type),
-            name=build_config.name,
-            distribution_type=build_config.distribution,
-            docker_image=docker_image,
-        )
-
        # save build.yaml spec for building same distribution again
-        build_dir = (
-            DISTRIBS_BASE_DIR / build_config.distribution / build_config.image_type
-        )
+        if build_config.image_type == ImageType.docker.value:
+            # docker needs build file to be in the llama-stack repo dir to be able to copy over to the image
+            llama_toolchain_path = Path(os.path.relpath(__file__)).parent.parent.parent
+            build_dir = (
+                llama_toolchain_path / "configs/distributions" / build_config.image_type
+            )
+        else:
+            build_dir = DISTRIBS_BASE_DIR / build_config.image_type
+
        os.makedirs(build_dir, exist_ok=True)
        build_file_path = build_dir / f"{build_config.name}-build.yaml"

@ -138,6 +66,8 @@ class StackBuild(Subcommand):
            to_write = json.loads(json.dumps(build_config.dict(), cls=EnumEncoder))
            f.write(yaml.dump(to_write, sort_keys=False))

+        build_package(build_config, build_file_path)
+
        cprint(
            f"Build spec configuration saved at {str(build_file_path)}",
            color="green",
@ -147,15 +77,18 @@ class StackBuild(Subcommand):
        from llama_toolchain.common.prompt_for_config import prompt_for_config
        from llama_toolchain.core.dynamic import instantiate_class_type

-        if args.config:
-            with open(args.config, "r") as f:
-                try:
-                    build_config = BuildConfig(**yaml.safe_load(f))
-                except Exception as e:
-                    self.parser.error(f"Could not parse config file {args.config}: {e}")
-                    return
-                self._run_stack_build_command_from_build_config(build_config)
+        if not args.config:
+            self.parser.error(
+                "No config file specified. Please use `llama stack build /path/to/*-build.yaml`. Example config files can be found in llama_toolchain/configs/distributions"
+            )
            return

-        build_config = prompt_for_config(BuildConfig, None)
-        self._run_stack_build_command_from_build_config(build_config)
+        with open(args.config, "r") as f:
+            try:
+                build_config = BuildConfig(**yaml.safe_load(f))
+            except Exception as e:
+                self.parser.error(f"Could not parse config file {args.config}: {e}")
+                return
+            if args.name:
+                build_config.name = args.name
+            self._run_stack_build_command_from_build_config(build_config)
--- a/llama_toolchain/cli/stack/configure.py
+++ b/llama_toolchain/cli/stack/configure.py
@ -8,12 +8,18 @@ import argparse
 import json
 from pathlib import Path

-import yaml
+import pkg_resources

+import yaml
 from llama_toolchain.cli.subcommand import Subcommand
 from llama_toolchain.common.config_dirs import BUILDS_BASE_DIR
+
+from llama_toolchain.common.exec import run_with_pty
 from termcolor import cprint
 from llama_toolchain.core.datatypes import *  # noqa: F403
+import os
+
+from termcolor import cprint


 class StackConfigure(Subcommand):
@ -31,49 +37,107 @@ class StackConfigure(Subcommand):
        self.parser.set_defaults(func=self._run_stack_configure_cmd)

    def _add_arguments(self):
-        from llama_toolchain.core.distribution_registry import (
-            available_distribution_specs,
-        )
-        from llama_toolchain.core.package import ImageType
-
-        allowed_ids = [d.distribution_type for d in available_distribution_specs()]
        self.parser.add_argument(
            "config",
            type=str,
-            help="Path to the package config file (e.g. ~/.llama/builds/<distribution>/<image_type>/<name>.yaml)",
+            help="Path to the build config file (e.g. ~/.llama/builds/<image_type>/<name>-build.yaml). For docker, this could also be the name of the docker image. ",
+        )
+
+        self.parser.add_argument(
+            "--output-dir",
+            type=str,
+            help="Path to the output directory to store generated run.yaml config file. If not specified, will use ~/.llama/build/<image_type>/<name>-run.yaml",
        )

    def _run_stack_configure_cmd(self, args: argparse.Namespace) -> None:
        from llama_toolchain.core.package import ImageType

-        config_file = Path(args.config)
-        if not config_file.exists():
-            self.parser.error(
-                f"Could not find {config_file}. Please run `llama stack build` first"
+        docker_image = None
+        build_config_file = Path(args.config)
+        if not build_config_file.exists():
+            cprint(
+                f"Could not find {build_config_file}. Trying docker image name instead...",
+                color="green",
+            )
+            docker_image = args.config
+
+            builds_dir = BUILDS_BASE_DIR / ImageType.docker.value
+            if args.output_dir:
+                builds_dir = Path(output_dir)
+            os.makedirs(builds_dir, exist_ok=True)
+
+            script = pkg_resources.resource_filename(
+                "llama_toolchain", "core/configure_container.sh"
+            )
+            script_args = [script, docker_image, str(builds_dir)]
+
+            return_code = run_with_pty(script_args)
+
+            # we have regenerated the build config file with script, now check if it exists
+            if return_code != 0:
+                self.parser.error(
+                    f"Can not find {build_config_file}. Please run llama stack build first or check if docker image exists"
+                )
+
+            build_name = docker_image.removeprefix("llamastack-")
+            cprint(
+                f"YAML configuration has been written to {builds_dir / f'{build_name}-run.yaml'}",
+                color="green",
            )
            return

-        configure_llama_distribution(config_file)
+        with open(build_config_file, "r") as f:
+            build_config = BuildConfig(**yaml.safe_load(f))

+        self._configure_llama_distribution(build_config, args.output_dir)

-def configure_llama_distribution(config_file: Path) -> None:
-    from llama_toolchain.common.serialize import EnumEncoder
-    from llama_toolchain.core.configure import configure_api_providers
+    def _configure_llama_distribution(
+        self,
+        build_config: BuildConfig,
+        output_dir: Optional[str] = None,
+    ):
+        from llama_toolchain.common.serialize import EnumEncoder
+        from llama_toolchain.core.configure import configure_api_providers

-    with open(config_file, "r") as f:
-        config = PackageConfig(**yaml.safe_load(f))
+        builds_dir = BUILDS_BASE_DIR / build_config.image_type
+        if output_dir:
+            builds_dir = Path(output_dir)
+        os.makedirs(builds_dir, exist_ok=True)
+        package_name = build_config.name.replace("::", "-")
+        package_file = builds_dir / f"{package_name}-run.yaml"

-    if config.providers:
-        cprint(
-            f"Configuration already exists for {config.distribution_type}. Will overwrite...",
-            "yellow",
-            attrs=["bold"],
+        api2providers = build_config.distribution_spec.providers
+
+        stub_config = {
+            api_str: {"provider_type": provider}
+            for api_str, provider in api2providers.items()
+        }
+
+        if package_file.exists():
+            cprint(
+                f"Configuration already exists for {build_config.name}. Will overwrite...",
+                "yellow",
+                attrs=["bold"],
+            )
+            config = PackageConfig(**yaml.safe_load(package_file.read_text()))
+        else:
+            config = PackageConfig(
+                built_at=datetime.now(),
+                package_name=package_name,
+                providers=stub_config,
+            )
+
+        config.providers = configure_api_providers(config.providers)
+        config.docker_image = (
+            package_name if build_config.image_type == "docker" else None
        )
+        config.conda_env = package_name if build_config.image_type == "conda" else None

-    config.providers = configure_api_providers(config.providers)
+        with open(package_file, "w") as f:
+            to_write = json.loads(json.dumps(config.dict(), cls=EnumEncoder))
+            f.write(yaml.dump(to_write, sort_keys=False))

-    with open(config_file, "w") as fp:
-        to_write = json.loads(json.dumps(config.dict(), cls=EnumEncoder))
-        fp.write(yaml.dump(to_write, sort_keys=False))
-
-    print(f"YAML configuration has been written to {config_file}")
+        cprint(
+            f"> YAML configuration has been written to {package_file}",
+            color="blue",
+        )
--- a/llama_toolchain/cli/stack/list_distributions.py
+++ b/llama_toolchain/cli/stack/list_distributions.py
@ -1,55 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import argparse
-import json
-
-from llama_toolchain.cli.subcommand import Subcommand
-
-
-class StackListDistributions(Subcommand):
-    def __init__(self, subparsers: argparse._SubParsersAction):
-        super().__init__()
-        self.parser = subparsers.add_parser(
-            "list-distributions",
-            prog="llama stack list-distributions",
-            description="Show available Llama Stack Distributions",
-            formatter_class=argparse.RawTextHelpFormatter,
-        )
-        self._add_arguments()
-        self.parser.set_defaults(func=self._run_distribution_list_cmd)
-
-    def _add_arguments(self):
-        pass
-
-    def _run_distribution_list_cmd(self, args: argparse.Namespace) -> None:
-        from llama_toolchain.cli.table import print_table
-        from llama_toolchain.core.distribution_registry import (
-            available_distribution_specs,
-        )
-
-        # eventually, this should query a registry at llama.meta.com/llamastack/distributions
-        headers = [
-            "Distribution Type",
-            "Providers",
-            "Description",
-        ]
-
-        rows = []
-        for spec in available_distribution_specs():
-            providers = {k.value: v for k, v in spec.providers.items()}
-            rows.append(
-                [
-                    spec.distribution_type,
-                    json.dumps(providers, indent=2),
-                    spec.description,
-                ]
-            )
-        print_table(
-            rows,
-            headers,
-            separate_rows=True,
-        )
--- a/llama_toolchain/cli/stack/run.py
+++ b/llama_toolchain/cli/stack/run.py
@ -69,9 +69,6 @@ class StackRun(Subcommand):
        with open(config_file, "r") as f:
            config = PackageConfig(**yaml.safe_load(f))

-        if not config.distribution_type:
-            raise ValueError("Build config appears to be corrupt.")
-
        if config.docker_image:
            script = pkg_resources.resource_filename(
                "llama_toolchain",
--- a/llama_toolchain/cli/stack/stack.py
+++ b/llama_toolchain/cli/stack/stack.py
@ -11,7 +11,6 @@ from llama_toolchain.cli.subcommand import Subcommand
 from .build import StackBuild
 from .configure import StackConfigure
 from .list_apis import StackListApis
-from .list_distributions import StackListDistributions
 from .list_providers import StackListProviders
 from .run import StackRun

@ -31,6 +30,5 @@ class StackParser(Subcommand):
        StackBuild.create(subparsers)
        StackConfigure.create(subparsers)
        StackListApis.create(subparsers)
-        StackListDistributions.create(subparsers)
        StackListProviders.create(subparsers)
        StackRun.create(subparsers)
--- a/llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml
+++ b/llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml
@ -0,0 +1,10 @@
+name: local-conda-example
+distribution_spec:
+  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  providers:
+    inference: meta-reference
+    memory: meta-reference-faiss
+    safety: meta-reference
+    agentic_system: meta-reference
+    telemetry: console
+image_type: conda
--- a/llama_toolchain/configs/distributions/conda/local-fireworks-conda-example-build.yaml
+++ b/llama_toolchain/configs/distributions/conda/local-fireworks-conda-example-build.yaml
@ -0,0 +1,10 @@
+name: local-fireworks-conda-example
+distribution_spec:
+  description: Use Fireworks.ai for running LLM inference
+  providers:
+    inference: remote::fireworks
+    memory: meta-reference-faiss
+    safety: meta-reference
+    agentic_system: meta-reference
+    telemetry: console
+image_type: conda
--- a/llama_toolchain/configs/distributions/conda/local-ollama-conda-example-build.yaml
+++ b/llama_toolchain/configs/distributions/conda/local-ollama-conda-example-build.yaml
@ -0,0 +1,10 @@
+name: local-ollama-conda-example
+distribution_spec:
+  description: Like local, but use ollama for running LLM inference
+  providers:
+    inference: remote::ollama
+    memory: meta-reference-faiss
+    safety: meta-reference
+    agentic_system: meta-reference
+    telemetry: console
+image_type: conda
--- a/llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml
+++ b/llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml
@ -0,0 +1,10 @@
+name: local-tgi-conda-example
+distribution_spec:
+  description: Use TGI (local or with Hugging Face Inference Endpoints for running LLM inference. When using HF Inference Endpoints, you must provide the name of the endpoint).
+  providers:
+    inference: remote::tgi
+    memory: meta-reference-faiss
+    safety: meta-reference
+    agentic_system: meta-reference
+    telemetry: console
+image_type: conda
--- a/llama_toolchain/configs/distributions/conda/local-together-conda-example-build.yaml
+++ b/llama_toolchain/configs/distributions/conda/local-together-conda-example-build.yaml
@ -0,0 +1,10 @@
+name: local-tgi-conda-example
+distribution_spec:
+  description: Use Together.ai for running LLM inference
+  providers:
+    inference: remote::together
+    memory: meta-reference-faiss
+    safety: meta-reference
+    agentic_system: meta-reference
+    telemetry: console
+image_type: conda
--- a/llama_toolchain/configs/distributions/distribution_registry/local-ollama.yaml
+++ b/llama_toolchain/configs/distributions/distribution_registry/local-ollama.yaml
@ -0,0 +1,7 @@
+description: Like local, but use ollama for running LLM inference
+providers:
+  inference: remote::ollama
+  safety: meta-reference
+  agentic_system: meta-reference
+  memory: meta-reference-faiss
+  telemetry: console
--- a/llama_toolchain/configs/distributions/distribution_registry/local-plus-fireworks-inference.yaml
+++ b/llama_toolchain/configs/distributions/distribution_registry/local-plus-fireworks-inference.yaml
@ -0,0 +1,7 @@
+description: Use Fireworks.ai for running LLM inference
+providers:
+  inference: remote::fireworks
+  safety: meta-reference
+  agentic_system: meta-reference
+  memory: meta-reference-faiss
+  telemetry: console
--- a/llama_toolchain/configs/distributions/distribution_registry/local-plus-tgi-inference.yaml
+++ b/llama_toolchain/configs/distributions/distribution_registry/local-plus-tgi-inference.yaml
@ -0,0 +1,6 @@
+description: Use TGI (local or with Hugging Face Inference Endpoints for running LLM inference. When using HF Inference Endpoints, you must provide the name of the endpoint).
+providers:
+  inference: remote::tgi
+  safety: meta-reference
+  agentic_system: meta-reference
+  memory: meta-reference-faiss
--- a/llama_toolchain/configs/distributions/distribution_registry/local-plus-together-inference.yaml
+++ b/llama_toolchain/configs/distributions/distribution_registry/local-plus-together-inference.yaml
@ -0,0 +1,7 @@
+description: Use Together.ai for running LLM inference
+providers:
+  inference: remote::together
+  safety: meta-reference
+  agentic_system: meta-reference
+  memory: meta-reference-faiss
+  telemetry: console
--- a/llama_toolchain/configs/distributions/distribution_registry/local.yaml
+++ b/llama_toolchain/configs/distributions/distribution_registry/local.yaml
@ -0,0 +1,7 @@
+description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+providers:
+  inference: meta-reference
+  memory: meta-reference-faiss
+  safety: meta-reference
+  agentic_system: meta-reference
+  telemetry: console
--- a/llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml
+++ b/llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml
@ -0,0 +1,10 @@
+name: local-docker-example
+distribution_spec:
+  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  providers:
+    inference: meta-reference
+    memory: meta-reference-faiss
+    safety: meta-reference
+    agentic_system: meta-reference
+    telemetry: console
+image_type: docker
--- a/llama_toolchain/core/build_conda_env.sh
+++ b/llama_toolchain/core/build_conda_env.sh
@ -19,17 +19,15 @@ fi

 set -euo pipefail

-if [ "$#" -ne 4 ]; then
+if [ "$#" -ne 2 ]; then
  echo "Usage: $0 <distribution_type> <build_name> <pip_dependencies>" >&2
  echo "Example: $0 <distribution_type> mybuild 'numpy pandas scipy'" >&2
  exit 1
 fi

-distribution_type="$1"
-build_name="$2"
+build_name="$1"
 env_name="llamastack-$build_name"
-config_file="$3"
-pip_dependencies="$4"
+pip_dependencies="$2"

 # Define color codes
 RED='\033[0;31m'
@ -115,7 +113,3 @@ ensure_conda_env_python310() {
 }

 ensure_conda_env_python310 "$env_name" "$pip_dependencies"
-
-printf "${GREEN}Successfully setup conda environment. Configuring build...${NC}\n"
-
-$CONDA_PREFIX/bin/python3 -m llama_toolchain.cli.llama stack configure $config_file
--- a/llama_toolchain/core/build_container.sh
+++ b/llama_toolchain/core/build_container.sh
@ -4,18 +4,17 @@ LLAMA_MODELS_DIR=${LLAMA_MODELS_DIR:-}
 LLAMA_TOOLCHAIN_DIR=${LLAMA_TOOLCHAIN_DIR:-}
 TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}

-if [ "$#" -ne 5 ]; then
-  echo "Usage: $0 <distribution_type> <build_name> <docker_base> <pip_dependencies>
-  echo "Example: $0 distribution_type my-fastapi-app python:3.9-slim 'fastapi uvicorn'
+if [ "$#" -ne 4 ]; then
+  echo "Usage: $0 <build_name> <docker_base> <pip_dependencies>
+  echo "Example: $0 my-fastapi-app python:3.9-slim 'fastapi uvicorn'
  exit 1
 fi

-distribution_type=$1
-build_name="$2"
+build_name="$1"
 image_name="llamastack-$build_name"
-docker_base=$3
-config_file=$4
-pip_dependencies=$5
+docker_base=$2
+build_file_path=$3
+pip_dependencies=$4

 # Define color codes
 RED='\033[0;31m'
@ -26,6 +25,8 @@ set -euo pipefail

 SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
 REPO_DIR=$(dirname $(dirname "$SCRIPT_DIR"))
+DOCKER_BINARY=${DOCKER_BINARY:-docker}
+DOCKER_OPTS=${DOCKER_OPTS:-}

 TEMP_DIR=$(mktemp -d)

@ -93,6 +94,8 @@ add_to_docker <<EOF

 EOF

+add_to_docker "ADD $build_file_path ./llamastack-build.yaml"
+
 printf "Dockerfile created successfully in $TEMP_DIR/Dockerfile"
 cat $TEMP_DIR/Dockerfile
 printf "\n"
@ -105,10 +108,10 @@ if [ -n "$LLAMA_MODELS_DIR" ]; then
  mounts="$mounts -v $(readlink -f $LLAMA_MODELS_DIR):$models_mount"
 fi
 set -x
-podman build -t $image_name -f "$TEMP_DIR/Dockerfile" "$REPO_DIR" $mounts
+$DOCKER_BINARY build $DOCKER_OPTS -t $image_name -f "$TEMP_DIR/Dockerfile" "$REPO_DIR" $mounts
 set +x

-printf "${GREEN}Succesfully setup Podman image. Configuring build...${NC}"
 echo "You can run it with: podman run -p 8000:8000 $image_name"

-$CONDA_PREFIX/bin/python3 -m llama_toolchain.cli.llama stack configure $config_file
+echo "Checking image builds..."
+podman run -it $image_name cat llamastack-build.yaml
--- a/llama_toolchain/core/configure_container.sh
+++ b/llama_toolchain/core/configure_container.sh
@ -0,0 +1,31 @@
+#!/bin/bash
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+set -euo pipefail
+
+error_handler() {
+  echo "Error occurred in script at line: ${1}" >&2
+  exit 1
+}
+
+trap 'error_handler ${LINENO}' ERR
+
+if [ $# -lt 2 ]; then
+  echo "Usage: $0 <container name> <build file path>"
+  exit 1
+fi
+
+docker_image="$1"
+host_build_dir="$2"
+container_build_dir="/app/builds"
+
+set -x
+podman run -it \
+  -v $host_build_dir:$container_build_dir \
+  $docker_image \
+  llama stack configure ./llamastack-build.yaml --output-dir $container_build_dir
--- a/llama_toolchain/core/datatypes.py
+++ b/llama_toolchain/core/datatypes.py
@ -151,11 +151,12 @@ def remote_provider_spec(

@json_schema_type
 class DistributionSpec(BaseModel):
-    distribution_type: str
-    description: str
-
+    description: Optional[str] = Field(
+        default="",
+        description="Description of the distribution",
+    )
    docker_image: Optional[str] = None
-    providers: Dict[Api, str] = Field(
+    providers: Dict[str, str] = Field(
        default_factory=dict,
        description="Provider Types for each of the APIs provided by this distribution",
    )
@ -172,8 +173,6 @@ Reference to the distribution this package refers to. For unregistered (adhoc) p
 this could be just a hash
 """,
    )
-    distribution_type: Optional[str] = None
-
    docker_image: Optional[str] = Field(
        default=None,
        description="Reference to the docker image if this package refers to a container",
@ -194,12 +193,8 @@ the dependencies of these providers as well.
@json_schema_type
 class BuildConfig(BaseModel):
    name: str
-    distribution: str = Field(
-        default="local", description="Type of distribution to build (adhoc | {})"
-    )
-    api_providers: Optional[str] = Field(
-        default_factory=list,
-        description="List of API provider names to build",
+    distribution_spec: DistributionSpec = Field(
+        description="The distribution spec to build including API providers. "
    )
    image_type: str = Field(
        default="conda",
--- a/llama_toolchain/core/distribution.py
+++ b/llama_toolchain/core/distribution.py
@ -31,16 +31,6 @@ SERVER_DEPENDENCIES = [
 ]


-def distribution_dependencies(distribution: DistributionSpec) -> List[str]:
-    # only consider InlineProviderSpecs when calculating dependencies
-    return [
-        dep
-        for provider_spec in distribution.provider_specs.values()
-        if isinstance(provider_spec, InlineProviderSpec)
-        for dep in provider_spec.pip_packages
-    ] + SERVER_DEPENDENCIES
-
-
 def stack_apis() -> List[Api]:
    return [v for v in Api]

--- a/llama_toolchain/core/distribution_registry.py
+++ b/llama_toolchain/core/distribution_registry.py
@ -5,84 +5,19 @@
 # the root directory of this source tree.

 from functools import lru_cache
+from pathlib import Path
 from typing import List, Optional
-
 from .datatypes import *  # noqa: F403
+import yaml


@lru_cache()
 def available_distribution_specs() -> List[DistributionSpec]:
-    return [
-        DistributionSpec(
-            distribution_type="local",
-            description="Use code from `llama_toolchain` itself to serve all llama stack APIs",
-            providers={
-                Api.inference: "meta-reference",
-                Api.memory: "meta-reference-faiss",
-                Api.safety: "meta-reference",
-                Api.agentic_system: "meta-reference",
-                Api.telemetry: "console",
-            },
-        ),
-        DistributionSpec(
-            distribution_type="remote",
-            description="Point to remote services for all llama stack APIs",
-            providers={
-                **{x: "remote" for x in Api},
-                Api.telemetry: "console",
-            },
-        ),
-        DistributionSpec(
-            distribution_type="local-ollama",
-            description="Like local, but use ollama for running LLM inference",
-            providers={
-                Api.inference: remote_provider_type("ollama"),
-                Api.safety: "meta-reference",
-                Api.agentic_system: "meta-reference",
-                Api.memory: "meta-reference-faiss",
-                Api.telemetry: "console",
-            },
-        ),
-        DistributionSpec(
-            distribution_type="local-plus-fireworks-inference",
-            description="Use Fireworks.ai for running LLM inference",
-            providers={
-                Api.inference: remote_provider_type("fireworks"),
-                Api.safety: "meta-reference",
-                Api.agentic_system: "meta-reference",
-                Api.memory: "meta-reference-faiss",
-                Api.telemetry: "console",
-            },
-        ),
-        DistributionSpec(
-            distribution_type="local-plus-together-inference",
-            description="Use Together.ai for running LLM inference",
-            providers={
-                Api.inference: remote_provider_type("together"),
-                Api.safety: "meta-reference",
-                Api.agentic_system: "meta-reference",
-                Api.memory: "meta-reference-faiss",
-                Api.telemetry: "console",
-            },
-        ),
-        DistributionSpec(
-            distribution_type="local-plus-tgi-inference",
-            description="Use TGI for running LLM inference",
-            providers={
-                Api.inference: remote_provider_type("tgi"),
-                Api.safety: "meta-reference",
-                Api.agentic_system: "meta-reference",
-                Api.memory: "meta-reference-faiss",
-            },
-        ),
-    ]
+    distribution_specs = []
+    for p in Path("llama_toolchain/configs/distributions/distribution_registry").rglob(
+        "*.yaml"
+    ):
+        with open(p, "r") as f:
+            distribution_specs.append(DistributionSpec(**yaml.safe_load(f)))

-
-@lru_cache()
-def resolve_distribution_spec(
-    distribution_type: str,
-) -> Optional[DistributionSpec]:
-    for spec in available_distribution_specs():
-        if spec.distribution_type == distribution_type:
-            return spec
-    return None
+    return distribution_specs
--- a/llama_toolchain/core/package.py
+++ b/llama_toolchain/core/package.py
@@ -21,6 +21,8 @@ from pydantic import BaseModel
 from termcolor import cprint

 from llama_toolchain.core.datatypes import *  # noqa: F403
+from pathlib import Path
+
 from llama_toolchain.core.distribution import api_providers, SERVER_DEPENDENCIES


@@ -39,87 +41,35 @@ class ApiInput(BaseModel):
    provider: str


-def build_package(
-    api_inputs: List[ApiInput],
-    image_type: ImageType,
-    name: str,
-    distribution_type: Optional[str] = None,
-    docker_image: Optional[str] = None,
-):
-    if not distribution_type:
-        distribution_type = "adhoc"
-
-    build_dir = BUILDS_BASE_DIR / distribution_type / image_type.value
-    os.makedirs(build_dir, exist_ok=True)
-
-    package_name = name.replace("::", "-")
-    package_file = build_dir / f"{package_name}.yaml"
-
-    all_providers = api_providers()
-
+def build_package(build_config: BuildConfig, build_file_path: Path):
    package_deps = Dependencies(
-        docker_image=docker_image or "python:3.10-slim",
+        docker_image=build_config.distribution_spec.docker_image or "python:3.10-slim",
        pip_packages=SERVER_DEPENDENCIES,
    )

-    stub_config = {}
-    for api_input in api_inputs:
-        api = api_input.api
-        providers_for_api = all_providers[api]
-        if api_input.provider not in providers_for_api:
+    # extend package dependencies based on providers spec
+    all_providers = api_providers()
+    for api_str, provider in build_config.distribution_spec.providers.items():
+        providers_for_api = all_providers[Api(api_str)]
+        if provider not in providers_for_api:
            raise ValueError(
-                f"Provider `{api_input.provider}` is not available for API `{api}`"
+                f"Provider `{provider}` is not available for API `{api_str}`"
            )

-        provider = providers_for_api[api_input.provider]
-        package_deps.pip_packages.extend(provider.pip_packages)
-        if provider.docker_image:
+        provider_spec = providers_for_api[provider]
+        package_deps.pip_packages.extend(provider_spec.pip_packages)
+        if provider_spec.docker_image:
            raise ValueError("A stack's dependencies cannot have a docker image")

-        stub_config[api.value] = {"provider_type": api_input.provider}
-
-    if package_file.exists():
-        cprint(
-            f"Build `{package_name}` exists; will reconfigure",
-            color="yellow",
-        )
-        c = PackageConfig(**yaml.safe_load(package_file.read_text()))
-        for api_str, new_config in stub_config.items():
-            if api_str not in c.providers:
-                c.providers[api_str] = new_config
-            else:
-                existing_config = c.providers[api_str]
-                if existing_config["provider_type"] != new_config["provider_type"]:
-                    cprint(
-                        f"Provider `{api_str}` has changed from `{existing_config}` to `{new_config}`",
-                        color="yellow",
-                    )
-                    c.providers[api_str] = new_config
-    else:
-        c = PackageConfig(
-            built_at=datetime.now(),
-            package_name=package_name,
-            providers=stub_config,
-        )
-
-    c.distribution_type = distribution_type
-    c.docker_image = package_name if image_type == ImageType.docker else None
-    c.conda_env = package_name if image_type == ImageType.conda else None
-
-    with open(package_file, "w") as f:
-        to_write = json.loads(json.dumps(c.dict(), cls=EnumEncoder))
-        f.write(yaml.dump(to_write, sort_keys=False))
-
-    if image_type == ImageType.docker:
+    if build_config.image_type == ImageType.docker.value:
        script = pkg_resources.resource_filename(
            "llama_toolchain", "core/build_container.sh"
        )
        args = [
            script,
-            distribution_type,
-            package_name,
+            build_config.name,
            package_deps.docker_image,
-            str(package_file),
+            str(build_file_path),
            " ".join(package_deps.pip_packages),
        ]
    else:
@@ -128,21 +78,14 @@ def build_package(
        )
        args = [
            script,
-            distribution_type,
-            package_name,
-            str(package_file),
+            build_config.name,
            " ".join(package_deps.pip_packages),
        ]

    return_code = run_with_pty(args)
    if return_code != 0:
        cprint(
-            f"Failed to build target {package_name} with return code {return_code}",
+            f"Failed to build target {build_config.name} with return code {return_code}",
            color="red",
        )
        return
-
-    cprint(
-        f"Target `{package_name}` built with configuration at {str(package_file)}",
-        color="green",
-    )
--- a/llama_toolchain/inference/meta_reference/generation.py
+++ b/llama_toolchain/inference/meta_reference/generation.py
@@ -28,10 +28,10 @@ from llama_models.llama3.api.datatypes import Message, ToolPromptFormat
 from llama_models.llama3.api.tokenizer import Tokenizer
 from llama_models.llama3.reference_impl.model import Transformer
 from llama_models.sku_list import resolve_model
-from termcolor import cprint

 from llama_toolchain.common.model_utils import model_local_dir
 from llama_toolchain.inference.api import QuantizationType
+from termcolor import cprint

 from .config import MetaReferenceImplConfig

@@ -80,6 +80,7 @@ class Llama:
            torch.distributed.init_process_group("nccl")

        model_parallel_size = config.model_parallel_size
+
        if not model_parallel_is_initialized():
            initialize_model_parallel(model_parallel_size)

--- a/llama_toolchain/memory/common/__init__.py
+++ b/llama_toolchain/memory/common/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.