API Updates (#73)

* API Keys passed from Client instead of distro configuration * delete distribution registry * Rename the "package" word away * Introduce a "Router" layer for providers Some providers need to be factorized and considered as thin routing layers on top of other providers. Consider two examples: - The inference API should be a routing layer over inference providers, routed using the "model" key - The memory banks API is another instance where various memory bank types will be provided by independent providers (e.g., a vector store is served by Chroma while a keyvalue memory can be served by Redis or PGVector) This commit introduces a generalized routing layer for this purpose. * update `apis_to_serve` * llama_toolchain -> llama_stack * Codemod from llama_toolchain -> llama_stack - added providers/registry - cleaned up api/ subdirectories and moved impls away - restructured api/api.py - from llama_stack.apis.<api> import foo should work now - update imports to do llama_stack.apis.<api> - update many other imports - added __init__, fixed some registry imports - updated registry imports - create_agentic_system -> create_agent - AgenticSystem -> Agent * Moved some stuff out of common/; re-generated OpenAPI spec * llama-toolchain -> llama-stack (hyphens) * add control plane API * add redis adapter + sqlite provider * move core -> distribution * Some more toolchain -> stack changes * small naming shenanigans * Removing custom tool and agent utilities and moving them client side * Move control plane to distribution server for now * Remove control plane from API list * no codeshield dependency randomly plzzzzz * Add "fire" as a dependency * add back event loggers * stack configure fixes * use brave instead of bing in the example client * add init file so it gets packaged * add init files so it gets packaged * Update MANIFEST * bug fix --------- Co-authored-by: Hardik Shah <hjshah@fb.com> Co-authored-by: Xi Yan <xiyan@meta.com> Co-authored-by: Ashwin Bharambe <ashwin@meta.com>
2024-09-17 19:51:35 -07:00 · 2024-09-17 19:51:35 -07:00 · 9487ad8294
commit 9487ad8294
parent f294eac5f5
213 changed files with 1725 additions and 1204 deletions
--- a/llama_stack/cli/init.py
+++ b/llama_stack/cli/init.py
@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
--- a/llama_stack/cli/download.py
+++ b/llama_stack/cli/download.py
@ -0,0 +1,339 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+import asyncio
+import json
+import os
+import shutil
+import time
+from datetime import datetime
+from functools import partial
+from pathlib import Path
+from typing import Dict, List
+
+import httpx
+from pydantic import BaseModel
+
+from termcolor import cprint
+
+from llama_stack.cli.subcommand import Subcommand
+
+
+class Download(Subcommand):
+    """Llama cli for downloading llama toolchain assets"""
+
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        super().__init__()
+        self.parser = subparsers.add_parser(
+            "download",
+            prog="llama download",
+            description="Download a model from llama.meta.com or Hugging Face Hub",
+            formatter_class=argparse.RawTextHelpFormatter,
+        )
+        setup_download_parser(self.parser)
+
+
+def setup_download_parser(parser: argparse.ArgumentParser) -> None:
+    from llama_models.sku_list import all_registered_models
+
+    models = all_registered_models()
+    parser.add_argument(
+        "--source",
+        choices=["meta", "huggingface"],
+        required=True,
+    )
+    parser.add_argument(
+        "--model-id",
+        required=False,
+        help="See `llama model list` or `llama model list --show-all` for the list of available models",
+    )
+    parser.add_argument(
+        "--hf-token",
+        type=str,
+        required=False,
+        default=None,
+        help="Hugging Face API token. Needed for gated models like llama2/3. Will also try to read environment variable `HF_TOKEN` as default.",
+    )
+    parser.add_argument(
+        "--meta-url",
+        type=str,
+        required=False,
+        help="For source=meta, URL obtained from llama.meta.com after accepting license terms",
+    )
+    parser.add_argument(
+        "--ignore-patterns",
+        type=str,
+        required=False,
+        default="*.safetensors",
+        help="""
+For source=huggingface, files matching any of the patterns are not downloaded. Defaults to ignoring
+safetensors files to avoid downloading duplicate weights.
+""",
+    )
+    parser.add_argument(
+        "--manifest-file",
+        type=str,
+        help="For source=meta, you can download models from a manifest file containing a file => URL mapping",
+        required=False,
+    )
+    parser.set_defaults(func=partial(run_download_cmd, parser=parser))
+
+
+def _hf_download(
+    model: "Model",
+    hf_token: str,
+    ignore_patterns: str,
+    parser: argparse.ArgumentParser,
+):
+    from huggingface_hub import snapshot_download
+    from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
+
+    from llama_stack.distribution.utils.model_utils import model_local_dir
+
+    repo_id = model.huggingface_repo
+    if repo_id is None:
+        raise ValueError(f"No repo id found for model {model.descriptor()}")
+
+    output_dir = model_local_dir(model.descriptor())
+    os.makedirs(output_dir, exist_ok=True)
+    try:
+        true_output_dir = snapshot_download(
+            repo_id,
+            local_dir=output_dir,
+            ignore_patterns=ignore_patterns,
+            token=hf_token,
+            library_name="llama-stack",
+        )
+    except GatedRepoError:
+        parser.error(
+            "It looks like you are trying to access a gated repository. Please ensure you "
+            "have access to the repository and have provided the proper Hugging Face API token "
+            "using the option `--hf-token` or by running `huggingface-cli login`."
+            "You can find your token by visiting https://huggingface.co/settings/tokens"
+        )
+    except RepositoryNotFoundError:
+        parser.error(f"Repository '{args.repo_id}' not found on the Hugging Face Hub.")
+    except Exception as e:
+        parser.error(e)
+
+    print(f"\nSuccessfully downloaded model to {true_output_dir}")
+
+
+def _meta_download(model: "Model", meta_url: str):
+    from llama_models.sku_list import llama_meta_net_info
+
+    from llama_stack.distribution.utils.model_utils import model_local_dir
+
+    output_dir = Path(model_local_dir(model.descriptor()))
+    os.makedirs(output_dir, exist_ok=True)
+
+    info = llama_meta_net_info(model)
+
+    # I believe we can use some concurrency here if needed but not sure it is worth it
+    for f in info.files:
+        output_file = str(output_dir / f)
+        url = meta_url.replace("*", f"{info.folder}/{f}")
+        total_size = info.pth_size if "consolidated" in f else 0
+        cprint(f"Downloading `{f}`...", "white")
+        downloader = ResumableDownloader(url, output_file, total_size)
+        asyncio.run(downloader.download())
+
+    print(f"\nSuccessfully downloaded model to {output_dir}")
+    cprint(f"\nMD5 Checksums are at: {output_dir / 'checklist.chk'}", "white")
+
+
+def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
+    from llama_models.sku_list import resolve_model
+
+    if args.manifest_file:
+        _download_from_manifest(args.manifest_file)
+        return
+
+    if args.model_id is None:
+        parser.error("Please provide a model id")
+        return
+
+    model = resolve_model(args.model_id)
+    if model is None:
+        parser.error(f"Model {args.model_id} not found")
+        return
+
+    if args.source == "huggingface":
+        _hf_download(model, args.hf_token, args.ignore_patterns, parser)
+    else:
+        meta_url = args.meta_url
+        if not meta_url:
+            meta_url = input(
+                "Please provide the signed URL you received via email (e.g., https://llama3-1.llamameta.net/*?Policy...): "
+            )
+            assert meta_url is not None and "llamameta.net" in meta_url
+        _meta_download(model, meta_url)
+
+
+class ModelEntry(BaseModel):
+    """One model listed in a download manifest."""
+
+    # Identifier of the model; also used to pick the local output directory.
+    model_id: str
+    # Mapping of output file name => URL to download it from.
+    files: Dict[str, str]
+
+    class Config:
+        # NOTE(review): presumably cleared because the `model_id` field name
+        # collides with pydantic's protected `model_` namespace — confirm.
+        protected_namespaces = ()
+
+
+class Manifest(BaseModel):
+    """Download manifest: a list of models plus the expiry time of their URLs."""
+
+    # Models (and their file => URL mappings) covered by this manifest.
+    models: List[ModelEntry]
+    # Point in time after which the manifest is rejected as expired.
+    expires_on: datetime
+
+
+def _download_from_manifest(manifest_file: str):
+    from llama_stack.distribution.utils.model_utils import model_local_dir
+
+    with open(manifest_file, "r") as f:
+        d = json.load(f)
+        manifest = Manifest(**d)
+
+    if datetime.now() > manifest.expires_on:
+        raise ValueError(f"Manifest URLs have expired on {manifest.expires_on}")
+
+    for entry in manifest.models:
+        print(f"Downloading model {entry.model_id}...")
+        output_dir = Path(model_local_dir(entry.model_id))
+        os.makedirs(output_dir, exist_ok=True)
+
+        if any(output_dir.iterdir()):
+            cprint(f"Output directory {output_dir} is not empty.", "red")
+
+            while True:
+                resp = input(
+                    "Do you want to (C)ontinue download or (R)estart completely? (continue/restart): "
+                )
+                if resp.lower() == "restart" or resp.lower() == "r":
+                    shutil.rmtree(output_dir)
+                    os.makedirs(output_dir, exist_ok=True)
+                    break
+                elif resp.lower() == "continue" or resp.lower() == "c":
+                    print("Continuing download...")
+                    break
+                else:
+                    cprint("Invalid response. Please try again.", "red")
+
+        for fname, url in entry.files.items():
+            output_file = str(output_dir / fname)
+            downloader = ResumableDownloader(url, output_file)
+            asyncio.run(downloader.download())
+
+
+class ResumableDownloader:
+    def __init__(
+        self,
+        url: str,
+        output_file: str,
+        total_size: int = 0,
+        buffer_size: int = 32 * 1024,
+    ):
+        self.url = url
+        self.output_file = output_file
+        self.buffer_size = buffer_size
+        self.total_size = total_size
+        self.downloaded_size = 0
+        self.start_size = 0
+        self.start_time = 0
+
+    async def get_file_info(self, client: httpx.AsyncClient) -> None:
+        if self.total_size > 0:
+            return
+
+        # Force disable compression when trying to retrieve file size
+        response = await client.head(
+            self.url, follow_redirects=True, headers={"Accept-Encoding": "identity"}
+        )
+        response.raise_for_status()
+        self.url = str(response.url)  # Update URL in case of redirects
+        self.total_size = int(response.headers.get("Content-Length", 0))
+        if self.total_size == 0:
+            raise ValueError(
+                "Unable to determine file size. The server might not support range requests."
+            )
+
+    async def download(self) -> None:
+        self.start_time = time.time()
+        async with httpx.AsyncClient(follow_redirects=True) as client:
+            await self.get_file_info(client)
+
+            if os.path.exists(self.output_file):
+                self.downloaded_size = os.path.getsize(self.output_file)
+                self.start_size = self.downloaded_size
+                if self.downloaded_size >= self.total_size:
+                    print(f"Already downloaded `{self.output_file}`, skipping...")
+                    return
+
+            additional_size = self.total_size - self.downloaded_size
+            if not self.has_disk_space(additional_size):
+                M = 1024 * 1024  # noqa
+                print(
+                    f"Not enough disk space to download `{self.output_file}`. "
+                    f"Required: {(additional_size // M):.2f} MB"
+                )
+                raise ValueError(
+                    f"Not enough disk space to download `{self.output_file}`"
+                )
+
+            while True:
+                if self.downloaded_size >= self.total_size:
+                    break
+
+                # Cloudfront has a max-size limit
+                max_chunk_size = 27_000_000_000
+                request_size = min(
+                    self.total_size - self.downloaded_size, max_chunk_size
+                )
+                headers = {
+                    "Range": f"bytes={self.downloaded_size}-{self.downloaded_size + request_size}"
+                }
+                print(f"Downloading `{self.output_file}`....{headers}")
+                try:
+                    async with client.stream(
+                        "GET", self.url, headers=headers
+                    ) as response:
+                        response.raise_for_status()
+                        with open(self.output_file, "ab") as file:
+                            async for chunk in response.aiter_bytes(self.buffer_size):
+                                file.write(chunk)
+                                self.downloaded_size += len(chunk)
+                                self.print_progress()
+                except httpx.HTTPError as e:
+                    print(f"\nDownload interrupted: {e}")
+                    print("You can resume the download by running the script again.")
+                except Exception as e:
+                    print(f"\nAn error occurred: {e}")
+
+            print(f"\nFinished downloading `{self.output_file}`....")
+
+    def print_progress(self) -> None:
+        percent = (self.downloaded_size / self.total_size) * 100
+        bar_length = 50
+        filled_length = int(bar_length * self.downloaded_size // self.total_size)
+        bar = "█" * filled_length + "-" * (bar_length - filled_length)
+
+        elapsed_time = time.time() - self.start_time
+        M = 1024 * 1024  # noqa
+
+        speed = (
+            (self.downloaded_size - self.start_size) / (elapsed_time * M)
+            if elapsed_time > 0
+            else 0
+        )
+        print(
+            f"\rProgress: |{bar}| {percent:.2f}% "
+            f"({self.downloaded_size // M}/{self.total_size // M} MB) "
+            f"Speed: {speed:.2f} MiB/s",
+            end="",
+            flush=True,
+        )
+
+    def has_disk_space(self, file_size: int) -> bool:
+        dir_path = os.path.dirname(os.path.abspath(self.output_file))
+        free_space = shutil.disk_usage(dir_path).free
+        return free_space > file_size
--- a/llama_stack/cli/llama.py
+++ b/llama_stack/cli/llama.py
@ -0,0 +1,48 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+
+from .download import Download
+from .model import ModelParser
+from .stack import StackParser
+
+
+class LlamaCLIParser:
+    """Defines CLI parser for Llama CLI"""
+
+    def __init__(self):
+        self.parser = argparse.ArgumentParser(
+            prog="llama",
+            description="Welcome to the Llama CLI",
+            add_help=True,
+        )
+
+        # Default command is to print help
+        self.parser.set_defaults(func=lambda args: self.parser.print_help())
+
+        subparsers = self.parser.add_subparsers(title="subcommands")
+
+        # Add sub-commands
+        Download.create(subparsers)
+        ModelParser.create(subparsers)
+        StackParser.create(subparsers)
+
+    def parse_args(self) -> argparse.Namespace:
+        return self.parser.parse_args()
+
+    def run(self, args: argparse.Namespace) -> None:
+        args.func(args)
+
+
+def main():
+    parser = LlamaCLIParser()
+    args = parser.parse_args()
+    parser.run(args)
+
+
+if __name__ == "__main__":
+    main()
--- a/llama_stack/cli/model/init.py
+++ b/llama_stack/cli/model/init.py
@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .model import ModelParser  # noqa
--- a/llama_stack/cli/model/describe.py
+++ b/llama_stack/cli/model/describe.py
@ -0,0 +1,75 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+import json
+
+from llama_models.sku_list import resolve_model
+
+from termcolor import colored
+
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.cli.table import print_table
+from llama_stack.distribution.utils.serialize import EnumEncoder
+
+
+class ModelDescribe(Subcommand):
+    """Show details about a model"""
+
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        super().__init__()
+        self.parser = subparsers.add_parser(
+            "describe",
+            prog="llama model describe",
+            description="Show details about a llama model",
+            formatter_class=argparse.RawTextHelpFormatter,
+        )
+        self._add_arguments()
+        self.parser.set_defaults(func=self._run_model_describe_cmd)
+
+    def _add_arguments(self):
+        self.parser.add_argument(
+            "-m",
+            "--model-id",
+            type=str,
+            required=True,
+        )
+
+    def _run_model_describe_cmd(self, args: argparse.Namespace) -> None:
+        model = resolve_model(args.model_id)
+        if model is None:
+            self.parser.error(
+                f"Model {args.model_id} not found; try 'llama model list' for a list of available models."
+            )
+            return
+
+        rows = [
+            (
+                colored("Model", "white", attrs=["bold"]),
+                colored(model.descriptor(), "white", attrs=["bold"]),
+            ),
+            ("HuggingFace ID", model.huggingface_repo or "<Not Available>"),
+            ("Description", model.description_markdown),
+            ("Context Length", f"{model.max_seq_length // 1024}K tokens"),
+            ("Weights format", model.quantization_format.value),
+            ("Model params.json", json.dumps(model.model_args, indent=4)),
+        ]
+
+        if model.recommended_sampling_params is not None:
+            sampling_params = model.recommended_sampling_params.dict()
+            for k in ("max_tokens", "repetition_penalty"):
+                del sampling_params[k]
+            rows.append(
+                (
+                    "Recommended sampling params",
+                    json.dumps(sampling_params, cls=EnumEncoder, indent=4),
+                )
+            )
+
+        print_table(
+            rows,
+            separate_rows=True,
+        )
--- a/llama_stack/cli/model/download.py
+++ b/llama_stack/cli/model/download.py
@ -0,0 +1,24 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+
+from llama_stack.cli.subcommand import Subcommand
+
+
+class ModelDownload(Subcommand):
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        super().__init__()
+        self.parser = subparsers.add_parser(
+            "download",
+            prog="llama model download",
+            description="Download a model from llama.meta.com or Hugging Face Hub",
+            formatter_class=argparse.RawTextHelpFormatter,
+        )
+
+        from llama_stack.cli.download import setup_download_parser
+
+        setup_download_parser(self.parser)
--- a/llama_stack/cli/model/list.py
+++ b/llama_stack/cli/model/list.py
@ -0,0 +1,60 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+
+from llama_models.sku_list import all_registered_models
+
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.cli.table import print_table
+
+
+class ModelList(Subcommand):
+    """List available llama models"""
+
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        super().__init__()
+        self.parser = subparsers.add_parser(
+            "list",
+            prog="llama model list",
+            description="Show available llama models",
+            formatter_class=argparse.RawTextHelpFormatter,
+        )
+        self._add_arguments()
+        self.parser.set_defaults(func=self._run_model_list_cmd)
+
+    def _add_arguments(self):
+        self.parser.add_argument(
+            "--show-all",
+            action="store_true",
+            help="Show all models (not just defaults)",
+        )
+
+    def _run_model_list_cmd(self, args: argparse.Namespace) -> None:
+        headers = [
+            "Model Descriptor",
+            "HuggingFace Repo",
+            "Context Length",
+        ]
+
+        rows = []
+        for model in all_registered_models():
+            if not args.show_all and not model.is_featured:
+                continue
+
+            descriptor = model.descriptor()
+            rows.append(
+                [
+                    descriptor,
+                    model.huggingface_repo,
+                    f"{model.max_seq_length // 1024}K",
+                ]
+            )
+        print_table(
+            rows,
+            headers,
+            separate_rows=True,
+        )
--- a/llama_stack/cli/model/model.py
+++ b/llama_stack/cli/model/model.py
@ -0,0 +1,34 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+
+from llama_stack.cli.model.describe import ModelDescribe
+from llama_stack.cli.model.download import ModelDownload
+from llama_stack.cli.model.list import ModelList
+from llama_stack.cli.model.template import ModelTemplate
+
+from llama_stack.cli.subcommand import Subcommand
+
+
+class ModelParser(Subcommand):
+    """Llama cli for model interface apis"""
+
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        super().__init__()
+        self.parser = subparsers.add_parser(
+            "model",
+            prog="llama model",
+            description="Work with llama models",
+        )
+
+        subparsers = self.parser.add_subparsers(title="model_subcommands")
+
+        # Add sub-commands
+        ModelDownload.create(subparsers)
+        ModelList.create(subparsers)
+        ModelTemplate.create(subparsers)
+        ModelDescribe.create(subparsers)
--- a/llama_stack/cli/model/template.py
+++ b/llama_stack/cli/model/template.py
@ -0,0 +1,113 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+import textwrap
+
+from termcolor import colored
+
+from llama_stack.cli.subcommand import Subcommand
+
+
+class ModelTemplate(Subcommand):
+    """Llama model cli for describe a model template (message formats)"""
+
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        super().__init__()
+        self.parser = subparsers.add_parser(
+            "template",
+            prog="llama model template",
+            description="Show llama model message formats",
+            epilog=textwrap.dedent(
+                """
+                Example:
+                    llama model template <options>
+                """
+            ),
+            formatter_class=argparse.RawTextHelpFormatter,
+        )
+        self._add_arguments()
+        self.parser.set_defaults(func=self._run_model_template_cmd)
+
+    def _prompt_type(self, value):
+        from llama_models.llama3.api.datatypes import ToolPromptFormat
+
+        try:
+            return ToolPromptFormat(value.lower())
+        except ValueError:
+            raise argparse.ArgumentTypeError(
+                f"{value} is not a valid ToolPromptFormat. Choose from {', '.join(t.value for t in ToolPromptFormat)}"
+            ) from None
+
+    def _add_arguments(self):
+        self.parser.add_argument(
+            "-m",
+            "--model-family",
+            type=str,
+            default="llama3_1",
+            help="Model Family (llama3_1, llama3_X, etc.)",
+        )
+        self.parser.add_argument(
+            "--name",
+            type=str,
+            help="Usecase template name (system_message, user_message, assistant_message, tool_message)...",
+            required=False,
+        )
+        self.parser.add_argument(
+            "--format",
+            type=str,
+            help="ToolPromptFormat (json or function_tag). This flag is used to print the template in a specific formats.",
+            required=False,
+            default="json",
+        )
+        self.parser.add_argument(
+            "--raw",
+            action="store_true",
+            help="If set to true, don't pretty-print into a table. Useful to copy-paste.",
+        )
+
+    def _run_model_template_cmd(self, args: argparse.Namespace) -> None:
+        from llama_models.llama3.api.interface import (
+            list_jinja_templates,
+            render_jinja_template,
+        )
+
+        from llama_stack.cli.table import print_table
+
+        if args.name:
+            tool_prompt_format = self._prompt_type(args.format)
+            template, tokens_info = render_jinja_template(args.name, tool_prompt_format)
+            rendered = ""
+            for tok, is_special in tokens_info:
+                if is_special:
+                    rendered += colored(tok, "yellow", attrs=["bold"])
+                else:
+                    rendered += tok
+
+            if not args.raw:
+                rendered = rendered.replace("\n", "↵\n")
+                print_table(
+                    [
+                        (
+                            "Name",
+                            colored(template.template_name, "white", attrs=["bold"]),
+                        ),
+                        ("Template", rendered),
+                        ("Notes", template.notes),
+                    ],
+                    separate_rows=True,
+                )
+            else:
+                print("Template: ", template.template_name)
+                print("=" * 40)
+                print(rendered)
+        else:
+            templates = list_jinja_templates()
+            headers = ["Role", "Template Name"]
+            print_table(
+                [(t.role, t.template_name) for t in templates],
+                headers,
+            )
--- a/llama_stack/cli/scripts/init.py
+++ b/llama_stack/cli/scripts/init.py
@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
--- a/llama_stack/cli/scripts/install-wheel-from-presigned.sh
+++ b/llama_stack/cli/scripts/install-wheel-from-presigned.sh
@ -0,0 +1,38 @@
+#!/bin/bash
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+set -euo pipefail
+
+if [ $# -eq 0 ]; then
+  echo "Please provide a URL as an argument."
+  exit 1
+fi
+
+URL=$1
+
+HEADERS_FILE=$(mktemp)
+# Remove the temporary headers file on every exit path, including failures.
+trap 'rm -f "$HEADERS_FILE"' EXIT
+
+curl -s -I "$URL" >"$HEADERS_FILE"
+# grep exits non-zero when the header is absent. Under `set -e` with `pipefail`
+# that would abort the script right here, before the diagnostic below is
+# printed, so tolerate the failure and let the emptiness check handle it.
+FILENAME=$(grep -i "x-manifold-obj-canonicalpath:" "$HEADERS_FILE" | sed -E 's/.*nodes\/[^\/]+\/(.+)/\1/' | tr -d "\r\n" || true)
+
+if [ -z "$FILENAME" ]; then
+  echo "Could not find the x-manifold-obj-canonicalpath header."
+  echo "HEADERS_FILE contents: "
+  cat "$HEADERS_FILE"
+  echo ""
+  exit 1
+fi
+
+echo "Downloading $FILENAME..."
+
+curl -s -L -o "$FILENAME" "$URL"
+
+echo "Installing $FILENAME..."
+pip install "$FILENAME"
+echo "Successfully installed $FILENAME"
+
+# The wheel is only needed for the install step.
+rm -f "$FILENAME"
--- a/llama_stack/cli/scripts/run.py
+++ b/llama_stack/cli/scripts/run.py
@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+import subprocess
+import sys
+
+
+def install_wheel_from_presigned():
+    file = "install-wheel-from-presigned.sh"
+    script_path = os.path.join(os.path.dirname(__file__), file)
+    try:
+        subprocess.run(["sh", script_path] + sys.argv[1:], check=True)
+    except Exception:
+        sys.exit(1)
--- a/llama_stack/cli/stack/init.py
+++ b/llama_stack/cli/stack/init.py
@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .stack import StackParser  # noqa
--- a/llama_stack/cli/stack/build.py
+++ b/llama_stack/cli/stack/build.py
@ -0,0 +1,94 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.distribution.datatypes import *  # noqa: F403
+from pathlib import Path
+
+import yaml
+
+
+class StackBuild(Subcommand):
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        super().__init__()
+        self.parser = subparsers.add_parser(
+            "build",
+            prog="llama stack build",
+            description="Build a Llama stack container",
+            formatter_class=argparse.RawTextHelpFormatter,
+        )
+        self._add_arguments()
+        self.parser.set_defaults(func=self._run_stack_build_command)
+
+    def _add_arguments(self):
+        self.parser.add_argument(
+            "config",
+            type=str,
+            help="Path to a config file to use for the build. You may find example configs in llama_stack/distribution/example_configs",
+        )
+
+        self.parser.add_argument(
+            "--name",
+            type=str,
+            help="Name of the llama stack build to override from template config",
+        )
+
+    def _run_stack_build_command_from_build_config(
+        self, build_config: BuildConfig
+    ) -> None:
+        import json
+        import os
+
+        from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
+        from llama_stack.distribution.utils.serialize import EnumEncoder
+        from llama_stack.distribution.build import ApiInput, build_image, ImageType
+        from termcolor import cprint
+
+        # save build.yaml spec for building same distribution again
+        if build_config.image_type == ImageType.docker.value:
+            # docker needs build file to be in the llama-stack repo dir to be able to copy over to the image
+            llama_stack_path = Path(os.path.relpath(__file__)).parent.parent.parent
+            build_dir = (
+                llama_stack_path / "configs/distributions" / build_config.image_type
+            )
+        else:
+            build_dir = DISTRIBS_BASE_DIR / build_config.image_type
+
+        os.makedirs(build_dir, exist_ok=True)
+        build_file_path = build_dir / f"{build_config.name}-build.yaml"
+
+        with open(build_file_path, "w") as f:
+            to_write = json.loads(json.dumps(build_config.dict(), cls=EnumEncoder))
+            f.write(yaml.dump(to_write, sort_keys=False))
+
+        build_image(build_config, build_file_path)
+
+        cprint(
+            f"Build spec configuration saved at {str(build_file_path)}",
+            color="green",
+        )
+
+    def _run_stack_build_command(self, args: argparse.Namespace) -> None:
+        from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
+        from llama_stack.distribution.utils.dynamic import instantiate_class_type
+
+        if not args.config:
+            self.parser.error(
+                "No config file specified. Please use `llama stack build /path/to/*-build.yaml`. Example config files can be found in llama_stack/distribution/example_configs"
+            )
+            return
+
+        with open(args.config, "r") as f:
+            try:
+                build_config = BuildConfig(**yaml.safe_load(f))
+            except Exception as e:
+                self.parser.error(f"Could not parse config file {args.config}: {e}")
+                return
+            if args.name:
+                build_config.name = args.name
+            self._run_stack_build_command_from_build_config(build_config)
--- a/llama_stack/cli/stack/configure.py
+++ b/llama_stack/cli/stack/configure.py
@ -0,0 +1,137 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+import json
+from pathlib import Path
+
+import pkg_resources
+
+import yaml
+from termcolor import cprint
+
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR
+
+from llama_stack.distribution.utils.exec import run_with_pty
+from llama_stack.distribution.datatypes import *  # noqa: F403
+import os
+
+
+class StackConfigure(Subcommand):
+    """`llama stack configure`: produce the run config for a llama stack build."""
+
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        """Register the ``configure`` sub-command on ``subparsers``."""
+        super().__init__()
+        self.parser = subparsers.add_parser(
+            "configure",
+            prog="llama stack configure",
+            description="configure a llama stack distribution",
+            formatter_class=argparse.RawTextHelpFormatter,
+        )
+        self._add_arguments()
+        self.parser.set_defaults(func=self._run_stack_configure_cmd)
+
+    def _add_arguments(self):
+        """Declare the positional ``config`` argument and ``--output-dir``."""
+        self.parser.add_argument(
+            "config",
+            type=str,
+            help="Path to the build config file (e.g. ~/.llama/builds/<image_type>/<name>-build.yaml). For docker, this could also be the name of the docker image. ",
+        )
+
+        self.parser.add_argument(
+            "--output-dir",
+            type=str,
+            help="Path to the output directory to store generated run.yaml config file. If not specified, will use ~/.llama/build/<image_type>/<name>-run.yaml",
+        )
+
+    def _run_stack_configure_cmd(self, args: argparse.Namespace) -> None:
+        """Entry point for ``llama stack configure``.
+
+        When ``args.config`` names an existing file, it is parsed as a
+        ``BuildConfig`` and passed to ``_configure_llama_distribution``.
+        Otherwise the value is treated as a docker image name and
+        ``configure_container.sh`` is run for it; a non-zero exit status of
+        that script is reported through ``self.parser.error``.
+        """
+        from llama_stack.distribution.build import ImageType
+
+        build_config_file = Path(args.config)
+        if not build_config_file.exists():
+            cprint(
+                f"Could not find {build_config_file}. Trying docker image name instead...",
+                color="green",
+            )
+            docker_image = args.config
+
+            builds_dir = BUILDS_BASE_DIR / ImageType.docker.value
+            if args.output_dir:
+                # The override lives on the parsed namespace; there is no
+                # local ``output_dir`` in this method.
+                builds_dir = Path(args.output_dir)
+            os.makedirs(builds_dir, exist_ok=True)
+
+            script = pkg_resources.resource_filename(
+                "llama_stack", "distribution/configure_container.sh"
+            )
+            script_args = [script, docker_image, str(builds_dir)]
+
+            return_code = run_with_pty(script_args)
+
+            # we have regenerated the build config file with script, now check if it exists
+            if return_code != 0:
+                self.parser.error(
+                    f"Can not find {build_config_file}. Please run llama stack build first or check if docker image exists"
+                )
+
+            build_name = docker_image.removeprefix("llamastack-")
+            cprint(
+                f"YAML configuration has been written to {builds_dir / f'{build_name}-run.yaml'}",
+                color="green",
+            )
+            return
+
+        with open(build_config_file, "r") as f:
+            build_config = BuildConfig(**yaml.safe_load(f))
+
+        self._configure_llama_distribution(build_config, args.output_dir)
+
+    def _configure_llama_distribution(
+        self,
+        build_config: BuildConfig,
+        output_dir: Optional[str] = None,
+    ):
+        """Create or refresh ``<image_name>-run.yaml`` for ``build_config``.
+
+        The file is placed in ``output_dir`` when given, otherwise in
+        ``BUILDS_BASE_DIR / build_config.image_type``. An existing run config
+        is loaded and used as the starting point; otherwise a fresh
+        ``StackRunConfig`` is created. The config is then passed through
+        ``configure_api_providers`` and written back as YAML.
+        """
+        from llama_stack.distribution.configure import configure_api_providers
+        from llama_stack.distribution.utils.serialize import EnumEncoder
+
+        builds_dir = BUILDS_BASE_DIR / build_config.image_type
+        if output_dir:
+            builds_dir = Path(output_dir)
+        os.makedirs(builds_dir, exist_ok=True)
+        # "::" in the build name is replaced so the name can be used in a file name.
+        image_name = build_config.name.replace("::", "-")
+        run_config_file = builds_dir / f"{image_name}-run.yaml"
+
+        if run_config_file.exists():
+            cprint(
+                f"Configuration already exists at `{str(run_config_file)}`. Will overwrite...",
+                "yellow",
+                attrs=["bold"],
+            )
+            config = StackRunConfig(**yaml.safe_load(run_config_file.read_text()))
+        else:
+            config = StackRunConfig(
+                built_at=datetime.now(),
+                image_name=image_name,
+                apis_to_serve=[],
+                provider_map={},
+            )
+
+        config = configure_api_providers(config, build_config.distribution_spec)
+
+        # Exactly one of these is set, depending on the image type of the build.
+        config.docker_image = (
+            image_name if build_config.image_type == "docker" else None
+        )
+        config.conda_env = image_name if build_config.image_type == "conda" else None
+
+        with open(run_config_file, "w") as f:
+            # Round-trip through JSON with EnumEncoder before dumping as YAML.
+            to_write = json.loads(json.dumps(config.dict(), cls=EnumEncoder))
+            f.write(yaml.dump(to_write, sort_keys=False))
+
+        cprint(
+            f"> YAML configuration has been written to {run_config_file}",
+            color="blue",
+        )
--- a/llama_stack/cli/stack/list_apis.py
+++ b/llama_stack/cli/stack/list_apis.py
@ -0,0 +1,47 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+
+from llama_stack.cli.subcommand import Subcommand
+
+
+class StackListApis(Subcommand):
+    """`llama stack list-apis`: print a one-column table of the stack APIs."""
+
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        """Register the ``list-apis`` sub-command on ``subparsers``."""
+        super().__init__()
+        parser = subparsers.add_parser(
+            "list-apis",
+            prog="llama stack list-apis",
+            description="List APIs part of the Llama Stack implementation",
+            formatter_class=argparse.RawTextHelpFormatter,
+        )
+        self.parser = parser
+        self._add_arguments()
+        parser.set_defaults(func=self._run_apis_list_cmd)
+
+    def _add_arguments(self):
+        """This command takes no arguments."""
+
+    def _run_apis_list_cmd(self, args: argparse.Namespace) -> None:
+        """Print the ``value`` of every API returned by ``stack_apis()``."""
+        from llama_stack.cli.table import print_table
+        from llama_stack.distribution.distribution import stack_apis
+
+        # eventually, this should query a registry at llama.meta.com/llamastack/distributions
+        api_rows = [[api.value] for api in stack_apis()]
+        print_table(api_rows, ["API"], separate_rows=True)
--- a/llama_stack/cli/stack/list_providers.py
+++ b/llama_stack/cli/stack/list_providers.py
@ -0,0 +1,60 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+
+from llama_stack.cli.subcommand import Subcommand
+
+
+class StackListProviders(Subcommand):
+    """`llama stack list-providers`: show the providers registered for one API."""
+
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        """Register the ``list-providers`` sub-command on ``subparsers``."""
+        super().__init__()
+        parser = subparsers.add_parser(
+            "list-providers",
+            prog="llama stack list-providers",
+            description="Show available Llama Stack Providers for an API",
+            formatter_class=argparse.RawTextHelpFormatter,
+        )
+        self.parser = parser
+        self._add_arguments()
+        parser.set_defaults(func=self._run_providers_list_cmd)
+
+    def _add_arguments(self):
+        """Declare the positional ``api`` argument, limited to known API values."""
+        from llama_stack.distribution.distribution import stack_apis
+
+        api_values = [a.value for a in stack_apis()]
+        self.parser.add_argument(
+            "api",
+            type=str,
+            choices=api_values,
+            help=f"API to list providers for (one of: {api_values})",
+        )
+
+    def _run_providers_list_cmd(self, args: argparse.Namespace) -> None:
+        """Print each provider's id and comma-joined pip packages for ``args.api``."""
+        from llama_stack.cli.table import print_table
+        from llama_stack.distribution.distribution import Api, api_providers
+
+        providers_for_api = api_providers()[Api(args.api)]
+
+        # eventually, this should query a registry at llama.meta.com/llamastack/distributions
+        provider_rows = [
+            [spec.provider_id, ",".join(spec.pip_packages)]
+            for spec in providers_for_api.values()
+        ]
+        print_table(
+            provider_rows,
+            ["Provider Type", "PIP Package Dependencies"],
+            separate_rows=True,
+        )
--- a/llama_stack/cli/stack/run.py
+++ b/llama_stack/cli/stack/run.py
@ -0,0 +1,88 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+
+from pathlib import Path
+
+import pkg_resources
+import yaml
+
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.distribution.datatypes import *  # noqa: F403
+
+
+class StackRun(Subcommand):
+    """`llama stack run`: launch the start script for a configured distribution."""
+
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        """Register the ``run`` sub-command on ``subparsers``."""
+        super().__init__()
+        parser = subparsers.add_parser(
+            "run",
+            prog="llama stack run",
+            description="""start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.""",
+            formatter_class=argparse.RawTextHelpFormatter,
+        )
+        self.parser = parser
+        self._add_arguments()
+        parser.set_defaults(func=self._run_stack_run_cmd)
+
+    def _add_arguments(self):
+        """Declare ``config``, ``--port`` and ``--disable-ipv6``."""
+        add_argument = self.parser.add_argument
+        add_argument(
+            "config",
+            type=str,
+            help="Path to config file to use for the run",
+        )
+        add_argument(
+            "--port",
+            type=int,
+            help="Port to run the server on. Defaults to 5000",
+            default=5000,
+        )
+        add_argument(
+            "--disable-ipv6",
+            action="store_true",
+            help="Disable IPv6 support",
+            default=False,
+        )
+
+    def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
+        """Load the run config at ``args.config`` and execute its start script.
+
+        ``start_container.sh`` is used when the config has a truthy
+        ``docker_image``; otherwise ``start_conda_env.sh`` is used with
+        ``conda_env``. The config path and port follow as arguments, plus
+        ``--disable-ipv6`` when requested.
+        """
+        from llama_stack.distribution.utils.exec import run_with_pty
+
+        if not args.config:
+            self.parser.error("Must specify a config file to run")
+            return
+
+        config_file = Path(args.config)
+        if not config_file.exists():
+            self.parser.error(
+                f"File {str(config_file)} does not exist. Did you run `llama stack build`?"
+            )
+            return
+
+        with open(config_file, "r") as stream:
+            config = StackRunConfig(**yaml.safe_load(stream))
+
+        uses_docker = bool(config.docker_image)
+        script_name = (
+            "distribution/start_container.sh"
+            if uses_docker
+            else "distribution/start_conda_env.sh"
+        )
+        script = pkg_resources.resource_filename("llama_stack", script_name)
+        target = config.docker_image if uses_docker else config.conda_env
+
+        run_args = [script, target, str(config_file), str(args.port)]
+        if args.disable_ipv6:
+            run_args.append("--disable-ipv6")
+
+        run_with_pty(run_args)
--- a/llama_stack/cli/stack/stack.py
+++ b/llama_stack/cli/stack/stack.py
@ -0,0 +1,34 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+
+from llama_stack.cli.subcommand import Subcommand
+
+from .build import StackBuild
+from .configure import StackConfigure
+from .list_apis import StackListApis
+from .list_providers import StackListProviders
+from .run import StackRun
+
+
+class StackParser(Subcommand):
+    """Top-level `llama stack` command that hosts the stack sub-commands."""
+
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        """Register ``stack`` on ``subparsers`` and attach its sub-commands."""
+        super().__init__()
+        self.parser = subparsers.add_parser(
+            "stack",
+            prog="llama stack",
+            description="Operations for the Llama Stack / Distributions",
+        )
+
+        stack_subparsers = self.parser.add_subparsers(title="stack_subcommands")
+
+        # Sub-commands are registered in this fixed order.
+        for command_cls in (
+            StackBuild,
+            StackConfigure,
+            StackListApis,
+            StackListProviders,
+            StackRun,
+        ):
+            command_cls.create(stack_subparsers)
--- a/llama_stack/cli/subcommand.py
+++ b/llama_stack/cli/subcommand.py
@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+class Subcommand:
+    """Base class that every llama cli subcommand derives from."""
+
+    def __init__(self, *args, **kwargs):
+        """Accept and ignore any arguments."""
+
+    @classmethod
+    def create(cls, *args, **kwargs):
+        """Construct an instance of ``cls``, forwarding all arguments to it."""
+        instance = cls(*args, **kwargs)
+        return instance
+
+    def _add_arguments(self):
+        """Hook for registering parser arguments; does nothing here."""
+        return None
--- a/llama_stack/cli/table.py
+++ b/llama_stack/cli/table.py
@ -0,0 +1,77 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import re
+import textwrap
+
+from termcolor import cprint
+
+
+def strip_ansi_colors(text):
+    """Return ``text`` with every match of the ANSI escape pattern removed."""
+    return re.sub(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", "", text)
+
+
+def format_row(row, col_widths):
+    """Render one table row as one or more ``| a | b |`` text lines.
+
+    Each cell is wrapped to the width of its column, and the row takes as
+    many lines as its tallest cell. Cells are padded with spaces based on
+    their length after ANSI escapes are stripped.
+
+    Args:
+        row: cell strings, paired positionally with ``col_widths``.
+        col_widths: width of each column in characters.
+
+    Returns:
+        The rendered lines joined with newlines; an empty string when
+        ``row`` or ``col_widths`` is empty.
+    """
+
+    def wrap(text, width):
+        # Wrap each existing line separately so explicit newlines survive;
+        # blank lines are kept as empty entries.
+        lines = []
+        for line in text.split("\n"):
+            if line.strip() == "":
+                lines.append("")
+            else:
+                lines.extend(
+                    textwrap.wrap(
+                        line, width, break_long_words=False, replace_whitespace=False
+                    )
+                )
+        return lines
+
+    wrapped = [wrap(item, width) for item, width in zip(row, col_widths)]
+    # default=0 keeps an empty row from raising ValueError in max().
+    max_lines = max((len(cell_lines) for cell_lines in wrapped), default=0)
+
+    lines = []
+    for i in range(max_lines):
+        line = []
+        for cell_lines, width in zip(wrapped, col_widths):
+            # Cells with fewer lines than the tallest cell are filled with blanks.
+            value = cell_lines[i] if i < len(cell_lines) else ""
+            line.append(value + " " * (width - len(strip_ansi_colors(value))))
+        lines.append("| " + (" | ".join(line)) + " |")
+
+    return "\n".join(lines)
+
+
+def print_table(rows, headers=None, separate_rows: bool = False):
+    """Print ``rows`` as an ASCII table with ``+---+`` borders.
+
+    Args:
+        rows: iterable of rows, each an iterable of cell strings; falsy
+            cells are shown as empty strings.
+        headers: optional column titles, printed in bold above the rows.
+        separate_rows: when true, print a border line after every row
+            instead of a single closing border.
+
+    Column widths come from the longest line of any cell or header in the
+    column, capped at 80 characters. When headers are given but there are
+    no rows, the columns are sized from the headers alone.
+    """
+
+    def itemlen(item):
+        # Longest line of the item, ignoring ANSI escapes.
+        return max(len(line) for line in strip_ansi_colors(item).split("\n"))
+
+    rows = [[x or "" for x in row] for row in rows]
+    if not headers:
+        col_widths = [max(itemlen(item) for item in col) for col in zip(*rows)]
+    elif not rows:
+        # zip(*rows) yields nothing without data rows, which would drop every
+        # column and make the header row impossible to format.
+        col_widths = [itemlen(header) for header in headers]
+    else:
+        col_widths = [
+            max(
+                itemlen(header),
+                max(itemlen(item) for item in col),
+            )
+            for header, col in zip(headers, zip(*rows))
+        ]
+    col_widths = [min(w, 80) for w in col_widths]
+
+    header_line = "+".join("-" * (width + 2) for width in col_widths)
+    header_line = f"+{header_line}+"
+
+    if headers:
+        print(header_line)
+        cprint(format_row(headers, col_widths), "white", attrs=["bold"])
+
+    print(header_line)
+    for row in rows:
+        print(format_row(row, col_widths))
+        if separate_rows:
+            print(header_line)
+
+    if not separate_rows:
+        print(header_line)