Introduce Llama stack distributions (#22)

* Add distribution CLI scaffolding

* More progress towards `llama distribution install`

* getting closer to a distro definition, distro install + configure works

* Distribution server now functioning

* read existing configuration, save enums properly

* Remove inference uvicorn server entrypoint and llama inference CLI command

* updated dependency and client model name

* Improved exception handling

* local imports for faster cli

* undo a typo, add a passthrough distribution

* implement full-passthrough in the server

* add safety adapters, configuration handling, server + clients

* cleanup, moving stuff to common, nuke utils

* Add a Path() wrapper at the earliest place

* fixes

* Bring agentic system api to toolchain

Add adapter dependencies and resolve adapters using a topological sort (see the ordering sketch after this list)

* refactor to reduce size of `agentic_system`

* move straggler files and fix some important existing bugs

* ApiSurface -> Api

* refactor a method out

* Adapter -> Provider

* Make each inference provider into its own subdirectory

* installation fixes

* Rename Distribution -> DistributionSpec, simplify RemoteProviders

* dict key instead of attr

* update inference config to take model and not model_dir

* Fix passthrough streaming; send headers as actual HTTP headers, not as part of the body :facepalm

* update safety to use model sku ids and not model dirs

* Update cli_reference.md

* minor fixes

* add DistributionConfig, fix a bug in model download

* Make install + start scripts do proper configuration automatically

* Update CLI_reference

* Nuke fp8_requirements, fold fbgemm into common requirements

* Update README, add newline between API surface configurations

* Refactor download functionality out of the Command so can be reused

* Add `llama model download` alias for `llama download`

* Show message about checksum file so users can check themselves

* Simpler intro statements

* get ollama working

* Reduce a bunch of dependencies from toolchain

Some improvements to the distribution install script

* Avoid using `conda run` since it buffers everything

* update dependencies and rely on LLAMA_TOOLCHAIN_DIR for dev purposes

* add validation for configuration input

* Re-sort imports

* make optional subclasses default to yes for configuration

* Remove additional_pip_packages; move deps to providers

* Make the 8B model the default for inline inference

* Add scripts to MANIFEST

* allow installing from test.pypi.org

* Fix #2 to help with testing packages

* Must install llama-models at that same version first

* fix PIP_ARGS
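
The "resolve adapters using a topological sort" item above can be illustrated with a minimal sketch. This is not the code from this PR; the dependency mapping, field names, and the example providers below are hypothetical, and the sketch only assumes that each provider declares which other APIs it depends on and must be instantiated after them.

# Minimal sketch of dependency-ordered provider resolution (illustrative only,
# not the implementation in this PR). `deps` maps a provider/API name to the
# names it depends on; the result lists dependencies before their dependents.
from typing import Dict, List


def topological_sort(deps: Dict[str, List[str]]) -> List[str]:
    visited: set = set()
    order: List[str] = []

    def visit(node: str, stack: tuple = ()) -> None:
        if node in stack:
            raise ValueError(f"Circular dependency involving {node}")
        if node in visited:
            return
        for dep in deps.get(node, []):
            visit(dep, stack + (node,))
        visited.add(node)
        order.append(node)

    for node in deps:
        visit(node)
    return order


# Hypothetical example: agentic_system depends on inference and safety.
print(
    topological_sort(
        {
            "inference": [],
            "safety": ["inference"],
            "agentic_system": ["inference", "safety"],
        }
    )
)
# -> ['inference', 'safety', 'agentic_system']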

---------

Co-authored-by: Hardik Shah <hjshah@fb.com>
Co-authored-by: Hardik Shah <hjshah@meta.com>
Ashwin Bharambe 2024-08-08 13:38:41 -07:00 committed by GitHub
parent da4645a27a
commit e830814399
115 changed files with 5839 additions and 1120 deletions


@@ -0,0 +1,15 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os
from pathlib import Path


LLAMA_STACK_CONFIG_DIR = Path(os.path.expanduser("~/.llama/"))

DISTRIBS_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "distributions"

DEFAULT_CHECKPOINT_DIR = LLAMA_STACK_CONFIG_DIR / "checkpoints"
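A quick illustration of how these directories compose on disk; the distribution name and filename below are invented for the example and are not taken from this diff:

# Illustrative only -- "local-ollama" and "config.yaml" are made-up names.
import os
from pathlib import Path

LLAMA_STACK_CONFIG_DIR = Path(os.path.expanduser("~/.llama/"))
DISTRIBS_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "distributions"

config_path = DISTRIBS_BASE_DIR / "local-ollama" / "config.yaml"
print(config_path)  # e.g. /home/user/.llama/distributions/local-ollama/config.yaml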


@@ -9,9 +9,9 @@ from typing import Dict, Optional

 from llama_models.llama3_1.api.datatypes import URL
-from pydantic import BaseModel
+from llama_models.schema_utils import json_schema_type

-from strong_typing.schema import json_schema_type
+from pydantic import BaseModel


 @json_schema_type


@@ -0,0 +1,105 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import errno
import os
import pty
import select
import signal
import subprocess
import sys
import termios

from termcolor import cprint


# run a command in a pseudo-terminal, with interrupt handling,
# useful when you want to run interactive things
def run_with_pty(command):
    master, slave = pty.openpty()

    old_settings = termios.tcgetattr(sys.stdin)
    original_sigint = signal.getsignal(signal.SIGINT)

    ctrl_c_pressed = False

    def sigint_handler(signum, frame):
        nonlocal ctrl_c_pressed
        ctrl_c_pressed = True
        cprint("\nCtrl-C detected. Aborting...", "white", attrs=["bold"])

    try:
        # Set up the signal handler
        signal.signal(signal.SIGINT, sigint_handler)

        new_settings = termios.tcgetattr(sys.stdin)
        new_settings[3] = new_settings[3] & ~termios.ECHO  # Disable echo
        new_settings[3] = new_settings[3] & ~termios.ICANON  # Disable canonical mode
        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, new_settings)

        process = subprocess.Popen(
            command,
            stdin=slave,
            stdout=slave,
            stderr=slave,
            universal_newlines=True,
            preexec_fn=os.setsid,
        )

        # Close the slave file descriptor as it's now owned by the subprocess
        os.close(slave)

        def handle_io():
            while not ctrl_c_pressed:
                try:
                    rlist, _, _ = select.select([sys.stdin, master], [], [], 0.1)

                    if sys.stdin in rlist:
                        data = os.read(sys.stdin.fileno(), 1024)
                        if not data:
                            break
                        os.write(master, data)

                    if master in rlist:
                        data = os.read(master, 1024)
                        if not data:
                            break
                        sys.stdout.buffer.write(data)
                        sys.stdout.flush()

                except KeyboardInterrupt:
                    # This will be raised when Ctrl+C is pressed
                    break

                if process.poll() is not None:
                    break

        handle_io()
    except (EOFError, KeyboardInterrupt):
        pass
    except OSError as e:
        if e.errno != errno.EIO:
            raise
    finally:
        # Clean up
        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
        signal.signal(signal.SIGINT, original_sigint)
        os.close(master)

        if process.poll() is None:
            process.terminate()
            process.wait()

    return process.returncode


def run_command(command):
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, error = process.communicate()
    if process.returncode != 0:
        print(f"Error: {error.decode('utf-8')}")
        sys.exit(1)
    return output.decode("utf-8")
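
A hedged usage sketch for the two helpers above; the commands are arbitrary examples, not anything this PR runs:

# Illustrative only: how run_with_pty / run_command might be called.
if __name__ == "__main__":
    # Interactive: stdin/stdout are proxied through the pty; Ctrl-C is handled.
    returncode = run_with_pty(["bash", "-c", "read -p 'name? ' n && echo hello $n"])
    print(f"exited with {returncode}")

    # Non-interactive: capture stdout, exit on failure.
    print(run_command(["echo", "captured output"]))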


@@ -0,0 +1,8 @@
import os

from llama_models.datatypes import Model
from .config_dirs import DEFAULT_CHECKPOINT_DIR


def model_local_dir(model: Model) -> str:
    return os.path.join(DEFAULT_CHECKPOINT_DIR, model.descriptor())


@@ -0,0 +1,256 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import inspect
import json
from enum import Enum
from typing import Any, get_args, get_origin, List, Literal, Optional, Type, Union

from pydantic import BaseModel
from pydantic.fields import ModelField
from typing_extensions import Annotated


def is_list_of_primitives(field_type):
    """Check if a field type is a List of primitive types."""
    origin = get_origin(field_type)
    if origin is List or origin is list:
        args = get_args(field_type)
        if len(args) == 1 and args[0] in (int, float, str, bool):
            return True
    return False


def get_literal_values(field):
    """Extract literal values from a field if it's a Literal type."""
    if get_origin(field.annotation) is Literal:
        return get_args(field.annotation)
    return None


def is_optional(field_type):
    """Check if a field type is Optional."""
    return get_origin(field_type) is Union and type(None) in get_args(field_type)


def get_non_none_type(field_type):
    """Get the non-None type from an Optional type."""
    return next(arg for arg in get_args(field_type) if arg is not type(None))


def manually_validate_field(model: Type[BaseModel], field: ModelField, value: Any):
    validators = field.class_validators.values()

    for validator in validators:
        if validator.pre:
            value = validator.func(model, value)

    # Apply type coercion
    value = field.type_(value)

    for validator in validators:
        if not validator.pre:
            value = validator.func(model, value)

    return value


# This is somewhat elaborate, but does not purport to be comprehensive in any way.
# We should add handling for the most common cases to tide us over.
#
# doesn't support List[nested_class] yet or Dicts of any kind. needs a bunch of
# unit tests for coverage.
def prompt_for_config(
    config_type: type[BaseModel], existing_config: Optional[BaseModel] = None
) -> BaseModel:
    """
    Recursively prompt the user for configuration values based on a Pydantic BaseModel.

    Args:
        config_type: A Pydantic BaseModel class representing the configuration structure.

    Returns:
        An instance of the config_type with user-provided values.
    """
    config_data = {}

    for field_name, field in config_type.__fields__.items():
        field_type = field.annotation
        existing_value = (
            getattr(existing_config, field_name) if existing_config else None
        )
        if existing_value:
            default_value = existing_value
        else:
            default_value = (
                field.default if not isinstance(field.default, type(Ellipsis)) else None
            )
        is_required = field.required

        # Skip fields with Literal type
        if get_origin(field_type) is Literal:
            continue

        if inspect.isclass(field_type) and issubclass(field_type, Enum):
            prompt = f"Choose {field_name} (options: {', '.join(e.name for e in field_type)}):"
            while True:
                # this branch does not handle existing and default values yet
                user_input = input(prompt + " ")
                try:
                    value = field_type[user_input]
                    validated_value = manually_validate_field(config_type, field, value)
                    config_data[field_name] = validated_value
                    break
                except KeyError:
                    print(
                        f"Invalid choice. Please choose from: {', '.join(e.name for e in field_type)}"
                    )
            continue

        # Check if the field is a discriminated union
        if get_origin(field_type) is Annotated:
            inner_type = get_args(field_type)[0]
            if get_origin(inner_type) is Union:
                discriminator = field.field_info.discriminator
                if discriminator:
                    union_types = get_args(inner_type)
                    # Find the discriminator field in each union type
                    type_map = {}
                    for t in union_types:
                        disc_field = t.__fields__[discriminator]
                        literal_values = get_literal_values(disc_field)
                        if literal_values:
                            for value in literal_values:
                                type_map[value] = t

                    while True:
                        discriminator_value = input(
                            f"Enter the {discriminator} (options: {', '.join(type_map.keys())}): "
                        )
                        if discriminator_value in type_map:
                            chosen_type = type_map[discriminator_value]
                            print(f"\nConfiguring {chosen_type.__name__}:")

                            if existing_value and (
                                getattr(existing_value, discriminator)
                                != discriminator_value
                            ):
                                existing_value = None

                            sub_config = prompt_for_config(chosen_type, existing_value)
                            config_data[field_name] = sub_config

                            # Set the discriminator field in the sub-config
                            setattr(sub_config, discriminator, discriminator_value)
                            break
                        else:
                            print(f"Invalid {discriminator}. Please try again.")
                    continue

        if (
            is_optional(field_type)
            and inspect.isclass(get_non_none_type(field_type))
            and issubclass(get_non_none_type(field_type), BaseModel)
        ):
            prompt = f"Do you want to configure {field_name}? (y/n): "
            if input(prompt).lower() == "n":
                config_data[field_name] = None
                continue
            nested_type = get_non_none_type(field_type)
            print(f"Entering sub-configuration for {field_name}:")
            config_data[field_name] = prompt_for_config(nested_type, existing_value)
        elif (
            inspect.isclass(field_type)
            and issubclass(field_type, BaseModel)
            and len(field_type.__fields__) > 0
        ):
            print(f"\nEntering sub-configuration for {field_name}:")
            config_data[field_name] = prompt_for_config(
                field_type,
                existing_value,
            )
        else:
            prompt = f"Enter value for {field_name}"
            if existing_value is not None:
                prompt += f" (existing: {existing_value})"
            elif default_value is not None:
                prompt += f" (default: {default_value})"
            if is_optional(field_type):
                prompt += " (optional)"
            elif is_required:
                prompt += " (required)"
            prompt += ": "

            while True:
                user_input = input(prompt)
                if user_input == "":
                    if default_value is not None:
                        config_data[field_name] = default_value
                        break
                    elif is_optional(field_type) or not is_required:
                        config_data[field_name] = None
                        break
                    else:
                        print("This field is required. Please provide a value.")
                        continue
                else:
                    try:
                        # Handle Optional types
                        if is_optional(field_type):
                            if user_input.lower() == "none":
                                value = None
                            else:
                                field_type = get_non_none_type(field_type)
                                value = user_input
                        # Handle List of primitives
                        elif is_list_of_primitives(field_type):
                            try:
                                value = json.loads(user_input)
                                if not isinstance(value, list):
                                    raise ValueError(
                                        "Input must be a JSON-encoded list"
                                    )
                                element_type = get_args(field_type)[0]
                                value = [element_type(item) for item in value]
                            except json.JSONDecodeError:
                                print(
                                    "Invalid JSON. Please enter a valid JSON-encoded list."
                                )
                                continue
                            except ValueError as e:
                                print(f"{str(e)}")
                                continue
                        # Convert the input to the correct type
                        elif inspect.isclass(field_type) and issubclass(
                            field_type, BaseModel
                        ):
                            # For nested BaseModels, we assume a dictionary-like string input
                            import ast

                            value = field_type(**ast.literal_eval(user_input))
                        else:
                            value = field_type(user_input)
                    except ValueError:
                        print(
                            f"Invalid input. Expected type: {getattr(field_type, '__name__', str(field_type))}"
                        )
                        continue

                    try:
                        # Validate the field using our manual validation function
                        validated_value = manually_validate_field(config_type, field, value)
                        config_data[field_name] = validated_value
                        break
                    except ValueError as e:
                        print(f"Validation error: {str(e)}")

    return config_type(**config_data)
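
To make the intended use of prompt_for_config concrete, here is a hedged usage sketch; the RemoteConfig model below is invented for illustration and is not part of this diff:

# Illustrative only: a made-up config model driven through prompt_for_config.
from typing import Optional

from pydantic import BaseModel


class RemoteConfig(BaseModel):  # hypothetical example model
    host: str = "localhost"
    port: int = 5000
    api_key: Optional[str] = None


# Prompts for each field in turn, offering defaults; returns a validated instance.
config = prompt_for_config(RemoteConfig)
print(config.json())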


@@ -0,0 +1,15 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import json
from enum import Enum


class EnumEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, Enum):
            return obj.value
        return super().default(obj)
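
A small usage sketch for the encoder above; the enum here is invented for illustration:

# Illustrative only: serialize a dict containing an Enum member.
import json
from enum import Enum


class ModelFormat(Enum):  # hypothetical example enum
    bf16 = "bf16"
    fp8 = "fp8"


print(json.dumps({"format": ModelFormat.fp8}, cls=EnumEncoder))
# -> {"format": "fp8"}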


@@ -5,8 +5,8 @@
 # the root directory of this source tree.

 from llama_models.llama3_1.api.datatypes import URL
+from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel
-from strong_typing.schema import json_schema_type


 @json_schema_type(schema={"description": "Checkpoint created during training runs"})