Introduce Llama stack distributions (#22)

* Add distribution CLI scaffolding

* More progress towards `llama distribution install`

* getting closer to a distro definition, distro install + configure works

* Distribution server now functioning

* read existing configuration, save enums properly

* Remove inference uvicorn server entrypoint and llama inference CLI command

* updated dependency and client model name

* Improved exception handling

* local imports for faster cli

* undo a typo, add a passthrough distribution

* implement full-passthrough in the server

* add safety adapters, configuration handling, server + clients

* cleanup, moving stuff to common, nuke utils

* Add a Path() wrapper at the earliest place

* fixes

* Bring agentic system api to toolchain

Add adapter dependencies and resolve adapters using a topological sort (see the ordering sketch after this list)

* refactor to reduce size of `agentic_system`

* move straggler files and fix some important existing bugs

* ApiSurface -> Api

* refactor a method out

* Adapter -> Provider

* Make each inference provider into its own subdirectory

* installation fixes

* Rename Distribution -> DistributionSpec, simplify RemoteProviders

* dict key instead of attr

* update inference config to take model and not model_dir

* Fix passthrough streaming; send headers as actual HTTP headers, not as part of the body :facepalm

* update safety to use model sku ids and not model dirs

* Update cli_reference.md

* minor fixes

* add DistributionConfig, fix a bug in model download

* Make install + start scripts do proper configuration automatically

* Update CLI_reference

* Nuke fp8_requirements, fold fbgemm into common requirements

* Update README, add newline between API surface configurations

* Refactor download functionality out of the Command so can be reused

* Add `llama model download` alias for `llama download`

* Show message about checksum file so users can check themselves

* Simpler intro statements

* get ollama working

* Reduce a bunch of dependencies from toolchain

Some improvements to the distribution install script

* Avoid using `conda run` since it buffers everything

* update dependencies and rely on LLAMA_TOOLCHAIN_DIR for dev purposes

* add validation for configuration input

* Re-sort imports

* make optional subclasses default to yes for configuration

* Remove additional_pip_packages; move deps to providers

* Make the 8B model the default for inline inference

* Add scripts to MANIFEST

* allow installing from test.pypi.org

* Fix #2 to help with testing packages

* Must install llama-models at that same version first

* fix PIP_ARGS
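
The "resolve adapters using a topological sort" item above can be illustrated with a minimal sketch. This is not the code from this PR; the dependency mapping, field names, and the example providers below are hypothetical, and the sketch only assumes that each provider declares which other APIs it depends on and must be instantiated after them.

# Minimal sketch of dependency-ordered provider resolution (illustrative only,
# not the implementation in this PR). `deps` maps a provider/API name to the
# names it depends on; the result lists dependencies before their dependents.
from typing import Dict, List


def topological_sort(deps: Dict[str, List[str]]) -> List[str]:
    visited: set = set()
    order: List[str] = []

    def visit(node: str, stack: tuple = ()) -> None:
        if node in stack:
            raise ValueError(f"Circular dependency involving {node}")
        if node in visited:
            return
        for dep in deps.get(node, []):
            visit(dep, stack + (node,))
        visited.add(node)
        order.append(node)

    for node in deps:
        visit(node)
    return order


# Hypothetical example: agentic_system depends on inference and safety.
print(
    topological_sort(
        {
            "inference": [],
            "safety": ["inference"],
            "agentic_system": ["inference", "safety"],
        }
    )
)
# -> ['inference', 'safety', 'agentic_system']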

---------

Co-authored-by: Hardik Shah <hjshah@fb.com>
Co-authored-by: Hardik Shah <hjshah@meta.com>
Ashwin Bharambe 2024-08-08 13:38:41 -07:00 committed by GitHub
parent da4645a27a
commit e830814399
115 changed files with 5839 additions and 1120 deletions


@@ -0,0 +1,15 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os
from pathlib import Path


LLAMA_STACK_CONFIG_DIR = Path(os.path.expanduser("~/.llama/"))

DISTRIBS_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "distributions"

DEFAULT_CHECKPOINT_DIR = LLAMA_STACK_CONFIG_DIR / "checkpoints"
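A quick illustration of how these directories compose on disk; the distribution name and filename below are invented for the example and are not taken from this diff:

# Illustrative only -- "local-ollama" and "config.yaml" are made-up names.
import os
from pathlib import Path

LLAMA_STACK_CONFIG_DIR = Path(os.path.expanduser("~/.llama/"))
DISTRIBS_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "distributions"

config_path = DISTRIBS_BASE_DIR / "local-ollama" / "config.yaml"
print(config_path)  # e.g. /home/user/.llama/distributions/local-ollama/config.yaml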


@@ -9,9 +9,9 @@ from typing import Dict, Optional

 from llama_models.llama3_1.api.datatypes import URL
-from pydantic import BaseModel
+from llama_models.schema_utils import json_schema_type

-from strong_typing.schema import json_schema_type
+from pydantic import BaseModel


 @json_schema_type


@@ -0,0 +1,105 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import errno
import os
import pty
import select
import signal
import subprocess
import sys
import termios

from termcolor import cprint


# run a command in a pseudo-terminal, with interrupt handling,
# useful when you want to run interactive things
def run_with_pty(command):
    master, slave = pty.openpty()

    old_settings = termios.tcgetattr(sys.stdin)
    original_sigint = signal.getsignal(signal.SIGINT)

    ctrl_c_pressed = False

    def sigint_handler(signum, frame):
        nonlocal ctrl_c_pressed
        ctrl_c_pressed = True
        cprint("\nCtrl-C detected. Aborting...", "white", attrs=["bold"])

    try:
        # Set up the signal handler
        signal.signal(signal.SIGINT, sigint_handler)

        new_settings = termios.tcgetattr(sys.stdin)
        new_settings[3] = new_settings[3] & ~termios.ECHO  # Disable echo
        new_settings[3] = new_settings[3] & ~termios.ICANON  # Disable canonical mode
        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, new_settings)

        process = subprocess.Popen(
            command,
            stdin=slave,
            stdout=slave,
            stderr=slave,
            universal_newlines=True,
            preexec_fn=os.setsid,
        )

        # Close the slave file descriptor as it's now owned by the subprocess
        os.close(slave)

        def handle_io():
            while not ctrl_c_pressed:
                try:
                    rlist, _, _ = select.select([sys.stdin, master], [], [], 0.1)

                    if sys.stdin in rlist:
                        data = os.read(sys.stdin.fileno(), 1024)
                        if not data:
                            break
                        os.write(master, data)

                    if master in rlist:
                        data = os.read(master, 1024)
                        if not data:
                            break
                        sys.stdout.buffer.write(data)
                        sys.stdout.flush()

                except KeyboardInterrupt:
                    # This will be raised when Ctrl+C is pressed
                    break

                if process.poll() is not None:
                    break

        handle_io()
    except (EOFError, KeyboardInterrupt):
        pass
    except OSError as e:
        if e.errno != errno.EIO:
            raise
    finally:
        # Clean up
        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
        signal.signal(signal.SIGINT, original_sigint)
        os.close(master)

        if process.poll() is None:
            process.terminate()
            process.wait()

    return process.returncode


def run_command(command):
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, error = process.communicate()
    if process.returncode != 0:
        print(f"Error: {error.decode('utf-8')}")
        sys.exit(1)
    return output.decode("utf-8")
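
A hedged usage sketch for the two helpers above; the commands are arbitrary examples, not anything this PR runs:

# Illustrative only: how run_with_pty / run_command might be called.
if __name__ == "__main__":
    # Interactive: stdin/stdout are proxied through the pty; Ctrl-C is handled.
    returncode = run_with_pty(["bash", "-c", "read -p 'name? ' n && echo hello $n"])
    print(f"exited with {returncode}")

    # Non-interactive: capture stdout, exit on failure.
    print(run_command(["echo", "captured output"]))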


@@ -0,0 +1,8 @@
import os

from llama_models.datatypes import Model
from .config_dirs import DEFAULT_CHECKPOINT_DIR


def model_local_dir(model: Model) -> str:
    return os.path.join(DEFAULT_CHECKPOINT_DIR, model.descriptor())


@@ -0,0 +1,256 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import inspect
import json
from enum import Enum
from typing import Any, get_args, get_origin, List, Literal, Optional, Type, Union

from pydantic import BaseModel
from pydantic.fields import ModelField
from typing_extensions import Annotated


def is_list_of_primitives(field_type):
    """Check if a field type is a List of primitive types."""
    origin = get_origin(field_type)
    if origin is List or origin is list:
        args = get_args(field_type)
        if len(args) == 1 and args[0] in (int, float, str, bool):
            return True
    return False


def get_literal_values(field):
    """Extract literal values from a field if it's a Literal type."""
    if get_origin(field.annotation) is Literal:
        return get_args(field.annotation)
    return None


def is_optional(field_type):
    """Check if a field type is Optional."""
    return get_origin(field_type) is Union and type(None) in get_args(field_type)


def get_non_none_type(field_type):
    """Get the non-None type from an Optional type."""
    return next(arg for arg in get_args(field_type) if arg is not type(None))


def manually_validate_field(model: Type[BaseModel], field: ModelField, value: Any):
    validators = field.class_validators.values()

    for validator in validators:
        if validator.pre:
            value = validator.func(model, value)

    # Apply type coercion
    value = field.type_(value)

    for validator in validators:
        if not validator.pre:
            value = validator.func(model, value)

    return value


# This is somewhat elaborate, but does not purport to be comprehensive in any way.
# We should add handling for the most common cases to tide us over.
#
# doesn't support List[nested_class] yet or Dicts of any kind. needs a bunch of
# unit tests for coverage.
def prompt_for_config(
    config_type: type[BaseModel], existing_config: Optional[BaseModel] = None
) -> BaseModel:
    """
    Recursively prompt the user for configuration values based on a Pydantic BaseModel.

    Args:
        config_type: A Pydantic BaseModel class representing the configuration structure.

    Returns:
        An instance of the config_type with user-provided values.
    """
    config_data = {}

    for field_name, field in config_type.__fields__.items():
        field_type = field.annotation
        existing_value = (
            getattr(existing_config, field_name) if existing_config else None
        )
        if existing_value:
            default_value = existing_value
        else:
            default_value = (
                field.default if not isinstance(field.default, type(Ellipsis)) else None
            )
        is_required = field.required

        # Skip fields with Literal type
        if get_origin(field_type) is Literal:
            continue

        if inspect.isclass(field_type) and issubclass(field_type, Enum):
            prompt = f"Choose {field_name} (options: {', '.join(e.name for e in field_type)}):"
            while True:
                # this branch does not handle existing and default values yet
                user_input = input(prompt + " ")
                try:
                    value = field_type[user_input]
                    validated_value = manually_validate_field(config_type, field, value)
                    config_data[field_name] = validated_value
                    break
                except KeyError:
                    print(
                        f"Invalid choice. Please choose from: {', '.join(e.name for e in field_type)}"
                    )
            continue

        # Check if the field is a discriminated union
        if get_origin(field_type) is Annotated:
            inner_type = get_args(field_type)[0]
            if get_origin(inner_type) is Union:
                discriminator = field.field_info.discriminator
                if discriminator:
                    union_types = get_args(inner_type)
                    # Find the discriminator field in each union type
                    type_map = {}
                    for t in union_types:
                        disc_field = t.__fields__[discriminator]
                        literal_values = get_literal_values(disc_field)
                        if literal_values:
                            for value in literal_values:
                                type_map[value] = t

                    while True:
                        discriminator_value = input(
                            f"Enter the {discriminator} (options: {', '.join(type_map.keys())}): "
                        )
                        if discriminator_value in type_map:
                            chosen_type = type_map[discriminator_value]
                            print(f"\nConfiguring {chosen_type.__name__}:")

                            if existing_value and (
                                getattr(existing_value, discriminator)
                                != discriminator_value
                            ):
                                existing_value = None

                            sub_config = prompt_for_config(chosen_type, existing_value)
                            config_data[field_name] = sub_config

                            # Set the discriminator field in the sub-config
                            setattr(sub_config, discriminator, discriminator_value)
                            break
                        else:
                            print(f"Invalid {discriminator}. Please try again.")
                    continue

        if (
            is_optional(field_type)
            and inspect.isclass(get_non_none_type(field_type))
            and issubclass(get_non_none_type(field_type), BaseModel)
        ):
            prompt = f"Do you want to configure {field_name}? (y/n): "
            if input(prompt).lower() == "n":
                config_data[field_name] = None
                continue
            nested_type = get_non_none_type(field_type)
            print(f"Entering sub-configuration for {field_name}:")
            config_data[field_name] = prompt_for_config(nested_type, existing_value)
        elif (
            inspect.isclass(field_type)
            and issubclass(field_type, BaseModel)
            and len(field_type.__fields__) > 0
        ):
            print(f"\nEntering sub-configuration for {field_name}:")
            config_data[field_name] = prompt_for_config(
                field_type,
                existing_value,
            )
        else:
            prompt = f"Enter value for {field_name}"
            if existing_value is not None:
                prompt += f" (existing: {existing_value})"
            elif default_value is not None:
                prompt += f" (default: {default_value})"
            if is_optional(field_type):
                prompt += " (optional)"
            elif is_required:
                prompt += " (required)"
            prompt += ": "

            while True:
                user_input = input(prompt)
                if user_input == "":
                    if default_value is not None:
                        config_data[field_name] = default_value
                        break
                    elif is_optional(field_type) or not is_required:
                        config_data[field_name] = None
                        break
                    else:
                        print("This field is required. Please provide a value.")
                        continue
                else:
                    try:
                        # Handle Optional types
                        if is_optional(field_type):
                            if user_input.lower() == "none":
                                value = None
                            else:
                                field_type = get_non_none_type(field_type)
                                value = user_input
                        # Handle List of primitives
                        elif is_list_of_primitives(field_type):
                            try:
                                value = json.loads(user_input)
                                if not isinstance(value, list):
                                    raise ValueError(
                                        "Input must be a JSON-encoded list"
                                    )
                                element_type = get_args(field_type)[0]
                                value = [element_type(item) for item in value]
                            except json.JSONDecodeError:
                                print(
                                    "Invalid JSON. Please enter a valid JSON-encoded list."
                                )
                                continue
                            except ValueError as e:
                                print(f"{str(e)}")
                                continue
                        # Convert the input to the correct type
                        elif inspect.isclass(field_type) and issubclass(
                            field_type, BaseModel
                        ):
                            # For nested BaseModels, we assume a dictionary-like string input
                            import ast

                            value = field_type(**ast.literal_eval(user_input))
                        else:
                            value = field_type(user_input)
                    except ValueError:
                        print(
                            f"Invalid input. Expected type: {getattr(field_type, '__name__', str(field_type))}"
                        )
                        continue

                    try:
                        # Validate the field using our manual validation function
                        validated_value = manually_validate_field(config_type, field, value)
                        config_data[field_name] = validated_value
                        break
                    except ValueError as e:
                        print(f"Validation error: {str(e)}")

    return config_type(**config_data)
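
To make the intended use of prompt_for_config concrete, here is a hedged usage sketch; the RemoteConfig model below is invented for illustration and is not part of this diff:

# Illustrative only: a made-up config model driven through prompt_for_config.
from typing import Optional

from pydantic import BaseModel


class RemoteConfig(BaseModel):  # hypothetical example model
    host: str = "localhost"
    port: int = 5000
    api_key: Optional[str] = None


# Prompts for each field in turn, offering defaults; returns a validated instance.
config = prompt_for_config(RemoteConfig)
print(config.json())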


@@ -0,0 +1,15 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import json
from enum import Enum


class EnumEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, Enum):
            return obj.value
        return super().default(obj)
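
A small usage sketch for the encoder above; the enum here is invented for illustration:

# Illustrative only: serialize a dict containing an Enum member.
import json
from enum import Enum


class ModelFormat(Enum):  # hypothetical example enum
    bf16 = "bf16"
    fp8 = "fp8"


print(json.dumps({"format": ModelFormat.fp8}, cls=EnumEncoder))
# -> {"format": "fp8"}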


@@ -5,8 +5,8 @@
 # the root directory of this source tree.

 from llama_models.llama3_1.api.datatypes import URL
+from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel
-from strong_typing.schema import json_schema_type


 @json_schema_type(schema={"description": "Checkpoint created during training runs"})