forked from phoenix-oss/llama-stack-mirror
* Add distribution CLI scaffolding * More progress towards `llama distribution install` * getting closer to a distro definition, distro install + configure works * Distribution server now functioning * read existing configuration, save enums properly * Remove inference uvicorn server entrypoint and llama inference CLI command * updated dependency and client model name * Improved exception handling * local imports for faster cli * undo a typo, add a passthrough distribution * implement full-passthrough in the server * add safety adapters, configuration handling, server + clients * cleanup, moving stuff to common, nuke utils * Add a Path() wrapper at the earliest place * fixes * Bring agentic system api to toolchain Add adapter dependencies and resolve adapters using a topological sort * refactor to reduce size of `agentic_system` * move straggler files and fix some important existing bugs * ApiSurface -> Api * refactor a method out * Adapter -> Provider * Make each inference provider into its own subdirectory * installation fixes * Rename Distribution -> DistributionSpec, simplify RemoteProviders * dict key instead of attr * update inference config to take model and not model_dir * Fix passthrough streaming, send headers properly not part of body :facepalm * update safety to use model sku ids and not model dirs * Update cli_reference.md * minor fixes * add DistributionConfig, fix a bug in model download * Make install + start scripts do proper configuration automatically * Update CLI_reference * Nuke fp8_requirements, fold fbgemm into common requirements * Update README, add newline between API surface configurations * Refactor download functionality out of the Command so can be reused * Add `llama model download` alias for `llama download` * Show message about checksum file so users can check themselves * Simpler intro statements * get ollama working * Reduce a bunch of dependencies from toolchain Some improvements to the distribution install script * Avoid using 
`conda run` since it buffers everything * update dependencies and rely on LLAMA_TOOLCHAIN_DIR for dev purposes * add validation for configuration input * resort imports * make optional subclasses default to yes for configuration * Remove additional_pip_packages; move deps to providers * for inline make 8b model the default * Add scripts to MANIFEST * allow installing from test.pypi.org * Fix #2 to help with testing packages * Must install llama-models at that same version first * fix PIP_ARGS --------- Co-authored-by: Hardik Shah <hjshah@fb.com> Co-authored-by: Hardik Shah <hjshah@meta.com>
105 lines
3.1 KiB
Python
105 lines
3.1 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
import errno
|
|
import os
|
|
import pty
|
|
import select
|
|
import signal
|
|
import subprocess
|
|
import sys
|
|
import termios
|
|
|
|
from termcolor import cprint
|
|
|
|
|
|
# run a command in a pseudo-terminal, with interrupt handling,
|
|
# useful when you want to run interactive things
|
|
def run_with_pty(command):
    """Run ``command`` attached to a pseudo-terminal and relay its I/O.

    The child gets the slave end of a freshly opened pty as its stdin, stdout
    and stderr and runs in its own session. Bytes read from our stdin are
    forwarded to the child; everything the child writes is copied to
    ``sys.stdout``. While the child runs, SIGINT is intercepted so that Ctrl-C
    stops the relay and terminates the child instead of killing this process.

    When our stdin is a terminal, echo and canonical mode are switched off for
    the duration of the call and restored afterwards. When stdin is not a
    terminal (pipe, redirected file, test harness) the terminal settings are
    left alone; when stdin has no usable file descriptor at all, nothing is
    forwarded to the child.

    Args:
        command: The program to run, in any form accepted by
            ``subprocess.Popen`` (normally a list of arguments).

    Returns:
        The child's return code as reported by ``subprocess.Popen``.

    Raises:
        OSError: If the child cannot be started (e.g. ``FileNotFoundError``)
            or relaying fails with an errno other than ``EIO``.
    """
    stdin_fd = _stdin_fileno()
    stdin_is_tty = stdin_fd is not None and os.isatty(stdin_fd)
    # Only a real terminal has settings to save; tcgetattr raises otherwise.
    old_settings = termios.tcgetattr(stdin_fd) if stdin_is_tty else None
    original_sigint = signal.getsignal(signal.SIGINT)

    master, slave = pty.openpty()
    process = None
    ctrl_c_pressed = False

    def sigint_handler(signum, frame):
        nonlocal ctrl_c_pressed
        ctrl_c_pressed = True
        cprint("\nCtrl-C detected. Aborting...", "white", attrs=["bold"])

    try:
        # Set up the signal handler
        signal.signal(signal.SIGINT, sigint_handler)

        if stdin_is_tty:
            new_settings = termios.tcgetattr(stdin_fd)
            # Disable echo and canonical mode so keystrokes reach the child
            # immediately and are echoed only once (by the child's pty).
            new_settings[3] &= ~(termios.ECHO | termios.ICANON)
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, new_settings)

        # start_new_session=True makes the child call setsid() itself; it is
        # the documented replacement for preexec_fn=os.setsid.
        process = subprocess.Popen(
            command,
            stdin=slave,
            stdout=slave,
            stderr=slave,
            start_new_session=True,
        )

        # Close the slave file descriptor as it's now owned by the subprocess
        os.close(slave)
        slave = None

        try:
            _relay_io(process, master, stdin_fd, lambda: ctrl_c_pressed)
        except (EOFError, KeyboardInterrupt):
            pass
        except OSError as e:
            # On Linux, reading the master after the last slave descriptor is
            # closed fails with EIO; that is the normal end-of-output signal.
            if e.errno != errno.EIO:
                raise
    finally:
        # Clean up
        if old_settings is not None:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, old_settings)
        signal.signal(signal.SIGINT, original_sigint)

        os.close(master)
        if slave is not None:
            # Popen never took ownership of the slave (it failed to start).
            os.close(slave)
        if process is not None:
            if process.poll() is None:
                process.terminate()
            process.wait()

    return process.returncode


def _stdin_fileno():
    """Return the file descriptor of ``sys.stdin``, or None if it has none."""
    try:
        return sys.stdin.fileno()
    except (AttributeError, ValueError, OSError):
        # stdin is missing, closed, or replaced by an in-memory pseudo-file.
        return None


def _write_all(fd, data):
    """Write all of ``data`` to ``fd``, retrying after partial writes."""
    remaining = memoryview(data)
    while remaining:
        written = os.write(fd, remaining)
        remaining = remaining[written:]


def _relay_io(process, master_fd, stdin_fd, should_stop):
    """Shuttle bytes between our stdin/stdout and the pty master.

    Runs until ``should_stop()`` returns true, the master reports end of
    output, or ``process`` has exited and its remaining output has been
    copied. ``stdin_fd`` may be None, in which case nothing is forwarded.
    """
    watched = [master_fd] if stdin_fd is None else [stdin_fd, master_fd]

    while not should_stop():
        # Sample the exit status *before* waiting so that output written just
        # before the child exited is still picked up by this select() call.
        exited = process.poll() is not None

        ready, _, _ = select.select(watched, [], [], 0.1)

        if stdin_fd is not None and stdin_fd in ready:
            data = os.read(stdin_fd, 1024)
            if data:
                _write_all(master_fd, data)
            else:
                # EOF on our stdin: stop forwarding but keep relaying output.
                watched.remove(stdin_fd)
                stdin_fd = None

        if master_fd in ready:
            data = os.read(master_fd, 1024)
            if not data:
                break
            sys.stdout.buffer.write(data)
            sys.stdout.flush()
        elif exited:
            # Child is gone and the pty had nothing more to deliver.
            break
|
|
|
|
|
|
def run_command(command):
    """Run ``command`` to completion and return its decoded standard output.

    Both stdout and stderr of the child are captured. If the child exits with
    a non-zero status, its stderr is printed prefixed with ``Error: `` and the
    current process exits with status 1.

    Args:
        command: The program to run, in any form accepted by
            ``subprocess.Popen``.

    Returns:
        The child's stdout decoded as UTF-8.
    """
    child = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    stdout_bytes, stderr_bytes = child.communicate()

    # Success path first; anything else is reported and aborts the program.
    if child.returncode == 0:
        return stdout_bytes.decode("utf-8")

    print(f"Error: {stderr_bytes.decode('utf-8')}")
    sys.exit(1)
|