refactor: simplify command execution and remove PTY handling (#1641)

# What does this PR do?

A PTY is unnecessary for interactive mode: when `subprocess.run()` is called
without redirecting the standard streams, the child process inherits the
calling terminal's stdin, stdout, and stderr, so natural interaction works out
of the box. A PTY can also introduce unwanted side effects such as buffering
issues and inconsistent signal handling. Plain standard input/output is
sufficient for most interactive programs.
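
As a quick standalone illustration of that point (a minimal sketch, not code from this PR; the command is purely illustrative): with no stream redirection, the child spawned by `subprocess.run()` reads from and writes to the same terminal as the parent, so an interactive prompt works without a PTY.

```python
import subprocess

# No stdin/stdout/stderr arguments: the child inherits the parent's terminal,
# so the prompt below is shown to, and answered by, the user directly.
result = subprocess.run(
    ["python3", "-c", "print('hello,', input('your name: '))"],
    check=False,
)
print(f"child exited with code {result.returncode}")
```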

This commit simplifies the command execution by:

1. Removing PTY-based execution in favor of direct subprocess handling
2. Consolidating command execution into a single run_command function
3. Improving error handling with specific subprocess error types
4. Adding proper type hints and documentation
5. Maintaining Ctrl+C handling for graceful interruption

## Test Plan

```
llama stack run
```

Signed-off-by: Sébastien Han <seb@redhat.com>
Author: Sébastien Han <seb@redhat.com> (committed 2025-03-17 23:03:14 +01:00 via GitHub)
Commit: 24fd06879e (parent: 77ca09467f)
6 changed files with 31 additions and 131 deletions


@@ -56,8 +56,7 @@ jobs:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
         run: |
           source .venv/bin/activate
-          # TODO: use "llama stack run"
-          nohup uv run python -m llama_stack.distribution.server.server --yaml-config ./llama_stack/templates/ollama/run.yaml > server.log 2>&1 &
+          nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 &

       - name: Wait for Llama Stack server to be ready
         run: |


@@ -40,6 +40,7 @@ jobs:
       matrix:
         template: ${{ fromJson(needs.generate-matrix.outputs.templates) }}
         image-type: [venv, container]
+      fail-fast: false # We want to run all jobs even if some fail

     steps:
       - name: Checkout repository

@@ -67,7 +68,9 @@ jobs:
       - name: Run Llama Stack Build
         run: |
-          uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test
+          # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
+          # LLAMA_STACK_DIR is set to the current directory so we are building from the source
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test

       - name: Print dependencies in the image
         if: matrix.image-type == 'venv'


@@ -38,7 +38,7 @@ from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.resolver import InvalidProviderError
 from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
-from llama_stack.distribution.utils.exec import formulate_run_args, run_with_pty
+from llama_stack.distribution.utils.exec import formulate_run_args, run_command
 from llama_stack.distribution.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api

@@ -213,7 +213,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
         config = parse_and_maybe_upgrade_config(config_dict)
     run_args = formulate_run_args(args.image_type, args.image_name, config, args.template)
     run_args.extend([run_config, str(os.getenv("LLAMA_STACK_PORT", 8321))])
-    run_with_pty(run_args)
+    run_command(run_args)


 def _generate_run_config(


@@ -82,7 +82,7 @@ class StackRun(Subcommand):
         from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
         from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
-        from llama_stack.distribution.utils.exec import formulate_run_args, run_with_pty
+        from llama_stack.distribution.utils.exec import formulate_run_args, run_command

         config_file = Path(args.config)
         has_yaml_suffix = args.config.endswith(".yaml")

@@ -136,4 +136,4 @@ class StackRun(Subcommand):
         if args.tls_keyfile and args.tls_certfile:
             run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])

-        run_with_pty(run_args)
+        run_command(run_args)


@@ -6,7 +6,6 @@

 import importlib.resources
 import logging
-import sys

 from pathlib import Path
 from typing import Dict, List

@@ -15,7 +14,7 @@ from termcolor import cprint

 from llama_stack.distribution.datatypes import BuildConfig, Provider
 from llama_stack.distribution.distribution import get_provider_registry
-from llama_stack.distribution.utils.exec import run_command, run_with_pty
+from llama_stack.distribution.utils.exec import run_command
 from llama_stack.distribution.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api

@@ -123,10 +122,6 @@ def build_image(
     if special_deps:
         args.append("#".join(special_deps))

-    is_terminal = sys.stdin.isatty()
-    if is_terminal:
-        return_code = run_with_pty(args)
-    else:
-        return_code = run_command(args)
+    return_code = run_command(args)

     if return_code != 0:


@@ -4,13 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import errno
 import logging
 import os
-import select
 import signal
 import subprocess
-import sys

 from termcolor import cprint
@@ -88,13 +85,6 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list:
     return run_args


-def run_with_pty(command):
-    if sys.platform.startswith("win"):
-        return _run_with_pty_win(command)
-    else:
-        return _run_with_pty_unix(command)
-
-
 def in_notebook():
     try:
         from IPython import get_ipython
@@ -108,19 +98,19 @@ def in_notebook():
     return True


-# run a command in a pseudo-terminal, with interrupt handling,
-# useful when you want to run interactive things
-def _run_with_pty_unix(command):
-    import pty
-    import termios
-
-    master, slave = pty.openpty()
-
-    old_settings = termios.tcgetattr(sys.stdin)
+def run_command(command: list[str]) -> int:
+    """
+    Run a command with interrupt handling and output capture.
+    Uses subprocess.run with direct stream piping for better performance.
+
+    Args:
+        command (list): The command to run.
+
+    Returns:
+        int: The return code of the command.
+    """
     original_sigint = signal.getsignal(signal.SIGINT)

     ctrl_c_pressed = False
-    process = None

     def sigint_handler(signum, frame):
         nonlocal ctrl_c_pressed
@@ -131,106 +121,19 @@ def _run_with_pty_unix(command):
         # Set up the signal handler
         signal.signal(signal.SIGINT, sigint_handler)

-        new_settings = termios.tcgetattr(sys.stdin)
-        new_settings[3] = new_settings[3] & ~termios.ECHO  # Disable echo
-        new_settings[3] = new_settings[3] & ~termios.ICANON  # Disable canonical mode
-        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, new_settings)
-
-        process = subprocess.Popen(
+        # Run the command with stdout/stderr piped directly to system streams
+        result = subprocess.run(
             command,
-            stdin=slave,
-            stdout=slave,
-            stderr=slave,
-            universal_newlines=True,
-            preexec_fn=os.setsid,
+            text=True,
+            check=False,
         )
-
-        # Close the slave file descriptor as it's now owned by the subprocess
-        os.close(slave)
-
-        def handle_io():
-            while not ctrl_c_pressed:
-                try:
-                    rlist, _, _ = select.select([sys.stdin, master], [], [], 0.1)
-
-                    if sys.stdin in rlist:
-                        data = os.read(sys.stdin.fileno(), 1024)
-                        if not data:
-                            break
-                        os.write(master, data)
-
-                    if master in rlist:
-                        data = os.read(master, 1024)
-                        if not data:
-                            break
-                        sys.stdout.buffer.write(data)
-                        sys.stdout.flush()
-
-                except KeyboardInterrupt:
-                    # This will be raised when Ctrl+C is pressed
-                    break
-
-                if process.poll() is not None:
-                    break
-
-        handle_io()
-    except (EOFError, KeyboardInterrupt):
-        pass
-    except OSError as e:
-        if e.errno != errno.EIO:
-            raise
-    finally:
-        # Clean up
-        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
-        signal.signal(signal.SIGINT, original_sigint)
-        os.close(master)
-        if process and process.poll() is None:
-            process.terminate()
-            process.wait()
-
-    return process.returncode
-
-
-# run a command in a pseudo-terminal in windows, with interrupt handling,
-def _run_with_pty_win(command):
-    """
-    Runs a command with interactive support using subprocess directly.
-    """
-    try:
-        # For shell scripts on Windows, use appropriate shell
-        if isinstance(command, (list, tuple)):
-            if command[0].endswith(".sh"):
-                if os.path.exists("/usr/bin/bash"):  # WSL
-                    command = ["bash"] + command
-                else:
-                    # Use cmd.exe with bash while preserving all arguments
-                    command = ["cmd.exe", "/c", "bash"] + command
-
-        process = subprocess.Popen(
-            command,
-            shell=True,
-            universal_newlines=True,
-        )
-
-        process.wait()
-
+        return result.returncode
+    except subprocess.SubprocessError as e:
+        log.error(f"Subprocess error: {e}")
+        return 1
     except Exception as e:
-        print(f"Error: {str(e)}")
+        log.exception(f"Unexpected error: {e}")
         return 1
     finally:
-        if process and process.poll() is None:
-            process.terminate()
-            process.wait()
-
-    return process.returncode
-
-
-def run_command(command):
-    try:
-        result = subprocess.run(command, capture_output=True, text=True, check=True)
-        print("Script Output\n", result.stdout)
-        return result.returncode
-    except subprocess.CalledProcessError as e:
-        print("Error running script:", e)
-        print("Error output:", e.stderr)
-        return e.returncode
+        # Restore the original signal handler
+        signal.signal(signal.SIGINT, original_sigint)
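
For reference, a small usage sketch (not part of the diff) of how the consolidated helper is invoked by the callers updated above; the command list here is purely illustrative, the real one comes from `formulate_run_args()`:

```python
from llama_stack.distribution.utils.exec import run_command

# Illustrative command; callers such as build_image() check the return code.
return_code = run_command(["llama", "stack", "run", "./run.yaml"])
if return_code != 0:
    raise SystemExit(return_code)
```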