feat(server): Use system packages for execution (#1252)

# What does this PR do? Users prefer to rely on the main CLI rather than invoking the server through a Python module. Users interact with a high-level CLI rather than needing to know internal module structures. Now, when running llama stack run <path-to-config>, the server will attempt to use the system package or a virtual environment if one is active. This also eliminates the current process dependency chain when running from a virtual environment: -> llama stack run        -> start_env.sh              -> python -m server... Signed-off-by: Sébastien Han <seb@redhat.com> [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan Run: ``` ollama run llama3.2:3b-instruct-fp16 --keepalive=2m & llama stack run ./llama_stack/templates/ollama/run.yaml --disable-ipv6 ``` Notice that the server starts and shutdowns normally. [//]: # (## Documentation) --------- Signed-off-by: Sébastien Han <seb@redhat.com> Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
2025-12-03 09:53:45 +00:00 · 2025-03-11 00:01:03 +01:00 · 2025-03-11 00:01:03 +01:00 · 21e39633d8
commit 21e39633d8
parent feacf89548
2 changed files with 59 additions and 21 deletions
--- a/llama_stack/cli/stack/run.py
+++ b/llama_stack/cli/stack/run.py
@ -56,7 +56,6 @@ class StackRun(Subcommand):
            "--env",
            action="append",
            help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.",
-            default=[],
            metavar="KEY=VALUE",
        )
        self.parser.add_argument(
@ -74,7 +73,6 @@ class StackRun(Subcommand):
            type=str,
            help="Image Type used during the build. This can be either conda or container or venv.",
            choices=["conda", "container", "venv"],
-            default="conda",
        )

    def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
@ -120,20 +118,42 @@ class StackRun(Subcommand):
        except AttributeError as e:
            self.parser.error(f"failed to parse config file '{config_file}':\n {e}")

-        run_args = formulate_run_args(args.image_type, args.image_name, config, template_name)
+        # If neither image type nor image name is provided, assume the server should be run directly
+        # using the current environment packages.
+        if not args.image_type and not args.image_name:
+            logger.info("No image type or image name provided. Assuming environment packages.")
+            from llama_stack.distribution.server.server import main as server_main

-        run_args.extend([str(config_file), str(args.port)])
-        if args.disable_ipv6:
-            run_args.append("--disable-ipv6")
+            # Build the server args from the current args passed to the CLI
+            server_args = argparse.Namespace()
+            for arg in vars(args):
+                # If this is a function, avoid passing it
+                # "args" contains:
+                # func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
+                if callable(getattr(args, arg)):
+                    continue
+                setattr(server_args, arg, getattr(args, arg))

-        for env_var in args.env:
-            if "=" not in env_var:
-                self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format")
-            key, value = env_var.split("=", 1)  # split on first = only
-            if not key:
-                self.parser.error(f"Environment variable '{env_var}' has empty key")
-            run_args.extend(["--env", f"{key}={value}"])
+            # Run the server
+            server_main(server_args)
+        else:
+            run_args = formulate_run_args(args.image_type, args.image_name, config, template_name)

-        if args.tls_keyfile and args.tls_certfile:
-            run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
-        run_with_pty(run_args)
+            run_args.extend([str(config_file), str(args.port)])
+            if args.disable_ipv6:
+                run_args.append("--disable-ipv6")
+
+            if args.env:
+                for env_var in args.env:
+                    if "=" not in env_var:
+                        self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format")
+                        return
+                    key, value = env_var.split("=", 1)  # split on first = only
+                    if not key:
+                        self.parser.error(f"Environment variable '{env_var}' has empty key")
+                        return
+                    run_args.extend(["--env", f"{key}={value}"])
+
+            if args.tls_keyfile and args.tls_certfile:
+                run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
+            run_with_pty(run_args)