feat(server): Use system packages for execution (#1252)

# What does this PR do?

Users prefer to rely on the main CLI rather than invoking the server
through a Python module. Users interact with a high-level CLI rather
than needing to know internal module structures.

Now, when running llama stack run <path-to-config>, the server will
attempt to use the system package or a virtual environment if one is
active.

This also eliminates the current process dependency chain when running
from a virtual environment:

-> llama stack run
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; -> start_env.sh

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-> python -m server...

Signed-off-by: Sébastien Han <seb@redhat.com>

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

Run:

```
ollama run llama3.2:3b-instruct-fp16 --keepalive=2m &
llama stack run ./llama_stack/templates/ollama/run.yaml --disable-ipv6
```

Notice that the server starts and shutdowns normally.

[//]: # (## Documentation)

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
Sébastien Han 2025-03-11 00:01:03 +01:00 committed by GitHub
parent feacf89548
commit 21e39633d8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 59 additions and 21 deletions

View file

@ -56,7 +56,6 @@ class StackRun(Subcommand):
"--env", "--env",
action="append", action="append",
help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.", help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.",
default=[],
metavar="KEY=VALUE", metavar="KEY=VALUE",
) )
self.parser.add_argument( self.parser.add_argument(
@ -74,7 +73,6 @@ class StackRun(Subcommand):
type=str, type=str,
help="Image Type used during the build. This can be either conda or container or venv.", help="Image Type used during the build. This can be either conda or container or venv.",
choices=["conda", "container", "venv"], choices=["conda", "container", "venv"],
default="conda",
) )
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None: def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
@ -120,20 +118,42 @@ class StackRun(Subcommand):
except AttributeError as e: except AttributeError as e:
self.parser.error(f"failed to parse config file '{config_file}':\n {e}") self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
run_args = formulate_run_args(args.image_type, args.image_name, config, template_name) # If neither image type nor image name is provided, assume the server should be run directly
# using the current environment packages.
if not args.image_type and not args.image_name:
logger.info("No image type or image name provided. Assuming environment packages.")
from llama_stack.distribution.server.server import main as server_main
run_args.extend([str(config_file), str(args.port)]) # Build the server args from the current args passed to the CLI
if args.disable_ipv6: server_args = argparse.Namespace()
run_args.append("--disable-ipv6") for arg in vars(args):
# If this is a function, avoid passing it
# "args" contains:
# func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
if callable(getattr(args, arg)):
continue
setattr(server_args, arg, getattr(args, arg))
for env_var in args.env: # Run the server
if "=" not in env_var: server_main(server_args)
self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format") else:
key, value = env_var.split("=", 1) # split on first = only run_args = formulate_run_args(args.image_type, args.image_name, config, template_name)
if not key:
self.parser.error(f"Environment variable '{env_var}' has empty key")
run_args.extend(["--env", f"{key}={value}"])
if args.tls_keyfile and args.tls_certfile: run_args.extend([str(config_file), str(args.port)])
run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile]) if args.disable_ipv6:
run_with_pty(run_args) run_args.append("--disable-ipv6")
if args.env:
for env_var in args.env:
if "=" not in env_var:
self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format")
return
key, value = env_var.split("=", 1) # split on first = only
if not key:
self.parser.error(f"Environment variable '{env_var}' has empty key")
return
run_args.extend(["--env", f"{key}={value}"])
if args.tls_keyfile and args.tls_certfile:
run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
run_with_pty(run_args)

View file

@ -17,7 +17,7 @@ import warnings
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from importlib.metadata import version as parse_version from importlib.metadata import version as parse_version
from pathlib import Path from pathlib import Path
from typing import Any, List, Union from typing import Any, List, Optional, Union
import yaml import yaml
from fastapi import Body, FastAPI, HTTPException, Request from fastapi import Body, FastAPI, HTTPException, Request
@ -314,11 +314,17 @@ class ClientVersionMiddleware:
return await self.app(scope, receive, send) return await self.app(scope, receive, send)
def main(): def main(args: Optional[argparse.Namespace] = None):
"""Start the LlamaStack server.""" """Start the LlamaStack server."""
parser = argparse.ArgumentParser(description="Start the LlamaStack server.") parser = argparse.ArgumentParser(description="Start the LlamaStack server.")
parser.add_argument( parser.add_argument(
"--yaml-config", "--yaml-config",
dest="config",
help="(Deprecated) Path to YAML configuration file - use --config instead",
)
parser.add_argument(
"--config",
dest="config",
help="Path to YAML configuration file", help="Path to YAML configuration file",
) )
parser.add_argument( parser.add_argument(
@ -348,7 +354,19 @@ def main():
required="--tls-keyfile" in sys.argv, required="--tls-keyfile" in sys.argv,
) )
args = parser.parse_args() # Determine whether the server args are being passed by the "run" command, if this is the case
# the args will be passed as a Namespace object to the main function, otherwise they will be
# parsed from the command line
if args is None:
args = parser.parse_args()
# Check for deprecated argument usage
if "--yaml-config" in sys.argv:
warnings.warn(
"The '--yaml-config' argument is deprecated and will be removed in a future version. Use '--config' instead.",
DeprecationWarning,
stacklevel=2,
)
if args.env: if args.env:
for env_pair in args.env: for env_pair in args.env:
@ -360,9 +378,9 @@ def main():
logger.error(f"Error: {str(e)}") logger.error(f"Error: {str(e)}")
sys.exit(1) sys.exit(1)
if args.yaml_config: if args.config:
# if the user provided a config file, use it, even if template was specified # if the user provided a config file, use it, even if template was specified
config_file = Path(args.yaml_config) config_file = Path(args.config)
if not config_file.exists(): if not config_file.exists():
raise ValueError(f"Config file {config_file} does not exist") raise ValueError(f"Config file {config_file} does not exist")
logger.info(f"Using config file: {config_file}") logger.info(f"Using config file: {config_file}")