mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 02:53:30 +00:00
feat(server): Use system packages for execution (#1252)
# What does this PR do?

Users prefer to rely on the main CLI rather than invoking the server through a Python module. Users interact with a high-level CLI rather than needing to know internal module structures.

Now, when running `llama stack run <path-to-config>`, the server will attempt to use the system package or a virtual environment if one is active.

This also eliminates the current process dependency chain when running from a virtual environment:

-> llama stack run
-> start_env.sh
-> python -m server...

Signed-off-by: Sébastien Han <seb@redhat.com>

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

Run:

```
ollama run llama3.2:3b-instruct-fp16 --keepalive=2m &
llama stack run ./llama_stack/templates/ollama/run.yaml --disable-ipv6
```

Notice that the server starts and shuts down normally.

[//]: # (## Documentation)

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
parent
feacf89548
commit
21e39633d8
2 changed files with 59 additions and 21 deletions
|
@ -56,7 +56,6 @@ class StackRun(Subcommand):
|
||||||
"--env",
|
"--env",
|
||||||
action="append",
|
action="append",
|
||||||
help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.",
|
help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.",
|
||||||
default=[],
|
|
||||||
metavar="KEY=VALUE",
|
metavar="KEY=VALUE",
|
||||||
)
|
)
|
||||||
self.parser.add_argument(
|
self.parser.add_argument(
|
||||||
|
@ -74,7 +73,6 @@ class StackRun(Subcommand):
|
||||||
type=str,
|
type=str,
|
||||||
help="Image Type used during the build. This can be either conda or container or venv.",
|
help="Image Type used during the build. This can be either conda or container or venv.",
|
||||||
choices=["conda", "container", "venv"],
|
choices=["conda", "container", "venv"],
|
||||||
default="conda",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
|
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
|
||||||
|
@ -120,20 +118,42 @@ class StackRun(Subcommand):
|
||||||
except AttributeError as e:
|
except AttributeError as e:
|
||||||
self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
|
self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
|
||||||
|
|
||||||
run_args = formulate_run_args(args.image_type, args.image_name, config, template_name)
|
# If neither image type nor image name is provided, assume the server should be run directly
|
||||||
|
# using the current environment packages.
|
||||||
|
if not args.image_type and not args.image_name:
|
||||||
|
logger.info("No image type or image name provided. Assuming environment packages.")
|
||||||
|
from llama_stack.distribution.server.server import main as server_main
|
||||||
|
|
||||||
run_args.extend([str(config_file), str(args.port)])
|
# Build the server args from the current args passed to the CLI
|
||||||
if args.disable_ipv6:
|
server_args = argparse.Namespace()
|
||||||
run_args.append("--disable-ipv6")
|
for arg in vars(args):
|
||||||
|
# If this is a function, avoid passing it
|
||||||
|
# "args" contains:
|
||||||
|
# func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
|
||||||
|
if callable(getattr(args, arg)):
|
||||||
|
continue
|
||||||
|
setattr(server_args, arg, getattr(args, arg))
|
||||||
|
|
||||||
for env_var in args.env:
|
# Run the server
|
||||||
if "=" not in env_var:
|
server_main(server_args)
|
||||||
self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format")
|
else:
|
||||||
key, value = env_var.split("=", 1) # split on first = only
|
run_args = formulate_run_args(args.image_type, args.image_name, config, template_name)
|
||||||
if not key:
|
|
||||||
self.parser.error(f"Environment variable '{env_var}' has empty key")
|
|
||||||
run_args.extend(["--env", f"{key}={value}"])
|
|
||||||
|
|
||||||
if args.tls_keyfile and args.tls_certfile:
|
run_args.extend([str(config_file), str(args.port)])
|
||||||
run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
|
if args.disable_ipv6:
|
||||||
run_with_pty(run_args)
|
run_args.append("--disable-ipv6")
|
||||||
|
|
||||||
|
if args.env:
|
||||||
|
for env_var in args.env:
|
||||||
|
if "=" not in env_var:
|
||||||
|
self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format")
|
||||||
|
return
|
||||||
|
key, value = env_var.split("=", 1) # split on first = only
|
||||||
|
if not key:
|
||||||
|
self.parser.error(f"Environment variable '{env_var}' has empty key")
|
||||||
|
return
|
||||||
|
run_args.extend(["--env", f"{key}={value}"])
|
||||||
|
|
||||||
|
if args.tls_keyfile and args.tls_certfile:
|
||||||
|
run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
|
||||||
|
run_with_pty(run_args)
|
||||||
|
|
|
@ -17,7 +17,7 @@ import warnings
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from importlib.metadata import version as parse_version
|
from importlib.metadata import version as parse_version
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, List, Union
|
from typing import Any, List, Optional, Union
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
from fastapi import Body, FastAPI, HTTPException, Request
|
from fastapi import Body, FastAPI, HTTPException, Request
|
||||||
|
@ -314,11 +314,17 @@ class ClientVersionMiddleware:
|
||||||
return await self.app(scope, receive, send)
|
return await self.app(scope, receive, send)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main(args: Optional[argparse.Namespace] = None):
|
||||||
"""Start the LlamaStack server."""
|
"""Start the LlamaStack server."""
|
||||||
parser = argparse.ArgumentParser(description="Start the LlamaStack server.")
|
parser = argparse.ArgumentParser(description="Start the LlamaStack server.")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--yaml-config",
|
"--yaml-config",
|
||||||
|
dest="config",
|
||||||
|
help="(Deprecated) Path to YAML configuration file - use --config instead",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--config",
|
||||||
|
dest="config",
|
||||||
help="Path to YAML configuration file",
|
help="Path to YAML configuration file",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
@ -348,7 +354,19 @@ def main():
|
||||||
required="--tls-keyfile" in sys.argv,
|
required="--tls-keyfile" in sys.argv,
|
||||||
)
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
# Determine whether the server args are being passed by the "run" command, if this is the case
|
||||||
|
# the args will be passed as a Namespace object to the main function, otherwise they will be
|
||||||
|
# parsed from the command line
|
||||||
|
if args is None:
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Check for deprecated argument usage
|
||||||
|
if "--yaml-config" in sys.argv:
|
||||||
|
warnings.warn(
|
||||||
|
"The '--yaml-config' argument is deprecated and will be removed in a future version. Use '--config' instead.",
|
||||||
|
DeprecationWarning,
|
||||||
|
stacklevel=2,
|
||||||
|
)
|
||||||
|
|
||||||
if args.env:
|
if args.env:
|
||||||
for env_pair in args.env:
|
for env_pair in args.env:
|
||||||
|
@ -360,9 +378,9 @@ def main():
|
||||||
logger.error(f"Error: {str(e)}")
|
logger.error(f"Error: {str(e)}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
if args.yaml_config:
|
if args.config:
|
||||||
# if the user provided a config file, use it, even if template was specified
|
# if the user provided a config file, use it, even if template was specified
|
||||||
config_file = Path(args.yaml_config)
|
config_file = Path(args.config)
|
||||||
if not config_file.exists():
|
if not config_file.exists():
|
||||||
raise ValueError(f"Config file {config_file} does not exist")
|
raise ValueError(f"Config file {config_file} does not exist")
|
||||||
logger.info(f"Using config file: {config_file}")
|
logger.info(f"Using config file: {config_file}")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue