Make run.yaml optional so Docker containers can start with just --env (#492)

When running with Docker, the idea is that users should be able to work purely
with the `llama stack` CLI. They should not need to know about the
existence of any YAML files unless they want to. This PR enables that.

The `docker run` command no longer needs to volume-mount a run.yaml and can
be as simple as:
```bash
docker run -v ~/.llama/:/root/.llama \
  --env A=a --env B=b
```

## Test Plan

Check with conda first (no regressions):
```bash
LLAMA_STACK_DIR=. llama stack build --template ollama
llama stack run ollama --port 5001

# server starts up correctly
```

Check with Docker:
```bash
# build the docker
LLAMA_STACK_DIR=. llama stack build --template ollama --image-type docker

export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct"

docker run -it  -p 5001:5001 \
  -v ~/.llama:/root/.llama \
  -v $PWD:/app/llama-stack-source \
  localhost/distribution-ollama:dev \
  --port 5001 \
  --env INFERENCE_MODEL=$INFERENCE_MODEL \
  --env OLLAMA_URL=http://host.docker.internal:11434
```

Note that volume-mounting the source to `/app/llama-stack-source` is only needed
because we built the Docker image from uncommitted source code.
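
If the image had been built from committed source, that mount could be dropped entirely. A minimal sketch of such an invocation, reusing the image name and environment values from the test plan above:

```bash
# no run.yaml is mounted; the image falls back to its baked-in template and
# configuration is supplied purely via --env overrides
docker run -it -p 5001:5001 \
  -v ~/.llama:/root/.llama \
  localhost/distribution-ollama:dev \
  --port 5001 \
  --env INFERENCE_MODEL=$INFERENCE_MODEL \
  --env OLLAMA_URL=http://host.docker.internal:11434
```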
Commit 681322731b (parent 1d8d0593af), authored by Ashwin Bharambe on 2024-11-20 13:11:40 -08:00 and committed via GitHub.
3 changed files with 44 additions and 8 deletions

Changes to the `llama stack run` CLI command:

```diff
@@ -5,9 +5,12 @@
 # the root directory of this source tree.

 import argparse
+from pathlib import Path

 from llama_stack.cli.subcommand import Subcommand

+REPO_ROOT = Path(__file__).parent.parent.parent.parent
+

 class StackRun(Subcommand):
     def __init__(self, subparsers: argparse._SubParsersAction):
@@ -48,8 +51,6 @@ class StackRun(Subcommand):
         )

     def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
-        from pathlib import Path
-
         import pkg_resources
         import yaml
@@ -66,19 +67,27 @@ class StackRun(Subcommand):
             return

         config_file = Path(args.config)
-        if not config_file.exists() and not args.config.endswith(".yaml"):
+        has_yaml_suffix = args.config.endswith(".yaml")
+
+        if not config_file.exists() and not has_yaml_suffix:
+            # check if this is a template
+            config_file = (
+                Path(REPO_ROOT) / "llama_stack" / "templates" / args.config / "run.yaml"
+            )
+
+        if not config_file.exists() and not has_yaml_suffix:
             # check if it's a build config saved to conda dir
             config_file = Path(
                 BUILDS_BASE_DIR / ImageType.conda.value / f"{args.config}-run.yaml"
             )

-        if not config_file.exists() and not args.config.endswith(".yaml"):
+        if not config_file.exists() and not has_yaml_suffix:
             # check if it's a build config saved to docker dir
             config_file = Path(
                 BUILDS_BASE_DIR / ImageType.docker.value / f"{args.config}-run.yaml"
             )

-        if not config_file.exists() and not args.config.endswith(".yaml"):
+        if not config_file.exists() and not has_yaml_suffix:
             # check if it's a build config saved to ~/.llama dir
             config_file = Path(
                 DISTRIBS_BASE_DIR
@@ -92,6 +101,7 @@ class StackRun(Subcommand):
             )
             return

+        print(f"Using config file: {config_file}")
         config_dict = yaml.safe_load(config_file.read_text())
         config = parse_and_maybe_upgrade_config(config_dict)
```
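
With this change, when the argument to `llama stack run` is not an existing path and does not end in `.yaml`, the CLI looks for a `run.yaml` in order: the bundled `llama_stack/templates/<name>/` directory, then the conda and docker build directories, then the build configs under `~/.llama`. A quick way to exercise this (the new `print` shows which candidate was picked):

```bash
# the bare template name now resolves to llama_stack/templates/ollama/run.yaml
# (relative to the installed llama_stack package or a source checkout); the CLI
# prints "Using config file: ..." so you can confirm which file was selected
llama stack run ollama --port 5001
```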

Changes to the container build script:

```diff
@@ -122,7 +122,7 @@ add_to_docker <<EOF
 # This would be good in production but for debugging flexibility lets not add it right now
 # We need a more solid production ready entrypoint.sh anyway
 #
-ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server"]
+ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "$build_name"]
 EOF
```
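
Since `$build_name` is expanded while the image is being built, the generated Dockerfile ends with an ENTRYPOINT that already selects the template, and any arguments passed after the image name on `docker run` are appended to it. A sketch of the effect for the ollama template (illustrative expansion, not the literal script output):

```bash
# for build_name=ollama, the heredoc above effectively adds this line to the
# generated Dockerfile, so the container starts the server with the template
# preselected and treats extra docker run arguments (--port, --env, ...) as
# additional server flags
add_to_docker <<EOF
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "ollama"]
EOF
```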

Changes to the server entrypoint (`llama_stack.distribution.server.server`):

```diff
@@ -16,6 +16,7 @@
 import traceback
 import warnings

 from contextlib import asynccontextmanager
+from pathlib import Path
 from ssl import SSLError
 from typing import Any, Dict, Optional
@@ -49,6 +50,9 @@ from llama_stack.distribution.stack import (
 from .endpoints import get_all_api_endpoints

+REPO_ROOT = Path(__file__).parent.parent.parent.parent
+
+
 def warn_with_traceback(message, category, filename, lineno, file=None, line=None):
     log = file if hasattr(file, "write") else sys.stderr
     traceback.print_stack(file=log)
@@ -279,9 +283,12 @@ def main():
     parser = argparse.ArgumentParser(description="Start the LlamaStack server.")
     parser.add_argument(
         "--yaml-config",
-        default="llamastack-run.yaml",
         help="Path to YAML configuration file",
     )
+    parser.add_argument(
+        "--template",
+        help="One of the template names in llama_stack/templates (e.g., tgi, fireworks, remote-vllm, etc.)",
+    )
     parser.add_argument("--port", type=int, default=5000, help="Port to listen on")
     parser.add_argument(
         "--disable-ipv6", action="store_true", help="Whether to disable IPv6 support"
@@ -303,10 +310,29 @@
         print(f"Error: {str(e)}")
         sys.exit(1)

-    with open(args.yaml_config, "r") as fp:
+    if args.yaml_config:
+        # if the user provided a config file, use it, even if template was specified
+        config_file = Path(args.yaml_config)
+        if not config_file.exists():
+            raise ValueError(f"Config file {config_file} does not exist")
+        print(f"Using config file: {config_file}")
+    elif args.template:
+        config_file = (
+            Path(REPO_ROOT) / "llama_stack" / "templates" / args.template / "run.yaml"
+        )
+        if not config_file.exists():
+            raise ValueError(f"Template {args.template} does not exist")
+        print(f"Using template {args.template} config file: {config_file}")
+    else:
+        raise ValueError("Either --yaml-config or --template must be provided")
+
+    with open(config_file, "r") as fp:
         config = replace_env_vars(yaml.safe_load(fp))
         config = StackRunConfig(**config)
+    print("Run configuration:")
+    print(yaml.dump(config.model_dump(), indent=2))

     app = FastAPI()

     try:
```
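
The server can therefore be started either from a template name or from an explicit config file, with `--yaml-config` taking precedence if both are given. A minimal sketch (the config path below is illustrative):

```bash
# start from a bundled template
python -m llama_stack.distribution.server.server --template ollama --port 5001

# or point at an explicit run.yaml; this wins even if --template is also passed
python -m llama_stack.distribution.server.server \
  --yaml-config ~/.llama/my-run.yaml --port 5001
```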