llama-stack-mirror/src/llama_stack/cli/stack/run.py
Charlie Doern 30f8921240
fix: generate provider config when using --providers (#4044)
# What does this PR do?

call the sample_run_config method for providers that have it when
generating a run config using `llama stack run --providers`. This will
propagate API keys

resolves #4032


## Test Plan

new unit test checks the output of using `--providers` to ensure
`api_key` is in the config.

manual testing:

```
╰─ llama stack list-deps --providers=inference=remote::openai --format uv | sh
Using Python 3.12.11 environment at: venv
Audited 7 packages in 8ms

╰─ llama stack run --providers=inference=remote::openai
INFO     2025-11-03 14:33:02,094 llama_stack.cli.stack.run:161 cli: Writing generated config to:
         /Users/charliedoern/.llama/distributions/providers-run/run.yaml
INFO     2025-11-03 14:33:02,096 llama_stack.cli.stack.run:169 cli: Using run configuration:
         /Users/charliedoern/.llama/distributions/providers-run/run.yaml
INFO     2025-11-03 14:33:02,099 llama_stack.cli.stack.run:228 cli: HTTPS enabled with certificates:
           Key: None
           Cert: None
INFO     2025-11-03 14:33:02,099 llama_stack.cli.stack.run:230 cli: Listening on 0.0.0.0:8321
INFO     2025-11-03 14:33:02,145 llama_stack.core.server.server:513 core::server: Run configuration:
INFO     2025-11-03 14:33:02,146 llama_stack.core.server.server:516 core::server: apis:
         - inference
         image_name: providers-run
         providers:
           inference:
           - config:
               api_key: '********'
               base_url: https://api.openai.com/v1
             provider_id: openai
             provider_type: remote::openai
         registered_resources:
           benchmarks: []
           datasets: []
           models: []
           scoring_fns: []
           shields: []
           tool_groups: []
           vector_stores: []
         server:
           port: 8321
           workers: 1
         storage:
           backends:
             kv_default:
               db_path: /Users/charliedoern/.llama/distributions/providers-run/kvstore.db
               type: kv_sqlite
             sql_default:
               db_path: /Users/charliedoern/.llama/distributions/providers-run/sql_store.db
               type: sql_sqlite
           stores:
             conversations:
               backend: sql_default
               table_name: openai_conversations
             inference:
               backend: sql_default
               max_write_queue_size: 10000
               num_writers: 4
               table_name: inference_store
             metadata:
               backend: kv_default
               namespace: registry
             prompts:
               backend: kv_default
               namespace: prompts
         telemetry:
           enabled: false
         version: 2

INFO     2025-11-03 14:33:02,299 llama_stack.providers.utils.inference.inference_store:74 inference: Write queue
         disabled for SQLite to avoid concurrency issues
INFO     2025-11-03 14:33:05,272 llama_stack.providers.utils.inference.openai_mixin:439 providers::utils:
         OpenAIInferenceAdapter.list_provider_model_ids() returned 105 models
INFO     2025-11-03 14:33:05,368 uvicorn.error:84 uncategorized: Started server process [69109]
INFO     2025-11-03 14:33:05,369 uvicorn.error:48 uncategorized: Waiting for application startup.
INFO     2025-11-03 14:33:05,370 llama_stack.core.server.server:172 core::server: Starting up Llama Stack server
         (version: 0.3.0)
INFO     2025-11-03 14:33:05,370 llama_stack.core.stack:495 core: starting registry refresh task
INFO     2025-11-03 14:33:05,370 uvicorn.error:62 uncategorized: Application startup complete.
INFO     2025-11-03 14:33:05,371 uvicorn.error:216 uncategorized: Uvicorn running on http://0.0.0.0:8321 (Press CTRL+C
         to quit)
INFO     2025-11-03 14:34:19,242 uvicorn.access:473 uncategorized: 127.0.0.1:63102 - "POST /v1/chat/completions
         HTTP/1.1" 200
```

client:

```
curl http://localhost:8321/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
 "model": "openai/gpt-5",
 "messages": [
     {"role": "user", "content": "What is 1 + 2"}
 ]
}'
{"id":"...","choices":[{"finish_reason":"stop","index":0,"logprobs":null,"message":{"content":"3","refusal":null,"role":"assistant","annotations":[],"audio":null,"function_call":null,"tool_calls":null}}],"created":1762198455,"model":"openai/gpt-5","object":"chat.completion","service_tier":"default","system_fingerprint":null,"usage":{"completion_tokens":10,"prompt_tokens":13,"total_tokens":23,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0}}}%
```

---------

Signed-off-by: Charlie Doern <cdoern@redhat.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
2025-11-03 11:37:58 -08:00

326 lines
14 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import os
import ssl
import subprocess
import sys
from pathlib import Path
import uvicorn
import yaml
from termcolor import cprint
from llama_stack.cli.stack.utils import ImageType
from llama_stack.cli.subcommand import Subcommand
from llama_stack.core.datatypes import Api, Provider, StackRunConfig
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageConfig,
)
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import LoggingConfig, get_logger
REPO_ROOT = Path(__file__).parent.parent.parent.parent
logger = get_logger(name=__name__, category="cli")
class StackRun(Subcommand):
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"run",
prog="llama stack run",
description="""Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.""",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_stack_run_cmd)
def _add_arguments(self):
self.parser.add_argument(
"config",
type=str,
nargs="?", # Make it optional
metavar="config | distro",
help="Path to config file to use for the run or name of known distro (`llama stack list` for a list).",
)
self.parser.add_argument(
"--port",
type=int,
help="Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT.",
default=int(os.getenv("LLAMA_STACK_PORT", 8321)),
)
self.parser.add_argument(
"--image-name",
type=str,
default=None,
help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
)
self.parser.add_argument(
"--image-type",
type=str,
help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value],
)
self.parser.add_argument(
"--enable-ui",
action="store_true",
help="Start the UI server",
)
self.parser.add_argument(
"--providers",
type=str,
default=None,
help="Run a stack with only a list of providers. This list is formatted like: api1=provider1,api1=provider2,api2=provider3. Where there can be multiple providers per API.",
)
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
import yaml
from llama_stack.core.configure import parse_and_maybe_upgrade_config
if args.image_type or args.image_name:
self.parser.error(
"The --image-type and --image-name flags are no longer supported.\n\n"
"Please activate your virtual environment manually before running `llama stack run`.\n\n"
"For example:\n"
" source /path/to/venv/bin/activate\n"
" llama stack run <config>\n"
)
if args.enable_ui:
self._start_ui_development_server(args.port)
if args.config:
try:
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
config_file = resolve_config_or_distro(args.config, Mode.RUN)
except ValueError as e:
self.parser.error(str(e))
elif args.providers:
provider_list: dict[str, list[Provider]] = dict()
for api_provider in args.providers.split(","):
if "=" not in api_provider:
cprint(
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
color="red",
file=sys.stderr,
)
sys.exit(1)
api, provider_type = api_provider.split("=")
providers_for_api = get_provider_registry().get(Api(api), None)
if providers_for_api is None:
cprint(
f"{api} is not a valid API.",
color="red",
file=sys.stderr,
)
sys.exit(1)
if provider_type in providers_for_api:
config_type = instantiate_class_type(providers_for_api[provider_type].config_class)
if config_type is not None and hasattr(config_type, "sample_run_config"):
config = config_type.sample_run_config(__distro_dir__="~/.llama/distributions/providers-run")
else:
config = {}
provider = Provider(
provider_type=provider_type,
config=config,
provider_id=provider_type.split("::")[1],
)
provider_list.setdefault(api, []).append(provider)
else:
cprint(
f"{provider} is not a valid provider for the {api} API.",
color="red",
file=sys.stderr,
)
sys.exit(1)
run_config = self._generate_run_config_from_providers(providers=provider_list)
config_dict = run_config.model_dump(mode="json")
# Write config to disk in providers-run directory
distro_dir = DISTRIBS_BASE_DIR / "providers-run"
config_file = distro_dir / "run.yaml"
logger.info(f"Writing generated config to: {config_file}")
with open(config_file, "w") as f:
yaml.dump(config_dict, f, default_flow_style=False, sort_keys=False)
else:
config_file = None
if config_file:
logger.info(f"Using run configuration: {config_file}")
try:
config_dict = yaml.safe_load(config_file.read_text())
except yaml.parser.ParserError as e:
self.parser.error(f"failed to load config file '{config_file}':\n {e}")
try:
config = parse_and_maybe_upgrade_config(config_dict)
# Create external_providers_dir if it's specified and doesn't exist
if config.external_providers_dir and not os.path.exists(str(config.external_providers_dir)):
os.makedirs(str(config.external_providers_dir), exist_ok=True)
except AttributeError as e:
self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
self._uvicorn_run(config_file, args)
def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
if not config_file:
self.parser.error("Config file is required")
config_file = resolve_config_or_distro(str(config_file), Mode.RUN)
with open(config_file) as fp:
config_contents = yaml.safe_load(fp)
if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")):
logger_config = LoggingConfig(**cfg)
else:
logger_config = None
config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
port = args.port or config.server.port
host = config.server.host or "0.0.0.0"
# Set the config file in environment so create_app can find it
os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
uvicorn_config = {
"factory": True,
"host": host,
"port": port,
"lifespan": "on",
"log_level": logger.getEffectiveLevel(),
"log_config": logger_config,
"workers": config.server.workers,
}
keyfile = config.server.tls_keyfile
certfile = config.server.tls_certfile
if keyfile and certfile:
uvicorn_config["ssl_keyfile"] = config.server.tls_keyfile
uvicorn_config["ssl_certfile"] = config.server.tls_certfile
if config.server.tls_cafile:
uvicorn_config["ssl_ca_certs"] = config.server.tls_cafile
uvicorn_config["ssl_cert_reqs"] = ssl.CERT_REQUIRED
logger.info(
f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}\n CA: {config.server.tls_cafile}"
)
else:
logger.info(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
logger.info(f"Listening on {host}:{port}")
# We need to catch KeyboardInterrupt because uvicorn's signal handling
# re-raises SIGINT signals using signal.raise_signal(), which Python
# converts to KeyboardInterrupt. Without this catch, we'd get a confusing
# stack trace when using Ctrl+C or kill -2 (SIGINT).
# SIGTERM (kill -15) works fine without this because Python doesn't
# have a default handler for it.
#
# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
# signal handling but this is quite intrusive and not worth the effort.
try:
uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config) # type: ignore[arg-type]
except (KeyboardInterrupt, SystemExit):
logger.info("Received interrupt signal, shutting down gracefully...")
def _start_ui_development_server(self, stack_server_port: int):
logger.info("Attempting to start UI development server...")
# Check if npm is available
npm_check = subprocess.run(["npm", "--version"], capture_output=True, text=True, check=False)
if npm_check.returncode != 0:
logger.warning(
f"'npm' command not found or not executable. UI development server will not be started. Error: {npm_check.stderr}"
)
return
ui_dir = REPO_ROOT / "llama_stack" / "ui"
logs_dir = Path("~/.llama/ui/logs").expanduser()
try:
# Create logs directory if it doesn't exist
logs_dir.mkdir(parents=True, exist_ok=True)
ui_stdout_log_path = logs_dir / "stdout.log"
ui_stderr_log_path = logs_dir / "stderr.log"
# Open log files in append mode
stdout_log_file = open(ui_stdout_log_path, "a")
stderr_log_file = open(ui_stderr_log_path, "a")
process = subprocess.Popen(
["npm", "run", "dev"],
cwd=str(ui_dir),
stdout=stdout_log_file,
stderr=stderr_log_file,
env={**os.environ, "NEXT_PUBLIC_LLAMA_STACK_BASE_URL": f"http://localhost:{stack_server_port}"},
)
logger.info(f"UI development server process started in {ui_dir} with PID {process.pid}.")
logger.info(f"Logs: stdout -> {ui_stdout_log_path}, stderr -> {ui_stderr_log_path}")
logger.info(f"UI will be available at http://localhost:{os.getenv('LLAMA_STACK_UI_PORT', 8322)}")
except FileNotFoundError:
logger.error(
"Failed to start UI development server: 'npm' command not found. Make sure npm is installed and in your PATH."
)
except Exception as e:
logger.error(f"Failed to start UI development server in {ui_dir}: {e}")
def _generate_run_config_from_providers(self, providers: dict[str, list[Provider]]):
apis = list(providers.keys())
distro_dir = DISTRIBS_BASE_DIR / "providers-run"
# need somewhere to put the storage.
os.makedirs(distro_dir, exist_ok=True)
storage = StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
),
"sql_default": SqliteSqlStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db",
),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(
backend="kv_default",
namespace="registry",
),
inference=InferenceStoreReference(
backend="sql_default",
table_name="inference_store",
),
conversations=SqlStoreReference(
backend="sql_default",
table_name="openai_conversations",
),
prompts=KVStoreReference(
backend="kv_default",
namespace="prompts",
),
),
)
return StackRunConfig(
image_name="providers-run",
apis=apis,
providers=providers,
storage=storage,
)