From 30f8921240dbcd6d8ac1f865bb68583410fac61e Mon Sep 17 00:00:00 2001
From: Charlie Doern
Date: Mon, 3 Nov 2025 14:37:58 -0500
Subject: [PATCH] fix: generate provider config when using --providers (#4044)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

Call the `sample_run_config` method for providers that define it when generating a run config with `llama stack run --providers`. This propagates API keys and other provider defaults into the generated config.

Resolves #4032

## Test Plan

A new unit test checks the output of using `--providers` to ensure `api_key` is present in the generated config.

Manual testing:

```
╰─ llama stack list-deps --providers=inference=remote::openai --format uv | sh
Using Python 3.12.11 environment at: venv
Audited 7 packages in 8ms
╰─ llama stack run --providers=inference=remote::openai
INFO 2025-11-03 14:33:02,094 llama_stack.cli.stack.run:161 cli: Writing generated config to: /Users/charliedoern/.llama/distributions/providers-run/run.yaml
INFO 2025-11-03 14:33:02,096 llama_stack.cli.stack.run:169 cli: Using run configuration: /Users/charliedoern/.llama/distributions/providers-run/run.yaml
INFO 2025-11-03 14:33:02,099 llama_stack.cli.stack.run:228 cli: HTTPS enabled with certificates: Key: None Cert: None
INFO 2025-11-03 14:33:02,099 llama_stack.cli.stack.run:230 cli: Listening on 0.0.0.0:8321
INFO 2025-11-03 14:33:02,145 llama_stack.core.server.server:513 core::server: Run configuration:
INFO 2025-11-03 14:33:02,146 llama_stack.core.server.server:516 core::server: apis:
- inference
image_name: providers-run
providers:
  inference:
  - config:
      api_key: '********'
      base_url: https://api.openai.com/v1
    provider_id: openai
    provider_type: remote::openai
registered_resources:
  benchmarks: []
  datasets: []
  models: []
  scoring_fns: []
  shields: []
  tool_groups: []
  vector_stores: []
server:
  port: 8321
  workers: 1
storage:
  backends:
    kv_default:
      db_path: /Users/charliedoern/.llama/distributions/providers-run/kvstore.db
      type: kv_sqlite
    sql_default:
      db_path: /Users/charliedoern/.llama/distributions/providers-run/sql_store.db
      type: sql_sqlite
  stores:
    conversations:
      backend: sql_default
      table_name: openai_conversations
    inference:
      backend: sql_default
      max_write_queue_size: 10000
      num_writers: 4
      table_name: inference_store
    metadata:
      backend: kv_default
      namespace: registry
    prompts:
      backend: kv_default
      namespace: prompts
telemetry:
  enabled: false
version: 2
INFO 2025-11-03 14:33:02,299 llama_stack.providers.utils.inference.inference_store:74 inference: Write queue disabled for SQLite to avoid concurrency issues
INFO 2025-11-03 14:33:05,272 llama_stack.providers.utils.inference.openai_mixin:439 providers::utils: OpenAIInferenceAdapter.list_provider_model_ids() returned 105 models
INFO 2025-11-03 14:33:05,368 uvicorn.error:84 uncategorized: Started server process [69109]
INFO 2025-11-03 14:33:05,369 uvicorn.error:48 uncategorized: Waiting for application startup.
INFO 2025-11-03 14:33:05,370 llama_stack.core.server.server:172 core::server: Starting up Llama Stack server (version: 0.3.0)
INFO 2025-11-03 14:33:05,370 llama_stack.core.stack:495 core: starting registry refresh task
INFO 2025-11-03 14:33:05,370 uvicorn.error:62 uncategorized: Application startup complete.
INFO 2025-11-03 14:33:05,371 uvicorn.error:216 uncategorized: Uvicorn running on http://0.0.0.0:8321 (Press CTRL+C to quit)
INFO 2025-11-03 14:34:19,242 uvicorn.access:473 uncategorized: 127.0.0.1:63102 - "POST /v1/chat/completions HTTP/1.1" 200
```

Client:

```
curl http://localhost:8321/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "openai/gpt-5",
    "messages": [
      {"role": "user", "content": "What is 1 + 2"}
    ]
  }'
{"id":"...","choices":[{"finish_reason":"stop","index":0,"logprobs":null,"message":{"content":"3","refusal":null,"role":"assistant","annotations":[],"audio":null,"function_call":null,"tool_calls":null}}],"created":1762198455,"model":"openai/gpt-5","object":"chat.completion","service_tier":"default","system_fingerprint":null,"usage":{"completion_tokens":10,"prompt_tokens":13,"total_tokens":23,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0}}}
```
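For reviewers who want the mechanism at a glance: the change below boils down to "if the provider's config class defines `sample_run_config`, use its output as the provider config; otherwise fall back to an empty dict". The following is a minimal, self-contained sketch of that pattern; `SampleOpenAIConfig`, `build_provider_config`, and the `${env.OPENAI_API_KEY:=}` placeholder are illustrative stand-ins, not the actual llama_stack classes or values (see the real diff below for those).

```python
# Illustrative sketch only: SampleOpenAIConfig and build_provider_config are
# hypothetical stand-ins for the real llama_stack config classes and run.py logic.
from typing import Any


class SampleOpenAIConfig:
    """Hypothetical remote-provider config class."""

    @classmethod
    def sample_run_config(cls, __distro_dir__: str = "", **kwargs) -> dict[str, Any]:
        # Credential fields reference env vars so the generated run.yaml carries a
        # reference rather than a plaintext secret (placeholder format is illustrative).
        return {
            "api_key": "${env.OPENAI_API_KEY:=}",
            "base_url": "https://api.openai.com/v1",
        }


def build_provider_config(config_type: type | None) -> dict[str, Any]:
    # Mirrors the fallback in the run.py change: call sample_run_config when the
    # class defines it, otherwise leave the provider config empty.
    if config_type is not None and hasattr(config_type, "sample_run_config"):
        return config_type.sample_run_config(__distro_dir__="~/.llama/distributions/providers-run")
    return {}


if __name__ == "__main__":
    print(build_provider_config(SampleOpenAIConfig))
    # {'api_key': '${env.OPENAI_API_KEY:=}', 'base_url': 'https://api.openai.com/v1'}
    print(build_provider_config(None))
    # {}
```

This is why `api_key` and `base_url` now show up under the `remote::openai` provider in the generated run.yaml shown in the log above.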
---------

Signed-off-by: Charlie Doern
Co-authored-by: Ashwin Bharambe
---
 src/llama_stack/cli/stack/run.py    |  7 +++++
 tests/unit/cli/test_stack_config.py | 47 +++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/src/llama_stack/cli/stack/run.py b/src/llama_stack/cli/stack/run.py
index ae35664af..9ceb238fa 100644
--- a/src/llama_stack/cli/stack/run.py
+++ b/src/llama_stack/cli/stack/run.py
@@ -31,6 +31,7 @@ from llama_stack.core.storage.datatypes import (
 )
 from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
+from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import LoggingConfig, get_logger
 
 REPO_ROOT = Path(__file__).parent.parent.parent.parent
@@ -132,8 +133,14 @@ class StackRun(Subcommand):
                     )
                     sys.exit(1)
                 if provider_type in providers_for_api:
+                    config_type = instantiate_class_type(providers_for_api[provider_type].config_class)
+                    if config_type is not None and hasattr(config_type, "sample_run_config"):
+                        config = config_type.sample_run_config(__distro_dir__="~/.llama/distributions/providers-run")
+                    else:
+                        config = {}
                     provider = Provider(
                         provider_type=provider_type,
+                        config=config,
                         provider_id=provider_type.split("::")[1],
                     )
                     provider_list.setdefault(api, []).append(provider)
diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py
index 5270b8614..6aefac003 100644
--- a/tests/unit/cli/test_stack_config.py
+++ b/tests/unit/cli/test_stack_config.py
@@ -268,3 +268,50 @@ def test_generate_run_config_from_providers():
     # Verify config can be parsed back
     parsed = parse_and_maybe_upgrade_config(config_dict)
     assert parsed.image_name == "providers-run"
+
+
+def test_providers_flag_generates_config_with_api_keys():
+    """Test that --providers flag properly generates provider configs including API keys.
+
+    This tests the fix where sample_run_config() is called to populate
+    API keys and other credentials for remote providers like remote::openai.
+ """ + import argparse + from unittest.mock import patch + + from llama_stack.cli.stack.run import StackRun + + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers() + stack_run = StackRun(subparsers) + + # Create args with --providers flag set + args = argparse.Namespace( + providers="inference=remote::openai", + config=None, + port=8321, + image_type=None, + image_name=None, + enable_ui=False, + ) + + # Mock _uvicorn_run to prevent starting a server + with patch.object(stack_run, "_uvicorn_run"): + stack_run._run_stack_run_cmd(args) + + # Read the generated config file + from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR + + config_file = DISTRIBS_BASE_DIR / "providers-run" / "run.yaml" + with open(config_file) as f: + config_dict = yaml.safe_load(f) + + # Verify the provider has config with API keys + inference_providers = config_dict["providers"]["inference"] + assert len(inference_providers) == 1 + + openai_provider = inference_providers[0] + assert openai_provider["provider_type"] == "remote::openai" + assert openai_provider["config"], "Provider config should not be empty" + assert "api_key" in openai_provider["config"], "API key should be in provider config" + assert "base_url" in openai_provider["config"], "Base URL should be in provider config"