From 6ffaae62f88a330f2d56c98d30f388d198fa2577 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Tue, 18 Nov 2025 15:32:44 -0500 Subject: [PATCH 01/14] feat: remove usage of build yaml for list-deps the build.yaml is only used in the following ways: 1. list-deps 2. distribution code-gen since `llama stack build` no longer exists, I found myself asking "why do we need two different files for list-deps and run"? Removing the BuildConfig and DistributionTemplate from llama stack list-deps is the first step in removing the build yaml entirely. Removing the BuildConfig and build.yaml cuts the files users need to maintain in half, and allows us to focus on the stability of _just_ the run.yaml The build.yaml made sense for when we were managing the build process for the user and actually _producing_ a run.yaml _from_ the build.yaml, but now that we are simply just getting the provider registry and listing the deps, switching to run.yaml simplifies the scope here greatly Signed-off-by: Charlie Doern --- .github/workflows/providers-list-deps.yml | 2 +- .github/workflows/test-external.yml | 4 +-- .../external/external-providers-guide.mdx | 25 +++++++-------- src/llama_stack/cli/stack/_list_deps.py | 31 ++++++------------- src/llama_stack/core/build.py | 6 ++-- src/llama_stack/core/datatypes.py | 6 ++++ 6 files changed, 34 insertions(+), 40 deletions(-) diff --git a/.github/workflows/providers-list-deps.yml b/.github/workflows/providers-list-deps.yml index 88659dbe3..a2e8a87c9 100644 --- a/.github/workflows/providers-list-deps.yml +++ b/.github/workflows/providers-list-deps.yml @@ -102,4 +102,4 @@ jobs: USE_COPY_NOT_MOUNT: "true" LLAMA_STACK_DIR: "." run: | - uv run llama stack list-deps src/llama_stack/distributions/ci-tests/build.yaml + uv run llama stack list-deps src/llama_stack/distributions/ci-tests/run.yaml diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml index a99719718..fed3967ee 100644 --- a/.github/workflows/test-external.yml +++ b/.github/workflows/test-external.yml @@ -44,14 +44,14 @@ jobs: - name: Print distro dependencies run: | - uv run --no-sync llama stack list-deps tests/external/build.yaml + uv run --no-sync llama stack list-deps tests/external/run-byoa.yaml - name: Build distro from config file run: | uv venv ci-test source ci-test/bin/activate uv pip install -e . - LLAMA_STACK_LOGGING=all=CRITICAL llama stack list-deps tests/external/build.yaml | xargs -L1 uv pip install + LLAMA_STACK_LOGGING=all=CRITICAL llama stack list-deps tests/external/run-byoa.yaml | xargs -L1 uv pip install - name: Start Llama Stack server in background if: ${{ matrix.image-type }} == 'venv' diff --git a/docs/docs/providers/external/external-providers-guide.mdx b/docs/docs/providers/external/external-providers-guide.mdx index dc813c75b..3d013f720 100644 --- a/docs/docs/providers/external/external-providers-guide.mdx +++ b/docs/docs/providers/external/external-providers-guide.mdx @@ -222,22 +222,21 @@ def get_provider_spec() -> ProviderSpec: [ramalama-stack](https://github.com/containers/ramalama-stack) is a recognized external provider that supports installation via module. 
-To install Llama Stack with this external provider a user can provider the following build.yaml: +To install Llama Stack with this external provider a user can provider the following run.yaml: ```yaml version: 2 -distribution_spec: - description: Use (an external) Ramalama server for running LLM inference - container_image: null - providers: - inference: - - provider_type: remote::ramalama - module: ramalama_stack==0.3.0a0 -image_type: venv -image_name: null -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] +image_name: ramalama +apis: +- inference +providers: + inference: + - provider_id: ramalama + provider_type: remote::ramalama + module: ramalama_stack==0.3.0a0 + config: {} +server: + port: 8321 ``` No other steps are required beyond installing dependencies with `llama stack list-deps | xargs -L1 uv pip install` and then running `llama stack run`. The CLI will use `module` to install the provider dependencies, retrieve the spec, etc. diff --git a/src/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py index 82bef1a4f..f11ce486a 100644 --- a/src/llama_stack/cli/stack/_list_deps.py +++ b/src/llama_stack/cli/stack/_list_deps.py @@ -11,15 +11,9 @@ from pathlib import Path import yaml from termcolor import cprint -from llama_stack.cli.stack.utils import ImageType from llama_stack.core.build import get_provider_dependencies -from llama_stack.core.datatypes import ( - BuildConfig, - BuildProvider, - DistributionSpec, -) +from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_provider_registry -from llama_stack.core.stack import replace_env_vars from llama_stack.log import get_logger from llama_stack_api import Api @@ -72,7 +66,7 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None: try: from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro - config_file = resolve_config_or_distro(args.config, Mode.BUILD) + config_file = resolve_config_or_distro(args.config, Mode.RUN) except ValueError as e: cprint( f"Could not parse config file {args.config}: {e}", @@ -84,9 +78,7 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None: with open(config_file) as f: try: contents = yaml.safe_load(f) - contents = replace_env_vars(contents) - build_config = BuildConfig(**contents) - build_config.image_type = "venv" + run_config = StackRunConfig(**contents) except Exception as e: cprint( f"Could not parse config file {config_file}: {e}", @@ -95,7 +87,7 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None: ) sys.exit(1) elif args.providers: - provider_list: dict[str, list[BuildProvider]] = dict() + provider_list: dict[str, list[Provider]] = dict() for api_provider in args.providers.split(","): if "=" not in api_provider: cprint( @@ -114,8 +106,9 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None: ) sys.exit(1) if provider_type in providers_for_api: - provider = BuildProvider( + provider = Provider( provider_type=provider_type, + provider_id=provider_type.split("::")[1], module=None, ) provider_list.setdefault(api, []).append(provider) @@ -126,20 +119,16 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None: file=sys.stderr, ) sys.exit(1) - distribution_spec = DistributionSpec( - providers=provider_list, - description=",".join(args.providers), - ) - build_config = BuildConfig(image_type=ImageType.VENV.value, distribution_spec=distribution_spec) + run_config = StackRunConfig(providers=provider_list, 
image_name="providers-run") - normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config) + normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(run_config) normal_deps += SERVER_DEPENDENCIES # Add external API dependencies - if build_config.external_apis_dir: + if run_config.external_apis_dir: from llama_stack.core.external import load_external_apis - external_apis = load_external_apis(build_config) + external_apis = load_external_apis(run_config) if external_apis: for _, api_spec in external_apis.items(): normal_deps.extend(api_spec.pip_packages) diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index 630b2a47f..757763bd1 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -9,7 +9,7 @@ import sys from pydantic import BaseModel from termcolor import cprint -from llama_stack.core.datatypes import BuildConfig +from llama_stack.core.datatypes import BuildConfig, StackRunConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.distributions.template import DistributionTemplate from llama_stack.log import get_logger @@ -36,13 +36,13 @@ class ApiInput(BaseModel): def get_provider_dependencies( - config: BuildConfig | DistributionTemplate, + config: StackRunConfig, ) -> tuple[list[str], list[str], list[str]]: """Get normal and special dependencies from provider configuration.""" if isinstance(config, DistributionTemplate): config = config.build_config() - providers = config.distribution_spec.providers + providers = config.providers additional_pip_packages = config.additional_pip_packages deps = [] diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 1e29690ff..245491332 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -517,6 +517,7 @@ can be instantiated multiple times (with different configs) if necessary. """, ) storage: StorageConfig = Field( + default_factory=StorageConfig, description="Catalog of named storage backends and references available to the stack", ) @@ -534,6 +535,11 @@ can be instantiated multiple times (with different configs) if necessary. description="Configuration for the HTTP(S) server", ) + additional_pip_packages: list[str] = Field( + default_factory=list, + description="Additional pip packages to install in the distribution. These packages will be installed in the distribution environment.", + ) + external_providers_dir: Path | None = Field( default=None, description="Path to directory containing external provider implementations. The providers code and dependencies must be installed on the system.", From 4311641fd6bda9988035a068648a12a9bc2e6813 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Wed, 19 Nov 2025 16:16:00 -0500 Subject: [PATCH 02/14] chore: remove additional_pip_packages all of the additional pip packages are already in `llama-stack`'s pyproject except for psycopg2-binary (which I added), so they are unnecessary. 
This also allows me to get rid of the additional_pip_packages field Signed-off-by: Charlie Doern --- pyproject.toml | 1 + src/llama_stack/core/build.py | 3 -- src/llama_stack/core/datatypes.py | 9 ------ .../distributions/ci-tests/build.yaml | 5 ---- src/llama_stack/distributions/dell/build.yaml | 3 -- .../meta-reference-gpu/build.yaml | 3 -- .../distributions/nvidia/build.yaml | 3 -- src/llama_stack/distributions/oci/build.yaml | 3 -- .../distributions/open-benchmark/build.yaml | 3 -- .../distributions/starter-gpu/build.yaml | 5 ---- .../distributions/starter/build.yaml | 5 ---- .../distributions/starter/starter.py | 1 - src/llama_stack/distributions/template.py | 29 ------------------- .../distributions/watsonx/build.yaml | 3 -- tests/external/ramalama-stack/build.yaml | 3 -- uv.lock | 2 ++ 16 files changed, 3 insertions(+), 78 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3e16dc08f..688318dd1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ dependencies = [ "asyncpg", # for metadata store "sqlalchemy[asyncio]>=2.0.41", # server - for conversations "starlette>=0.49.1", + "psycopg2-binary", ] [project.optional-dependencies] diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index 757763bd1..ef64f2036 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -43,7 +43,6 @@ def get_provider_dependencies( config = config.build_config() providers = config.providers - additional_pip_packages = config.additional_pip_packages deps = [] external_provider_deps = [] @@ -81,8 +80,6 @@ def get_provider_dependencies( else: normal_deps.append(package) - normal_deps.extend(additional_pip_packages or []) - return list(set(normal_deps)), list(set(special_deps)), list(set(external_provider_deps)) diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 245491332..94d235678 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -535,11 +535,6 @@ can be instantiated multiple times (with different configs) if necessary. description="Configuration for the HTTP(S) server", ) - additional_pip_packages: list[str] = Field( - default_factory=list, - description="Additional pip packages to install in the distribution. These packages will be installed in the distribution environment.", - ) - external_providers_dir: Path | None = Field( default=None, description="Path to directory containing external provider implementations. The providers code and dependencies must be installed on the system.", @@ -631,10 +626,6 @@ class BuildConfig(BaseModel): description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. " "pip_packages MUST contain the provider package name.", ) - additional_pip_packages: list[str] = Field( - default_factory=list, - description="Additional pip packages to install in the distribution. These packages will be installed in the distribution environment.", - ) external_apis_dir: Path | None = Field( default=None, description="Path to directory containing external API implementations. 
The APIs code and dependencies must be installed on the system.", diff --git a/src/llama_stack/distributions/ci-tests/build.yaml b/src/llama_stack/distributions/ci-tests/build.yaml index f29ac7712..a4a9b6354 100644 --- a/src/llama_stack/distributions/ci-tests/build.yaml +++ b/src/llama_stack/distributions/ci-tests/build.yaml @@ -53,8 +53,3 @@ distribution_spec: batches: - provider_type: inline::reference image_type: venv -additional_pip_packages: -- aiosqlite -- asyncpg -- psycopg2-binary -- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/dell/build.yaml b/src/llama_stack/distributions/dell/build.yaml index 7bc26ca9e..2c809e0fe 100644 --- a/src/llama_stack/distributions/dell/build.yaml +++ b/src/llama_stack/distributions/dell/build.yaml @@ -28,6 +28,3 @@ distribution_spec: - provider_type: remote::tavily-search - provider_type: inline::rag-runtime image_type: venv -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/meta-reference-gpu/build.yaml b/src/llama_stack/distributions/meta-reference-gpu/build.yaml index 1513742a7..62a845b83 100644 --- a/src/llama_stack/distributions/meta-reference-gpu/build.yaml +++ b/src/llama_stack/distributions/meta-reference-gpu/build.yaml @@ -27,6 +27,3 @@ distribution_spec: - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol image_type: venv -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/nvidia/build.yaml b/src/llama_stack/distributions/nvidia/build.yaml index 8ddd12439..86e085575 100644 --- a/src/llama_stack/distributions/nvidia/build.yaml +++ b/src/llama_stack/distributions/nvidia/build.yaml @@ -24,6 +24,3 @@ distribution_spec: files: - provider_type: inline::localfs image_type: venv -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/oci/build.yaml b/src/llama_stack/distributions/oci/build.yaml index 7e082e1f6..ac9fe967d 100644 --- a/src/llama_stack/distributions/oci/build.yaml +++ b/src/llama_stack/distributions/oci/build.yaml @@ -30,6 +30,3 @@ distribution_spec: files: - provider_type: inline::localfs image_type: venv -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/open-benchmark/build.yaml b/src/llama_stack/distributions/open-benchmark/build.yaml index 05acd98e3..9613ef60d 100644 --- a/src/llama_stack/distributions/open-benchmark/build.yaml +++ b/src/llama_stack/distributions/open-benchmark/build.yaml @@ -31,6 +31,3 @@ distribution_spec: - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol image_type: venv -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/starter-gpu/build.yaml b/src/llama_stack/distributions/starter-gpu/build.yaml index 10cbb1389..57ba12dc4 100644 --- a/src/llama_stack/distributions/starter-gpu/build.yaml +++ b/src/llama_stack/distributions/starter-gpu/build.yaml @@ -54,8 +54,3 @@ distribution_spec: batches: - provider_type: inline::reference image_type: venv -additional_pip_packages: -- aiosqlite -- asyncpg -- psycopg2-binary -- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/starter/build.yaml b/src/llama_stack/distributions/starter/build.yaml index acd51f773..9fe7ccd14 100644 --- a/src/llama_stack/distributions/starter/build.yaml +++ b/src/llama_stack/distributions/starter/build.yaml @@ -54,8 +54,3 @@ distribution_spec: batches: - provider_type: inline::reference 
image_type: venv -additional_pip_packages: -- aiosqlite -- asyncpg -- psycopg2-binary -- sqlalchemy[asyncio] diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index 32264eebb..8a0efdf1f 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -275,7 +275,6 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())), run_configs={ "run.yaml": base_run_settings, "run-with-postgres-store.yaml": postgres_run_settings, diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py index 90b458805..a8e5f2839 100644 --- a/src/llama_stack/distributions/template.py +++ b/src/llama_stack/distributions/template.py @@ -36,9 +36,7 @@ from llama_stack.core.storage.datatypes import ( StorageBackendType, ) from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig -from llama_stack.core.storage.kvstore.config import get_pip_packages as get_kv_pip_packages from llama_stack.core.storage.sqlstore.sqlstore import SqliteSqlStoreConfig -from llama_stack.core.storage.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry @@ -322,33 +320,7 @@ class DistributionTemplate(BaseModel): available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None - # we may want to specify additional pip packages without necessarily indicating a - # specific "default" inference store (which is what typically used to dictate additional - # pip packages) - additional_pip_packages: list[str] | None = None - def build_config(self) -> BuildConfig: - additional_pip_packages: list[str] = [] - for run_config in self.run_configs.values(): - run_config_ = run_config.run_config(self.name, self.providers, self.container_image) - - # TODO: This is a hack to get the dependencies for internal APIs into build - # We should have a better way to do this by formalizing the concept of "internal" APIs - # and providers, with a way to specify dependencies for them. 
- - storage_cfg = run_config_.get("storage", {}) - for backend_cfg in storage_cfg.get("backends", {}).values(): - store_type = backend_cfg.get("type") - if not store_type: - continue - if str(store_type).startswith("kv_"): - additional_pip_packages.extend(get_kv_pip_packages(backend_cfg)) - elif str(store_type).startswith("sql_"): - additional_pip_packages.extend(get_sql_pip_packages(backend_cfg)) - - if self.additional_pip_packages: - additional_pip_packages.extend(self.additional_pip_packages) - # Create minimal providers for build config (without runtime configs) build_providers = {} for api, providers in self.providers.items(): @@ -368,7 +340,6 @@ class DistributionTemplate(BaseModel): providers=build_providers, ), image_type=LlamaStackImageType.VENV.value, # default to venv - additional_pip_packages=sorted(set(additional_pip_packages)), ) def generate_markdown_docs(self) -> str: diff --git a/src/llama_stack/distributions/watsonx/build.yaml b/src/llama_stack/distributions/watsonx/build.yaml index dba1a94e2..8fc27c8c8 100644 --- a/src/llama_stack/distributions/watsonx/build.yaml +++ b/src/llama_stack/distributions/watsonx/build.yaml @@ -28,6 +28,3 @@ distribution_spec: files: - provider_type: inline::localfs image_type: venv -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/tests/external/ramalama-stack/build.yaml b/tests/external/ramalama-stack/build.yaml index 150edb4fb..4403339f1 100644 --- a/tests/external/ramalama-stack/build.yaml +++ b/tests/external/ramalama-stack/build.yaml @@ -8,6 +8,3 @@ distribution_spec: module: ramalama_stack==0.3.0a0 image_type: venv image_name: ramalama-stack-test -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/uv.lock b/uv.lock index 8c648c362..a3ca9881c 100644 --- a/uv.lock +++ b/uv.lock @@ -2008,6 +2008,7 @@ dependencies = [ { name = "opentelemetry-sdk" }, { name = "pillow" }, { name = "prompt-toolkit" }, + { name = "psycopg2-binary" }, { name = "pydantic" }, { name = "pyjwt", extra = ["crypto"] }, { name = "python-dotenv" }, @@ -2160,6 +2161,7 @@ requires-dist = [ { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, { name = "pillow" }, { name = "prompt-toolkit" }, + { name = "psycopg2-binary" }, { name = "pydantic", specifier = ">=2.11.9" }, { name = "pyjwt", extras = ["crypto"], specifier = ">=2.10.0" }, { name = "python-dotenv" }, From 17f8ab31b5460c80d7ab405550eaefb77a009d66 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Fri, 21 Nov 2025 12:07:52 -0500 Subject: [PATCH 03/14] fix: library client usage of BuildConfig Signed-off-by: Charlie Doern --- src/llama_stack/core/build.py | 4 ++-- src/llama_stack/core/library_client.py | 16 +--------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index ef64f2036..630916db4 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -9,7 +9,7 @@ import sys from pydantic import BaseModel from termcolor import cprint -from llama_stack.core.datatypes import BuildConfig, StackRunConfig +from llama_stack.core.datatypes import StackRunConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.distributions.template import DistributionTemplate from llama_stack.log import get_logger @@ -83,7 +83,7 @@ def get_provider_dependencies( return list(set(normal_deps)), list(set(special_deps)), list(set(external_provider_deps)) -def print_pip_install_help(config: BuildConfig): +def print_pip_install_help(config: StackRunConfig): 
normal_deps, special_deps, _ = get_provider_dependencies(config) cprint( diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index d6be7aeca..06d23e3d1 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -41,7 +41,6 @@ from termcolor import cprint from llama_stack.core.build import print_pip_install_help from llama_stack.core.configure import parse_and_maybe_upgrade_config -from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider_data_context from llama_stack.core.resolver import ProviderRegistry from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls @@ -266,20 +265,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): file=sys.stderr, ) if self.config_path_or_distro_name.endswith(".yaml"): - providers: dict[str, list[BuildProvider]] = {} - for api, run_providers in self.config.providers.items(): - for provider in run_providers: - providers.setdefault(api, []).append( - BuildProvider(provider_type=provider.provider_type, module=provider.module) - ) - providers = dict(providers) - build_config = BuildConfig( - distribution_spec=DistributionSpec( - providers=providers, - ), - external_providers_dir=self.config.external_providers_dir, - ) - print_pip_install_help(build_config) + print_pip_install_help(self.config) else: prefix = "!" if in_notebook() else "" cprint( From 4a3f9151e30e0fee8fb2e25e02beebffdb17df8c Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Mon, 24 Nov 2025 11:32:53 -0500 Subject: [PATCH 04/14] fix: rename StackRunConfig to StackConfig since this object represents our config for list-deps, run, etc lets rename it to simply `StackConfig` Signed-off-by: Charlie Doern --- src/llama_stack/cli/stack/_list_deps.py | 6 +++--- src/llama_stack/cli/stack/run.py | 8 ++++---- src/llama_stack/cli/stack/utils.py | 4 ++-- src/llama_stack/core/build.py | 6 +++--- src/llama_stack/core/configure.py | 8 ++++---- .../core/conversations/conversations.py | 4 ++-- src/llama_stack/core/datatypes.py | 4 ++-- src/llama_stack/core/external.py | 4 ++-- src/llama_stack/core/inspect.py | 6 +++--- src/llama_stack/core/prompts/prompts.py | 4 ++-- src/llama_stack/core/providers.py | 6 +++--- src/llama_stack/core/resolver.py | 12 ++++++------ src/llama_stack/core/routers/__init__.py | 4 ++-- src/llama_stack/core/server/server.py | 8 ++++---- src/llama_stack/core/stack.py | 18 +++++++++--------- tests/backward_compat/test_run_config.py | 4 ++-- .../test_persistence_integration.py | 6 +++--- tests/unit/conversations/test_conversations.py | 6 +++--- tests/unit/core/test_stack_validation.py | 6 +++--- tests/unit/core/test_storage_references.py | 4 ++-- tests/unit/distribution/test_distribution.py | 6 +++--- tests/unit/prompts/prompts/conftest.py | 4 ++-- tests/unit/server/test_resolver.py | 6 +++--- 23 files changed, 72 insertions(+), 72 deletions(-) diff --git a/src/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py index f11ce486a..80b67ce62 100644 --- a/src/llama_stack/cli/stack/_list_deps.py +++ b/src/llama_stack/cli/stack/_list_deps.py @@ -12,7 +12,7 @@ import yaml from termcolor import cprint from llama_stack.core.build import get_provider_dependencies -from llama_stack.core.datatypes import Provider, StackRunConfig +from llama_stack.core.datatypes import Provider, StackConfig from llama_stack.core.distribution import 
get_provider_registry from llama_stack.log import get_logger from llama_stack_api import Api @@ -78,7 +78,7 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None: with open(config_file) as f: try: contents = yaml.safe_load(f) - run_config = StackRunConfig(**contents) + run_config = StackConfig(**contents) except Exception as e: cprint( f"Could not parse config file {config_file}: {e}", @@ -119,7 +119,7 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None: file=sys.stderr, ) sys.exit(1) - run_config = StackRunConfig(providers=provider_list, image_name="providers-run") + run_config = StackConfig(providers=provider_list, image_name="providers-run") normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(run_config) normal_deps += SERVER_DEPENDENCIES diff --git a/src/llama_stack/cli/stack/run.py b/src/llama_stack/cli/stack/run.py index 73d8d13d5..c81ac52f4 100644 --- a/src/llama_stack/cli/stack/run.py +++ b/src/llama_stack/cli/stack/run.py @@ -17,7 +17,7 @@ from termcolor import cprint from llama_stack.cli.stack.utils import ImageType from llama_stack.cli.subcommand import Subcommand -from llama_stack.core.datatypes import Api, Provider, StackRunConfig +from llama_stack.core.datatypes import Api, Provider, StackConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars from llama_stack.core.storage.datatypes import ( @@ -156,7 +156,7 @@ class StackRun(Subcommand): # Write config to disk in providers-run directory distro_dir = DISTRIBS_BASE_DIR / "providers-run" - config_file = distro_dir / "run.yaml" + config_file = distro_dir / "config.yaml" logger.info(f"Writing generated config to: {config_file}") with open(config_file, "w") as f: @@ -194,7 +194,7 @@ class StackRun(Subcommand): logger_config = LoggingConfig(**cfg) else: logger_config = None - config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents))) + config = StackConfig(**cast_image_name_to_string(replace_env_vars(config_contents))) port = args.port or config.server.port host = config.server.host or "0.0.0.0" @@ -318,7 +318,7 @@ class StackRun(Subcommand): ), ) - return StackRunConfig( + return StackConfig( image_name="providers-run", apis=apis, providers=providers, diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py index d49b142e0..cb4c754d9 100644 --- a/src/llama_stack/cli/stack/utils.py +++ b/src/llama_stack/cli/stack/utils.py @@ -16,7 +16,7 @@ from termcolor import cprint from llama_stack.core.datatypes import ( BuildConfig, Provider, - StackRunConfig, + StackConfig, StorageConfig, ) from llama_stack.core.distribution import get_provider_registry @@ -61,7 +61,7 @@ def generate_run_config( """ apis = list(build_config.distribution_spec.providers.keys()) distro_dir = DISTRIBS_BASE_DIR / image_name - run_config = StackRunConfig( + run_config = StackConfig( container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), image_name=image_name, apis=apis, diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index 630916db4..4e6ccc9f7 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -9,7 +9,7 @@ import sys from pydantic import BaseModel from termcolor import cprint -from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.datatypes import StackConfig from llama_stack.core.distribution import get_provider_registry 
from llama_stack.distributions.template import DistributionTemplate from llama_stack.log import get_logger @@ -36,7 +36,7 @@ class ApiInput(BaseModel): def get_provider_dependencies( - config: StackRunConfig, + config: StackConfig, ) -> tuple[list[str], list[str], list[str]]: """Get normal and special dependencies from provider configuration.""" if isinstance(config, DistributionTemplate): @@ -83,7 +83,7 @@ def get_provider_dependencies( return list(set(normal_deps)), list(set(special_deps)), list(set(external_provider_deps)) -def print_pip_install_help(config: StackRunConfig): +def print_pip_install_help(config: StackConfig): normal_deps, special_deps, _ = get_provider_dependencies(config) cprint( diff --git a/src/llama_stack/core/configure.py b/src/llama_stack/core/configure.py index d738b8a61..7ec5b0864 100644 --- a/src/llama_stack/core/configure.py +++ b/src/llama_stack/core/configure.py @@ -10,7 +10,7 @@ from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, DistributionSpec, Provider, - StackRunConfig, + StackConfig, ) from llama_stack.core.distribution import ( builtin_automatically_routed_apis, @@ -44,7 +44,7 @@ def configure_single_provider(registry: dict[str, ProviderSpec], provider: Provi ) -def configure_api_providers(config: StackRunConfig, build_spec: DistributionSpec) -> StackRunConfig: +def configure_api_providers(config: StackConfig, build_spec: DistributionSpec) -> StackConfig: is_nux = len(config.providers) == 0 if is_nux: @@ -192,7 +192,7 @@ def upgrade_from_routing_table( return config_dict -def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig: +def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackConfig: if "routing_table" in config_dict: logger.info("Upgrading config...") config_dict = upgrade_from_routing_table(config_dict) @@ -200,4 +200,4 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfi config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION processed_config_dict = replace_env_vars(config_dict) - return StackRunConfig(**cast_image_name_to_string(processed_config_dict)) + return StackConfig(**cast_image_name_to_string(processed_config_dict)) diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py index 90402439b..3e867721e 100644 --- a/src/llama_stack/core/conversations/conversations.py +++ b/src/llama_stack/core/conversations/conversations.py @@ -10,7 +10,7 @@ from typing import Any, Literal from pydantic import BaseModel, TypeAdapter -from llama_stack.core.datatypes import AccessRule, StackRunConfig +from llama_stack.core.datatypes import AccessRule, StackConfig from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl from llama_stack.log import get_logger @@ -36,7 +36,7 @@ class ConversationServiceConfig(BaseModel): :param policy: Access control rules """ - run_config: StackRunConfig + run_config: StackConfig policy: list[AccessRule] = [] diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 94d235678..5ab2b43dc 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -490,7 +490,7 @@ class ServerConfig(BaseModel): ) -class StackRunConfig(BaseModel): +class StackConfig(BaseModel): version: int = LLAMA_STACK_RUN_CONFIG_VERSION image_name: str = Field( @@ -565,7 +565,7 @@ can be instantiated multiple times (with different configs) if 
necessary. return v @model_validator(mode="after") - def validate_server_stores(self) -> "StackRunConfig": + def validate_server_stores(self) -> "StackConfig": backend_map = self.storage.backends stores = self.storage.stores kv_backends = { diff --git a/src/llama_stack/core/external.py b/src/llama_stack/core/external.py index d1a2d6e42..aa2a0c2c9 100644 --- a/src/llama_stack/core/external.py +++ b/src/llama_stack/core/external.py @@ -7,14 +7,14 @@ import yaml -from llama_stack.core.datatypes import BuildConfig, StackRunConfig +from llama_stack.core.datatypes import BuildConfig, StackConfig from llama_stack.log import get_logger from llama_stack_api import Api, ExternalApiSpec logger = get_logger(name=__name__, category="core") -def load_external_apis(config: StackRunConfig | BuildConfig | None) -> dict[Api, ExternalApiSpec]: +def load_external_apis(config: StackConfig | BuildConfig | None) -> dict[Api, ExternalApiSpec]: """Load external API specifications from the configured directory. Args: diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py index 272c9d1bc..3b60027f0 100644 --- a/src/llama_stack/core/inspect.py +++ b/src/llama_stack/core/inspect.py @@ -8,7 +8,7 @@ from importlib.metadata import version from pydantic import BaseModel -from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.datatypes import StackConfig from llama_stack.core.external import load_external_apis from llama_stack.core.server.routes import get_all_api_routes from llama_stack_api import ( @@ -22,7 +22,7 @@ from llama_stack_api import ( class DistributionInspectConfig(BaseModel): - run_config: StackRunConfig + run_config: StackConfig async def get_provider_impl(config, deps): @@ -40,7 +40,7 @@ class DistributionInspectImpl(Inspect): pass async def list_routes(self, api_filter: str | None = None) -> ListRoutesResponse: - run_config: StackRunConfig = self.config.run_config + run_config: StackConfig = self.config.run_config # Helper function to determine if a route should be included based on api_filter def should_include_route(webmethod) -> bool: diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py index ff67ad138..40539f342 100644 --- a/src/llama_stack/core/prompts/prompts.py +++ b/src/llama_stack/core/prompts/prompts.py @@ -9,7 +9,7 @@ from typing import Any from pydantic import BaseModel -from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.datatypes import StackConfig from llama_stack.core.storage.kvstore import KVStore, kvstore_impl from llama_stack_api import ListPromptsResponse, Prompt, Prompts @@ -20,7 +20,7 @@ class PromptServiceConfig(BaseModel): :param run_config: Stack run configuration containing distribution info """ - run_config: StackRunConfig + run_config: StackConfig async def get_provider_impl(config: PromptServiceConfig, deps: dict[Any, Any]): diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py index e3fe3c7b3..1f0ecae6f 100644 --- a/src/llama_stack/core/providers.py +++ b/src/llama_stack/core/providers.py @@ -12,14 +12,14 @@ from pydantic import BaseModel from llama_stack.log import get_logger from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers -from .datatypes import StackRunConfig +from .datatypes import StackConfig from .utils.config import redact_sensitive_fields logger = get_logger(name=__name__, category="core") class ProviderImplConfig(BaseModel): - run_config: StackRunConfig + run_config: 
StackConfig async def get_provider_impl(config, deps): @@ -42,7 +42,7 @@ class ProviderImpl(Providers): async def list_providers(self) -> ListProvidersResponse: run_config = self.config.run_config - safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump())) + safe_config = StackConfig(**redact_sensitive_fields(run_config.model_dump())) providers_health = await self.get_providers_health() ret = [] for api, providers in safe_config.providers.items(): diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py index 6bc32c2d0..193131ace 100644 --- a/src/llama_stack/core/resolver.py +++ b/src/llama_stack/core/resolver.py @@ -14,7 +14,7 @@ from llama_stack.core.datatypes import ( AutoRoutedProviderSpec, Provider, RoutingTableProviderSpec, - StackRunConfig, + StackConfig, ) from llama_stack.core.distribution import builtin_automatically_routed_apis from llama_stack.core.external import load_external_apis @@ -147,7 +147,7 @@ ProviderRegistry = dict[Api, dict[str, ProviderSpec]] async def resolve_impls( - run_config: StackRunConfig, + run_config: StackConfig, provider_registry: ProviderRegistry, dist_registry: DistributionRegistry, policy: list[AccessRule], @@ -217,7 +217,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str, def validate_and_prepare_providers( - run_config: StackRunConfig, provider_registry: ProviderRegistry, routing_table_apis: set[Api], router_apis: set[Api] + run_config: StackConfig, provider_registry: ProviderRegistry, routing_table_apis: set[Api], router_apis: set[Api] ) -> dict[str, dict[str, ProviderWithSpec]]: """Validates providers, handles deprecations, and organizes them into a spec dictionary.""" providers_with_specs: dict[str, dict[str, ProviderWithSpec]] = {} @@ -261,7 +261,7 @@ def validate_provider(provider: Provider, api: Api, provider_registry: ProviderR def sort_providers_by_deps( - providers_with_specs: dict[str, dict[str, ProviderWithSpec]], run_config: StackRunConfig + providers_with_specs: dict[str, dict[str, ProviderWithSpec]], run_config: StackConfig ) -> list[tuple[str, ProviderWithSpec]]: """Sorts providers based on their dependencies.""" sorted_providers: list[tuple[str, ProviderWithSpec]] = topological_sort( @@ -278,7 +278,7 @@ async def instantiate_providers( sorted_providers: list[tuple[str, ProviderWithSpec]], router_apis: set[Api], dist_registry: DistributionRegistry, - run_config: StackRunConfig, + run_config: StackConfig, policy: list[AccessRule], internal_impls: dict[Api, Any] | None = None, ) -> dict[Api, Any]: @@ -357,7 +357,7 @@ async def instantiate_provider( deps: dict[Api, Any], inner_impls: dict[str, Any], dist_registry: DistributionRegistry, - run_config: StackRunConfig, + run_config: StackConfig, policy: list[AccessRule], ): provider_spec = provider.spec diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py index 289755bcb..1b7fe3556 100644 --- a/src/llama_stack/core/routers/__init__.py +++ b/src/llama_stack/core/routers/__init__.py @@ -10,7 +10,7 @@ from llama_stack.core.datatypes import ( AccessRule, RoutedProtocol, ) -from llama_stack.core.stack import StackRunConfig +from llama_stack.core.datatypes import StackConfig from llama_stack.core.store import DistributionRegistry from llama_stack.providers.utils.inference.inference_store import InferenceStore from llama_stack_api import Api, RoutingTable @@ -51,7 +51,7 @@ async def get_routing_table_impl( async def get_auto_router_impl( - api: Api, routing_table: 
RoutingTable, deps: dict[str, Any], run_config: StackRunConfig, policy: list[AccessRule] + api: Api, routing_table: RoutingTable, deps: dict[str, Any], run_config: StackConfig, policy: list[AccessRule] ) -> Any: from .datasets import DatasetIORouter from .eval_scoring import EvalRouter, ScoringRouter diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py index 0d3513980..0ab7ce260 100644 --- a/src/llama_stack/core/server/server.py +++ b/src/llama_stack/core/server/server.py @@ -34,7 +34,7 @@ from pydantic import BaseModel, ValidationError from llama_stack.core.access_control.access_control import AccessDeniedError from llama_stack.core.datatypes import ( AuthenticationRequiredError, - StackRunConfig, + StackConfig, process_cors_config, ) from llama_stack.core.distribution import builtin_automatically_routed_apis @@ -149,7 +149,7 @@ class StackApp(FastAPI): start background tasks (e.g. refresh model registry periodically) from the lifespan context manager. """ - def __init__(self, config: StackRunConfig, *args, **kwargs): + def __init__(self, config: StackConfig, *args, **kwargs): super().__init__(*args, **kwargs) self.stack: Stack = Stack(config) @@ -385,7 +385,7 @@ def create_app() -> StackApp: logger = get_logger(name=__name__, category="core::server", config=logger_config) config = replace_env_vars(config_contents) - config = StackRunConfig(**cast_image_name_to_string(config)) + config = StackConfig(**cast_image_name_to_string(config)) _log_run_config(run_config=config) @@ -506,7 +506,7 @@ def create_app() -> StackApp: return app -def _log_run_config(run_config: StackRunConfig): +def _log_run_config(run_config: StackConfig): """Logs the run config with redacted fields and disabled providers removed.""" logger.info("Run configuration:") safe_config = redact_sensitive_fields(run_config.model_dump(mode="json")) diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 8ba1f2afd..554fae303 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -14,7 +14,7 @@ from typing import Any import yaml from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl -from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig +from llama_stack.core.datatypes import Provider, SafetyConfig, StackConfig, VectorStoresConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl @@ -108,7 +108,7 @@ REGISTRY_REFRESH_TASK = None TEST_RECORDING_CONTEXT = None -async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): +async def register_resources(run_config: StackConfig, impls: dict[Api, Any]): for rsrc, api, register_method, list_method in RESOURCES: objects = getattr(run_config.registered_resources, rsrc) if api not in impls: @@ -341,7 +341,7 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]: return config_dict -def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig) -> None: +def add_internal_implementations(impls: dict[Api, Any], run_config: StackConfig) -> None: """Add internal implementations (inspect and providers) to the implementations dictionary. 
Args: @@ -373,7 +373,7 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf impls[Api.conversations] = conversations_impl -def _initialize_storage(run_config: StackRunConfig): +def _initialize_storage(run_config: StackConfig): kv_backends: dict[str, StorageBackendConfig] = {} sql_backends: dict[str, StorageBackendConfig] = {} for backend_name, backend_config in run_config.storage.backends.items(): @@ -393,7 +393,7 @@ def _initialize_storage(run_config: StackRunConfig): class Stack: - def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None): + def __init__(self, run_config: StackConfig, provider_registry: ProviderRegistry | None = None): self.run_config = run_config self.provider_registry = provider_registry self.impls = None @@ -499,7 +499,7 @@ async def refresh_registry_task(impls: dict[Api, Any]): await asyncio.sleep(REGISTRY_REFRESH_INTERVAL_SECONDS) -def get_stack_run_config_from_distro(distro: str) -> StackRunConfig: +def get_stack_run_config_from_distro(distro: str) -> StackConfig: distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/run.yaml" with importlib.resources.as_file(distro_path) as path: @@ -507,12 +507,12 @@ def get_stack_run_config_from_distro(distro: str) -> StackRunConfig: raise ValueError(f"Distribution '{distro}' not found at {distro_path}") run_config = yaml.safe_load(path.open()) - return StackRunConfig(**replace_env_vars(run_config)) + return StackConfig(**replace_env_vars(run_config)) def run_config_from_adhoc_config_spec( adhoc_config_spec: str, provider_registry: ProviderRegistry | None = None -) -> StackRunConfig: +) -> StackConfig: """ Create an adhoc distribution from a list of API providers. @@ -552,7 +552,7 @@ def run_config_from_adhoc_config_spec( config=provider_config, ) ] - config = StackRunConfig( + config = StackConfig( image_name="distro-test", apis=list(provider_configs_by_api.keys()), providers=provider_configs_by_api, diff --git a/tests/backward_compat/test_run_config.py b/tests/backward_compat/test_run_config.py index 13aac85e4..ccc18c84f 100644 --- a/tests/backward_compat/test_run_config.py +++ b/tests/backward_compat/test_run_config.py @@ -17,7 +17,7 @@ from pathlib import Path import pytest import yaml -from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.datatypes import StackConfig def get_test_configs(): @@ -49,4 +49,4 @@ def test_load_run_config(config_file): with open(config_file) as f: config_data = yaml.safe_load(f) - StackRunConfig.model_validate(config_data) + StackConfig.model_validate(config_data) diff --git a/tests/integration/test_persistence_integration.py b/tests/integration/test_persistence_integration.py index e9b80dc0c..ff42b451f 100644 --- a/tests/integration/test_persistence_integration.py +++ b/tests/integration/test_persistence_integration.py @@ -6,7 +6,7 @@ import yaml -from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.datatypes import StackConfig from llama_stack.core.storage.datatypes import ( PostgresKVStoreConfig, PostgresSqlStoreConfig, @@ -20,7 +20,7 @@ def test_starter_distribution_config_loads_and_resolves(): with open("llama_stack/distributions/starter/run.yaml") as f: config_dict = yaml.safe_load(f) - config = StackRunConfig(**config_dict) + config = StackConfig(**config_dict) # Config should have named backends and explicit store references assert config.storage is not None @@ -50,7 +50,7 @@ def test_postgres_demo_distribution_config_loads(): with 
open("llama_stack/distributions/postgres-demo/run.yaml") as f: config_dict = yaml.safe_load(f) - config = StackRunConfig(**config_dict) + config = StackConfig(**config_dict) # Should have postgres backend assert config.storage is not None diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index 3f9df5fc0..b481be63c 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -16,7 +16,7 @@ from llama_stack.core.conversations.conversations import ( ConversationServiceConfig, ConversationServiceImpl, ) -from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.datatypes import StackConfig from llama_stack.core.storage.datatypes import ( ServerStoresConfig, SqliteSqlStoreConfig, @@ -44,7 +44,7 @@ async def service(): ), ) register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) - run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage) + run_config = StackConfig(image_name="test", apis=[], providers={}, storage=storage) config = ConversationServiceConfig(run_config=run_config, policy=[]) service = ConversationServiceImpl(config, {}) @@ -151,7 +151,7 @@ async def test_policy_configuration(): ), ) register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) - run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage) + run_config = StackConfig(image_name="test", apis=[], providers={}, storage=storage) config = ConversationServiceConfig(run_config=run_config, policy=restrictive_policy) service = ConversationServiceImpl(config, {}) diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py index 5f75bc522..06c274a21 100644 --- a/tests/unit/core/test_stack_validation.py +++ b/tests/unit/core/test_stack_validation.py @@ -10,7 +10,7 @@ from unittest.mock import AsyncMock import pytest -from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, VectorStoresConfig +from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackConfig, VectorStoresConfig from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config from llama_stack.core.storage.datatypes import ServerStoresConfig, StorageConfig from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield @@ -19,7 +19,7 @@ from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, class TestVectorStoresValidation: async def test_validate_missing_model(self): """Test validation fails when model not found.""" - run_config = StackRunConfig( + run_config = StackConfig( image_name="test", providers={}, storage=StorageConfig( @@ -47,7 +47,7 @@ class TestVectorStoresValidation: async def test_validate_success(self): """Test validation passes with valid model.""" - run_config = StackRunConfig( + run_config = StackConfig( image_name="test", providers={}, storage=StorageConfig( diff --git a/tests/unit/core/test_storage_references.py b/tests/unit/core/test_storage_references.py index 7bceba74d..68afa5b73 100644 --- a/tests/unit/core/test_storage_references.py +++ b/tests/unit/core/test_storage_references.py @@ -11,7 +11,7 @@ from pydantic import ValidationError from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, - StackRunConfig, + StackConfig, ) from llama_stack.core.storage.datatypes import ( InferenceStoreReference, @@ -51,7 +51,7 @@ def _base_run_config(**overrides): ), 
), ) - return StackRunConfig( + return StackConfig( version=LLAMA_STACK_RUN_CONFIG_VERSION, image_name="test-distro", apis=[], diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py index b8d6ba55d..b8ff484a7 100644 --- a/tests/unit/distribution/test_distribution.py +++ b/tests/unit/distribution/test_distribution.py @@ -11,7 +11,7 @@ import pytest import yaml from pydantic import BaseModel, Field, ValidationError -from llama_stack.core.datatypes import Api, Provider, StackRunConfig +from llama_stack.core.datatypes import Api, Provider, StackConfig from llama_stack.core.distribution import INTERNAL_APIS, get_provider_registry, providable_apis from llama_stack.core.storage.datatypes import ( InferenceStoreReference, @@ -53,7 +53,7 @@ def _default_storage() -> StorageConfig: ) -def make_stack_config(**overrides) -> StackRunConfig: +def make_stack_config(**overrides) -> StackConfig: storage = overrides.pop("storage", _default_storage()) defaults = dict( image_name="test_image", @@ -62,7 +62,7 @@ def make_stack_config(**overrides) -> StackRunConfig: storage=storage, ) defaults.update(overrides) - return StackRunConfig(**defaults) + return StackConfig(**defaults) @pytest.fixture diff --git a/tests/unit/prompts/prompts/conftest.py b/tests/unit/prompts/prompts/conftest.py index 8bfc1f03c..8ed5b429a 100644 --- a/tests/unit/prompts/prompts/conftest.py +++ b/tests/unit/prompts/prompts/conftest.py @@ -27,7 +27,7 @@ async def temp_prompt_store(tmp_path_factory): temp_dir = tmp_path_factory.getbasetemp() db_path = str(temp_dir / f"{unique_id}.db") - from llama_stack.core.datatypes import StackRunConfig + from llama_stack.core.datatypes import StackConfig storage = StorageConfig( backends={ @@ -41,7 +41,7 @@ async def temp_prompt_store(tmp_path_factory): prompts=KVStoreReference(backend="kv_test", namespace="prompts"), ), ) - mock_run_config = StackRunConfig( + mock_run_config = StackConfig( image_name="test-distribution", apis=[], providers={}, diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index a1b03f630..5ba4b5cfc 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -11,7 +11,7 @@ from unittest.mock import AsyncMock, MagicMock from pydantic import BaseModel, Field -from llama_stack.core.datatypes import Api, Provider, StackRunConfig +from llama_stack.core.datatypes import Api, Provider, StackConfig from llama_stack.core.resolver import resolve_impls from llama_stack.core.routers.inference import InferenceRouter from llama_stack.core.routing_tables.models import ModelsRoutingTable @@ -71,7 +71,7 @@ class SampleImpl: pass -def make_run_config(**overrides) -> StackRunConfig: +def make_run_config(**overrides) -> StackConfig: storage = overrides.pop( "storage", StorageConfig( @@ -97,7 +97,7 @@ def make_run_config(**overrides) -> StackRunConfig: storage=storage, ) defaults.update(overrides) - return StackRunConfig(**defaults) + return StackConfig(**defaults) async def test_resolve_impls_basic(): From 0cd98c957e3247104e945b65e0aee9e503db6d8e Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Tue, 25 Nov 2025 12:56:57 -0500 Subject: [PATCH 05/14] chore: rename run.yaml to config.yaml since we only have one config, lets call it config.yaml! this should be treated as the source of truth for starting a stack change all file names, tests, etc. 
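for anyone updating scripts, loading a distribution config is unchanged apart from the
filename. a minimal sketch (assuming the distro lookup helper now resolves config.yaml,
and using the starter distribution as an example):

```python
# Sketch: load a distribution's config.yaml (formerly run.yaml) and parse it into
# StackConfig, mirroring what get_stack_run_config_from_distro does internally.
import importlib.resources

import yaml

from llama_stack.core.datatypes import StackConfig
from llama_stack.core.stack import replace_env_vars

distro_path = importlib.resources.files("llama_stack") / "distributions/starter/config.yaml"
with importlib.resources.as_file(distro_path) as path:
    contents = yaml.safe_load(path.open())

config = StackConfig(**replace_env_vars(contents))
print(config.image_name, config.server.port)
```
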
Signed-off-by: Charlie Doern --- .github/workflows/README.md | 2 +- .github/workflows/backward-compat.yml | 52 +++++++++---------- .github/workflows/integration-auth-tests.yml | 18 +++---- .github/workflows/providers-build.yml | 4 +- .github/workflows/providers-list-deps.yml | 2 +- .../test-external-provider-module.yml | 6 +-- CHANGELOG.md | 2 +- docs/docs/advanced_apis/evaluation.mdx | 2 +- docs/docs/building_applications/rag.mdx | 2 +- docs/docs/building_applications/tools.mdx | 2 +- docs/docs/concepts/evaluation_concepts.mdx | 2 +- docs/docs/contributing/new_api_provider.mdx | 2 +- docs/docs/deploying/kubernetes_deployment.mdx | 6 +-- docs/docs/distributions/building_distro.mdx | 10 ++-- docs/docs/distributions/configuration.mdx | 4 +- .../distributions/customizing_run_yaml.mdx | 18 +++---- .../distributions/importing_as_library.mdx | 2 +- docs/docs/distributions/index.mdx | 2 +- .../remote_hosted_distro/watsonx.md | 4 +- .../self_hosted_distro/dell-tgi.md | 6 +-- .../distributions/self_hosted_distro/dell.md | 4 +- .../self_hosted_distro/meta-reference-gpu.md | 12 ++--- .../self_hosted_distro/nvidia.md | 12 ++--- .../getting_started/detailed_tutorial.mdx | 2 +- .../external/external-providers-guide.mdx | 2 +- .../openai_file_operations_quick_reference.md | 2 +- .../openai_responses_limitations.mdx | 4 +- .../docs/references/evals_reference/index.mdx | 4 +- src/llama_stack/cli/stack/list_stacks.py | 8 +-- src/llama_stack/cli/stack/utils.py | 6 +-- src/llama_stack/core/routers/__init__.py | 2 +- src/llama_stack/core/routing_tables/models.py | 2 +- src/llama_stack/core/stack.py | 2 +- .../ci-tests/{run.yaml => config.yaml} | 0 .../dell/{run.yaml => config.yaml} | 0 src/llama_stack/distributions/dell/dell.py | 2 +- .../distributions/dell/doc_template.md | 12 ++--- .../{run.yaml => config.yaml} | 0 .../meta-reference-gpu/doc_template.md | 10 ++-- .../meta-reference-gpu/meta_reference.py | 2 +- .../nvidia/{run.yaml => config.yaml} | 0 .../distributions/nvidia/doc_template.md | 10 ++-- .../distributions/nvidia/nvidia.py | 2 +- .../oci/{run.yaml => config.yaml} | 0 src/llama_stack/distributions/oci/oci.py | 2 +- .../open-benchmark/{run.yaml => config.yaml} | 0 .../open-benchmark/open_benchmark.py | 2 +- .../postgres-demo/{run.yaml => config.yaml} | 0 .../starter-gpu/{run.yaml => config.yaml} | 0 .../starter/{run.yaml => config.yaml} | 0 .../distributions/starter/starter.py | 2 +- .../watsonx/{run.yaml => config.yaml} | 0 .../distributions/watsonx/watsonx.py | 2 +- src/llama_stack/log.py | 4 +- .../inline/inference/meta_reference/config.py | 2 +- .../providers/remote/inference/tgi/tgi.py | 4 +- .../providers/remote/inference/vllm/vllm.py | 2 +- tests/README.md | 2 +- tests/backward_compat/test_run_config.py | 8 +-- tests/integration/README.md | 2 +- tests/integration/conftest.py | 2 +- .../test_persistence_integration.py | 4 +- tests/unit/cli/test_stack_config.py | 2 +- tests/unit/distribution/test_stack_list.py | 4 +- 64 files changed, 147 insertions(+), 145 deletions(-) rename src/llama_stack/distributions/ci-tests/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/dell/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/meta-reference-gpu/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/nvidia/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/oci/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/open-benchmark/{run.yaml => config.yaml} (100%) rename 
src/llama_stack/distributions/postgres-demo/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/starter-gpu/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/starter/{run.yaml => config.yaml} (100%) rename src/llama_stack/distributions/watsonx/{run.yaml => config.yaml} (100%) diff --git a/.github/workflows/README.md b/.github/workflows/README.md index bb848209f..18f9a88bf 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -4,7 +4,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl | Name | File | Purpose | | ---- | ---- | ------- | -| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for run.yaml configs | +| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for config.yaml configs | | Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md | | API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. | | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script | diff --git a/.github/workflows/backward-compat.yml b/.github/workflows/backward-compat.yml index 9f950a8b9..c3e5b1f59 100644 --- a/.github/workflows/backward-compat.yml +++ b/.github/workflows/backward-compat.yml @@ -1,6 +1,6 @@ name: Backward Compatibility Check -run-name: Check backward compatibility for run.yaml configs +run-name: Check backward compatibility for config.yaml configs on: pull_request: @@ -12,7 +12,7 @@ on: paths: - 'src/llama_stack/core/datatypes.py' - 'src/llama_stack/providers/datatypes.py' - - 'src/llama_stack/distributions/**/run.yaml' + - 'src/llama_stack/distributions/**/config.yaml' - 'tests/backward_compat/**' - '.github/workflows/backward-compat.yml' @@ -45,15 +45,15 @@ jobs: run: | uv sync --group dev - - name: Extract run.yaml files from main branch + - name: Extract config.yaml files from main branch id: extract_configs run: | - # Get list of run.yaml paths from main + # Get list of config.yaml paths from main git fetch origin main - CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/run.yaml$" || true) + CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/config.yaml$" || true) if [ -z "$CONFIG_PATHS" ]; then - echo "No run.yaml files found in main branch" + echo "No config.yaml files found in main branch" exit 1 fi @@ -125,7 +125,7 @@ jobs: echo "" echo "⚠️ WARNING: Breaking changes detected but acknowledged" echo "" - echo "This PR introduces backward-incompatible changes to run.yaml." + echo "This PR introduces backward-incompatible changes to config.yaml." echo "The changes have been properly acknowledged." echo "" exit 0 # Pass the check @@ -133,7 +133,7 @@ jobs: echo "" echo "❌ ERROR: Breaking changes detected without acknowledgment" echo "" - echo "This PR introduces backward-incompatible changes to run.yaml" + echo "This PR introduces backward-incompatible changes to config.yaml" echo "that will break existing user configurations." 
echo "" echo "To acknowledge this breaking change, do ONE of:" @@ -155,11 +155,11 @@ jobs: with: fetch-depth: 0 - - name: Extract ci-tests run.yaml from main + - name: Extract ci-tests config.yaml from main run: | git fetch origin main - git show origin/main:src/llama_stack/distributions/ci-tests/run.yaml > /tmp/main-ci-tests-run.yaml - echo "Extracted ci-tests run.yaml from main branch" + git show origin/main:src/llama_stack/distributions/ci-tests/config.yaml > /tmp/main-ci-tests-config.yaml + echo "Extracted ci-tests config.yaml from main branch" - name: Setup test environment uses: ./.github/actions/setup-test-environment @@ -175,7 +175,7 @@ jobs: continue-on-error: true uses: ./.github/actions/run-and-record-tests with: - stack-config: /tmp/main-ci-tests-run.yaml + stack-config: /tmp/main-ci-tests-config.yaml setup: 'ollama' inference-mode: 'replay' suite: 'base' @@ -258,21 +258,21 @@ jobs: env: GH_TOKEN: ${{ github.token }} - - name: Extract ci-tests run.yaml from release + - name: Extract ci-tests config.yaml from release if: steps.get_release.outputs.has_release == 'true' id: extract_config run: | RELEASE_TAG="${{ steps.get_release.outputs.tag }}" # Try with src/ prefix first (newer releases), then without (older releases) - if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then - echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (src/ path)" + if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/config.yaml" > /tmp/release-ci-tests-config.yaml 2>/dev/null; then + echo "Extracted ci-tests config.yaml from release $RELEASE_TAG (src/ path)" echo "has_config=true" >> $GITHUB_OUTPUT - elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then - echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (old path)" + elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/config.yaml" > /tmp/release-ci-tests-config.yaml 2>/dev/null; then + echo "Extracted ci-tests config.yaml from release $RELEASE_TAG (old path)" echo "has_config=true" >> $GITHUB_OUTPUT else - echo "::warning::ci-tests/run.yaml not found in release $RELEASE_TAG" + echo "::warning::ci-tests/config.yaml not found in release $RELEASE_TAG" echo "has_config=false" >> $GITHUB_OUTPUT fi @@ -292,7 +292,7 @@ jobs: continue-on-error: true uses: ./.github/actions/run-and-record-tests with: - stack-config: /tmp/release-ci-tests-run.yaml + stack-config: /tmp/release-ci-tests-config.yaml setup: 'ollama' inference-mode: 'replay' suite: 'base' @@ -318,7 +318,7 @@ jobs: continue-on-error: true uses: ./.github/actions/run-and-record-tests with: - stack-config: /tmp/release-ci-tests-run.yaml + stack-config: /tmp/release-ci-tests-config.yaml setup: 'ollama' inference-mode: 'replay' suite: 'base' @@ -447,11 +447,11 @@ jobs: run: | RELEASE_TAG="${{ steps.get_release.outputs.tag }}" - # Get run.yaml files from the release (try both src/ and old path) - CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/run.yaml$" || true) + # Get config.yaml files from the release (try both src/ and old path) + CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/config.yaml$" || true) if [ -z "$CONFIG_PATHS" ]; then - echo "::warning::No run.yaml files found in release $RELEASE_TAG" + echo "::warning::No config.yaml files found in release $RELEASE_TAG" echo "has_configs=false" >> $GITHUB_OUTPUT exit 0 fi @@ -523,7 
+523,7 @@ jobs: ⚠️ This PR introduces a schema breaking change that affects compatibility with the latest release. - Users on release \`$RELEASE_TAG\` will not be able to upgrade - - Existing run.yaml configurations will fail validation + - Existing config.yaml configurations will fail validation The tests pass on \`main\` but fail with this PR's changes. @@ -543,7 +543,7 @@ jobs: - Tests **PASS** on main branch ✅ - Tests **FAIL** on PR branch ❌ - Users on release \`$RELEASE_TAG\` will not be able to upgrade - - Existing run.yaml configurations will fail validation + - Existing config.yaml configurations will fail validation > **Note:** This is informational only and does not block merge. > Consider whether this breaking change is acceptable for users. @@ -570,7 +570,7 @@ jobs: cat >> $GITHUB_STEP_SUMMARY < $run_dir/run.yaml + cat <<'EOF' > $run_dir/config.yaml version: '2' image_name: kube apis: [] @@ -101,17 +101,17 @@ jobs: server: port: 8321 EOF - yq eval '.server.auth.provider_config.type = "${{ matrix.auth-provider }}"' -i $run_dir/run.yaml - yq eval '.server.auth.provider_config.tls_cafile = "${{ env.KUBERNETES_CA_CERT_PATH }}"' -i $run_dir/run.yaml - yq eval '.server.auth.provider_config.issuer = "${{ env.KUBERNETES_ISSUER }}"' -i $run_dir/run.yaml - yq eval '.server.auth.provider_config.audience = "${{ env.KUBERNETES_AUDIENCE }}"' -i $run_dir/run.yaml - yq eval '.server.auth.provider_config.jwks.uri = "${{ env.KUBERNETES_API_SERVER_URL }}"' -i $run_dir/run.yaml - yq eval '.server.auth.provider_config.jwks.token = "${{ env.TOKEN }}"' -i $run_dir/run.yaml - cat $run_dir/run.yaml + yq eval '.server.auth.provider_config.type = "${{ matrix.auth-provider }}"' -i $run_dir/config.yaml + yq eval '.server.auth.provider_config.tls_cafile = "${{ env.KUBERNETES_CA_CERT_PATH }}"' -i $run_dir/config.yaml + yq eval '.server.auth.provider_config.issuer = "${{ env.KUBERNETES_ISSUER }}"' -i $run_dir/config.yaml + yq eval '.server.auth.provider_config.audience = "${{ env.KUBERNETES_AUDIENCE }}"' -i $run_dir/config.yaml + yq eval '.server.auth.provider_config.jwks.uri = "${{ env.KUBERNETES_API_SERVER_URL }}"' -i $run_dir/config.yaml + yq eval '.server.auth.provider_config.jwks.token = "${{ env.TOKEN }}"' -i $run_dir/config.yaml + cat $run_dir/config.yaml # avoid line breaks in the server log, especially because we grep it below. 
export LLAMA_STACK_LOG_WIDTH=200 - nohup uv run llama stack run $run_dir/run.yaml > server.log 2>&1 & + nohup uv run llama stack run $run_dir/config.yaml > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index f2559a258..02a2fb001 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -116,7 +116,7 @@ jobs: BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml) BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests" BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE" - BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml" + BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/config.yaml" if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" fi @@ -162,7 +162,7 @@ jobs: BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml) BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests" BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE" - BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml" + BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/config.yaml" if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" fi diff --git a/.github/workflows/providers-list-deps.yml b/.github/workflows/providers-list-deps.yml index a2e8a87c9..02497c15e 100644 --- a/.github/workflows/providers-list-deps.yml +++ b/.github/workflows/providers-list-deps.yml @@ -102,4 +102,4 @@ jobs: USE_COPY_NOT_MOUNT: "true" LLAMA_STACK_DIR: "." 
run: | - uv run llama stack list-deps src/llama_stack/distributions/ci-tests/run.yaml + uv run llama stack list-deps src/llama_stack/distributions/ci-tests/config.yaml diff --git a/.github/workflows/test-external-provider-module.yml b/.github/workflows/test-external-provider-module.yml index 39f2356aa..3d4e924af 100644 --- a/.github/workflows/test-external-provider-module.yml +++ b/.github/workflows/test-external-provider-module.yml @@ -43,8 +43,8 @@ jobs: nohup ramalama serve llama3.2:3b-instruct-fp16 > ramalama_server.log 2>&1 & - name: Apply image type to config file run: | - yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/run.yaml - cat tests/external/ramalama-stack/run.yaml + yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/config.yaml + cat tests/external/ramalama-stack/config.yaml - name: Install distribution dependencies run: | @@ -59,7 +59,7 @@ jobs: # Use the virtual environment created by the build step (name comes from build config) source ramalama-stack-test/bin/activate uv pip list - nohup llama stack run tests/external/ramalama-stack/run.yaml > server.log 2>&1 & + nohup llama stack run tests/external/ramalama-stack/config.yaml > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index c51a1b2aa..bba04fa11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -85,7 +85,7 @@ Published on: 2025-07-28T23:35:23Z ## Highlights * Automatic model registration for self-hosted providers (ollama and vllm currently). No need for `INFERENCE_MODEL` environment variables which need to be updated, etc. -* Much simplified starter distribution. Most `ENABLE_` env variables are now gone. When you set `VLLM_URL`, the `vllm` provider is auto-enabled. Similar for `MILVUS_URL`, `PGVECTOR_DB`, etc. Check the [run.yaml](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/starter/run.yaml) for more details. +* Much simplified starter distribution. Most `ENABLE_` env variables are now gone. When you set `VLLM_URL`, the `vllm` provider is auto-enabled. Similar for `MILVUS_URL`, `PGVECTOR_DB`, etc. Check the [config.yaml](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/starter/config.yaml) for more details. * All tests migrated to pytest now (thanks @Elbehery) * DPO implementation in the post-training provider (thanks @Nehanth) * (Huge!) Support for external APIs and providers thereof (thanks @leseb, @cdoern and others). This is a really big deal -- you can now add more APIs completely out of tree and experiment with them before (optionally) wanting to contribute back. 
diff --git a/docs/docs/advanced_apis/evaluation.mdx b/docs/docs/advanced_apis/evaluation.mdx index 1efaa4c5c..0d5be4184 100644 --- a/docs/docs/advanced_apis/evaluation.mdx +++ b/docs/docs/advanced_apis/evaluation.mdx @@ -96,7 +96,7 @@ We have built-in functionality to run the supported open-benchmarks using llama- Spin up llama stack server with 'open-benchmark' template ``` -llama stack run llama_stack/distributions/open-benchmark/run.yaml +llama stack run llama_stack/distributions/open-benchmark/config.yaml ``` diff --git a/docs/docs/building_applications/rag.mdx b/docs/docs/building_applications/rag.mdx index b1681dc62..4cddbe2bf 100644 --- a/docs/docs/building_applications/rag.mdx +++ b/docs/docs/building_applications/rag.mdx @@ -85,7 +85,7 @@ Llama Stack provides OpenAI-compatible RAG capabilities through: ## Configuring Default Embedding Models -To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your run.yaml like so: +To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your config.yaml like so: ```yaml vector_stores: diff --git a/docs/docs/building_applications/tools.mdx b/docs/docs/building_applications/tools.mdx index f7b913fef..e6fe14f50 100644 --- a/docs/docs/building_applications/tools.mdx +++ b/docs/docs/building_applications/tools.mdx @@ -85,7 +85,7 @@ Features: - Context retrieval with token limits :::note[Default Configuration] -By default, llama stack run.yaml defines toolgroups for web search, wolfram alpha and rag, that are provided by tavily-search, wolfram-alpha and rag providers. +By default, llama stack config.yaml defines toolgroups for web search, wolfram alpha and rag, that are provided by tavily-search, wolfram-alpha and rag providers. ::: ## Model Context Protocol (MCP) diff --git a/docs/docs/concepts/evaluation_concepts.mdx b/docs/docs/concepts/evaluation_concepts.mdx index c7a13fd70..42a7ce336 100644 --- a/docs/docs/concepts/evaluation_concepts.mdx +++ b/docs/docs/concepts/evaluation_concepts.mdx @@ -47,7 +47,7 @@ We have built-in functionality to run the supported open-benckmarks using llama- Spin up llama stack server with 'open-benchmark' template ```bash -llama stack run llama_stack/distributions/open-benchmark/run.yaml +llama stack run llama_stack/distributions/open-benchmark/config.yaml ``` #### Run eval CLI diff --git a/docs/docs/contributing/new_api_provider.mdx b/docs/docs/contributing/new_api_provider.mdx index 2efaf08b4..b2e6b4d18 100644 --- a/docs/docs/contributing/new_api_provider.mdx +++ b/docs/docs/contributing/new_api_provider.mdx @@ -14,7 +14,7 @@ This guide will walk you through the process of adding a new API provider to Lla - Begin by reviewing the [core concepts](../concepts/) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.) - Determine the provider type ([Remote](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote) or [Inline](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline)). Remote providers make requests to external services, while inline providers execute implementation locally. - Add your provider to the appropriate [Registry](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/registry/). Specify pip dependencies necessary. 
-- Update any distribution [Templates](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/) `build.yaml` and `run.yaml` files if they should include your provider by default. Run [./scripts/distro_codegen.py](https://github.com/meta-llama/llama-stack/blob/main/scripts/distro_codegen.py) if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation. +- Update any distribution [Templates](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/) `build.yaml` and `config.yaml` files if they should include your provider by default. Run [./scripts/distro_codegen.py](https://github.com/meta-llama/llama-stack/blob/main/scripts/distro_codegen.py) if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation. Here are some example PRs to help you get started: diff --git a/docs/docs/deploying/kubernetes_deployment.mdx b/docs/docs/deploying/kubernetes_deployment.mdx index 48d08f0db..bb04da033 100644 --- a/docs/docs/deploying/kubernetes_deployment.mdx +++ b/docs/docs/deploying/kubernetes_deployment.mdx @@ -133,7 +133,7 @@ For more information about the operator, see the [llama-stack-k8s-operator repos ### Step 4: Deploy Llama Stack Server using Operator Create a `LlamaStackDistribution` custom resource to deploy the Llama Stack server. The operator will automatically create the necessary Deployment, Service, and other resources. -You can optionally override the default `run.yaml` using `spec.server.userConfig` with a ConfigMap (see [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec)). +You can optionally override the default `config.yaml` using `spec.server.userConfig` with a ConfigMap (see [userConfig spec](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/docs/api-overview.md#userconfigspec)). ```yaml cat < ProviderSpec: [ramalama-stack](https://github.com/containers/ramalama-stack) is a recognized external provider that supports installation via module. -To install Llama Stack with this external provider a user can provider the following run.yaml: +To install Llama Stack with this external provider a user can provider the following config.yaml: ```yaml version: 2 diff --git a/docs/docs/providers/files/openai_file_operations_quick_reference.md b/docs/docs/providers/files/openai_file_operations_quick_reference.md index 43e2318e2..c07bc5f9a 100644 --- a/docs/docs/providers/files/openai_file_operations_quick_reference.md +++ b/docs/docs/providers/files/openai_file_operations_quick_reference.md @@ -51,7 +51,7 @@ results = await client.vector_stores.search( > **Note**: For detailed configuration examples and options, see [Configuration Examples](../openai_file_operations_support.md#configuration-examples) in the full documentation. 
-**Basic Setup**: Configure vector_io and files providers in your run.yaml +**Basic Setup**: Configure vector_io and files providers in your config.yaml ## Common Use Cases diff --git a/docs/docs/providers/openai_responses_limitations.mdx b/docs/docs/providers/openai_responses_limitations.mdx index 19007438e..6aaf07b8b 100644 --- a/docs/docs/providers/openai_responses_limitations.mdx +++ b/docs/docs/providers/openai_responses_limitations.mdx @@ -123,7 +123,7 @@ Connectors are MCP servers maintained and managed by the Responses API provider. **Open Questions:** - Should Llama Stack include built-in support for some, all, or none of OpenAI's connectors? -- Should there be a mechanism for administrators to add custom connectors via `run.yaml` or an API? +- Should there be a mechanism for administrators to add custom connectors via `config.yaml` or an API? --- @@ -210,7 +210,7 @@ Metadata allows you to attach additional information to a response for your own **Status:** Feature Request -When calling the OpenAI Responses API, model outputs go through safety models configured by OpenAI administrators. Perhaps Llama Stack should provide a mechanism to configure safety models (or non-model logic) for all Responses requests, either through `run.yaml` or an administrative API. +When calling the OpenAI Responses API, model outputs go through safety models configured by OpenAI administrators. Perhaps Llama Stack should provide a mechanism to configure safety models (or non-model logic) for all Responses requests, either through `config.yaml` or an administrative API. --- diff --git a/docs/docs/references/evals_reference/index.mdx b/docs/docs/references/evals_reference/index.mdx index 0ec555e66..85b2d9621 100644 --- a/docs/docs/references/evals_reference/index.mdx +++ b/docs/docs/references/evals_reference/index.mdx @@ -355,7 +355,7 @@ The purpose of scoring function is to calculate the score for each example based Firstly, you can see if the existing [llama stack scoring functions](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline/scoring) can fulfill your need. If not, you need to write a new scoring function based on what benchmark author / other open source repo describe. ### Add new benchmark into template -Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distributions/open-benchmark/run.yaml) +Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distributions/open-benchmark/config.yaml) Secondly, you need to add the new benchmark you just created under the `benchmarks` resource in the same template. 
To add the new benchmark, you need to have - `benchmark_id`: identifier of the benchmark @@ -366,7 +366,7 @@ Secondly, you need to add the new benchmark you just created under the `benchmar Spin up llama stack server with 'open-benchmark' templates ```bash -llama stack run llama_stack/distributions/open-benchmark/run.yaml +llama stack run llama_stack/distributions/open-benchmark/config.yaml ``` Run eval benchmark CLI with your new benchmark id diff --git a/src/llama_stack/cli/stack/list_stacks.py b/src/llama_stack/cli/stack/list_stacks.py index ae59ba911..0153b3391 100644 --- a/src/llama_stack/cli/stack/list_stacks.py +++ b/src/llama_stack/cli/stack/list_stacks.py @@ -64,14 +64,14 @@ class StackListBuilds(Subcommand): for name, (path, source_type) in sorted(distributions.items()): row = [name, source_type, str(path)] # Check for build and run config files - # For built-in distributions, configs are named build.yaml and run.yaml - # For custom distributions, configs are named {name}-build.yaml and {name}-run.yaml + # For built-in distributions, configs are named build.yaml and config.yaml + # For custom distributions, configs are named {name}-build.yaml and {name}-config.yaml if source_type == "built-in": build_config = "Yes" if (path / "build.yaml").exists() else "No" - run_config = "Yes" if (path / "run.yaml").exists() else "No" + run_config = "Yes" if (path / "config.yaml").exists() else "No" else: build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No" - run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No" + run_config = "Yes" if (path / f"{name}-config.yaml").exists() else "No" row.extend([build_config, run_config]) rows.append(row) print_table(rows, headers, separate_rows=True) diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py index cb4c754d9..e02448e04 100644 --- a/src/llama_stack/cli/stack/utils.py +++ b/src/llama_stack/cli/stack/utils.py @@ -57,7 +57,7 @@ def generate_run_config( image_name: str, ) -> Path: """ - Generate a run.yaml template file for user to edit from a build.yaml file + Generate a config.yaml template file for user to edit from a build.yaml file """ apis = list(build_config.distribution_spec.providers.keys()) distro_dir = DISTRIBS_BASE_DIR / image_name @@ -123,7 +123,7 @@ def generate_run_config( ) run_config.providers[api].append(p_spec) - run_config_file = build_dir / f"{image_name}-run.yaml" + run_config_file = build_dir / f"{image_name}-config.yaml" with open(run_config_file, "w") as f: to_write = json.loads(run_config.model_dump_json()) @@ -131,7 +131,7 @@ def generate_run_config( # Only print this message for non-container builds since it will be displayed before the # container is built - # For non-container builds, the run.yaml is generated at the very end of the build process so it + # For non-container builds, the config.yaml is generated at the very end of the build process so it # makes sense to display this message if build_config.image_type != LlamaStackImageType.CONTAINER.value: cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr) diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py index 1b7fe3556..a4327563f 100644 --- a/src/llama_stack/core/routers/__init__.py +++ b/src/llama_stack/core/routers/__init__.py @@ -9,8 +9,8 @@ from typing import Any from llama_stack.core.datatypes import ( AccessRule, RoutedProtocol, + StackConfig, ) -from llama_stack.core.datatypes import StackConfig 
from llama_stack.core.store import DistributionRegistry from llama_stack.providers.utils.inference.inference_store import InferenceStore from llama_stack_api import Api, RoutingTable diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py index 1facbb27b..bc96db985 100644 --- a/src/llama_stack/core/routing_tables/models.py +++ b/src/llama_stack/core/routing_tables/models.py @@ -224,7 +224,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models): existing_models = await self.get_all_with_type("model") # we may have an alias for the model registered by the user (or during initialization - # from run.yaml) that we need to keep track of + # from config.yaml) that we need to keep track of model_ids = {} for model in existing_models: if model.provider_id != provider_id: diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 554fae303..96f9eb8b9 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -500,7 +500,7 @@ async def refresh_registry_task(impls: dict[Api, Any]): def get_stack_run_config_from_distro(distro: str) -> StackConfig: - distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/run.yaml" + distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/config.yaml" with importlib.resources.as_file(distro_path) as path: if not path.exists(): diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/config.yaml similarity index 100% rename from src/llama_stack/distributions/ci-tests/run.yaml rename to src/llama_stack/distributions/ci-tests/config.yaml diff --git a/src/llama_stack/distributions/dell/run.yaml b/src/llama_stack/distributions/dell/config.yaml similarity index 100% rename from src/llama_stack/distributions/dell/run.yaml rename to src/llama_stack/distributions/dell/config.yaml diff --git a/src/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py index 52a07b7f1..50da2bd70 100644 --- a/src/llama_stack/distributions/dell/dell.py +++ b/src/llama_stack/distributions/dell/dell.py @@ -111,7 +111,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, providers=providers, run_configs={ - "run.yaml": RunConfigSettings( + "config.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider, embedding_provider], "vector_io": [chromadb_provider], diff --git a/src/llama_stack/distributions/dell/doc_template.md b/src/llama_stack/distributions/dell/doc_template.md index 1530f665a..c3730360f 100644 --- a/src/llama_stack/distributions/dell/doc_template.md +++ b/src/llama_stack/distributions/dell/doc_template.md @@ -141,14 +141,14 @@ docker run \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v $HOME/.llama:/root/.llama \ - -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \ + -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-config.yaml \ -e INFERENCE_MODEL=$INFERENCE_MODEL \ -e DEH_URL=$DEH_URL \ -e SAFETY_MODEL=$SAFETY_MODEL \ -e DEH_SAFETY_URL=$DEH_SAFETY_URL \ -e CHROMA_URL=$CHROMA_URL \ llamastack/distribution-{{ name }} \ - --config /root/my-run.yaml \ + --config /root/my-config.yaml \ --port $LLAMA_STACK_PORT ``` @@ -157,16 +157,16 @@ docker run \ You can also run the Docker container with a custom run configuration file by mounting it into the container: ```bash -# Set the path to your custom run.yaml file -CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml +# Set 
the path to your custom config.yaml file +CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml docker run -it \ --pull always \ --network host \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v $HOME/.llama:/root/.llama \ - -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \ - -e RUN_CONFIG_PATH=/app/custom-run.yaml \ + -v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \ + -e RUN_CONFIG_PATH=/app/custom-config.yaml \ -e INFERENCE_MODEL=$INFERENCE_MODEL \ -e DEH_URL=$DEH_URL \ -e CHROMA_URL=$CHROMA_URL \ diff --git a/src/llama_stack/distributions/meta-reference-gpu/run.yaml b/src/llama_stack/distributions/meta-reference-gpu/config.yaml similarity index 100% rename from src/llama_stack/distributions/meta-reference-gpu/run.yaml rename to src/llama_stack/distributions/meta-reference-gpu/config.yaml diff --git a/src/llama_stack/distributions/meta-reference-gpu/doc_template.md b/src/llama_stack/distributions/meta-reference-gpu/doc_template.md index af71d8388..5f4caa964 100644 --- a/src/llama_stack/distributions/meta-reference-gpu/doc_template.md +++ b/src/llama_stack/distributions/meta-reference-gpu/doc_template.md @@ -73,8 +73,8 @@ docker run \ You can also run the Docker container with a custom run configuration file by mounting it into the container: ```bash -# Set the path to your custom run.yaml file -CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml +# Set the path to your custom config.yaml file +CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml LLAMA_STACK_PORT=8321 docker run \ @@ -83,8 +83,8 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ - -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \ - -e RUN_CONFIG_PATH=/app/custom-run.yaml \ + -v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \ + -e RUN_CONFIG_PATH=/app/custom-config.yaml \ llamastack/distribution-{{ name }} \ --port $LLAMA_STACK_PORT ``` @@ -105,7 +105,7 @@ Make sure you have the Llama Stack CLI available. 
```bash llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ -llama stack run distributions/{{ name }}/run.yaml \ +llama stack run distributions/{{ name }}/config.yaml \ --port 8321 ``` diff --git a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py index a515794d5..83aba6b82 100644 --- a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py +++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py @@ -105,7 +105,7 @@ def get_distribution_template() -> DistributionTemplate: template_path=Path(__file__).parent / "doc_template.md", providers=providers, run_configs={ - "run.yaml": RunConfigSettings( + "config.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider, embedding_provider], "vector_io": [vector_io_provider], diff --git a/src/llama_stack/distributions/nvidia/run.yaml b/src/llama_stack/distributions/nvidia/config.yaml similarity index 100% rename from src/llama_stack/distributions/nvidia/run.yaml rename to src/llama_stack/distributions/nvidia/config.yaml diff --git a/src/llama_stack/distributions/nvidia/doc_template.md b/src/llama_stack/distributions/nvidia/doc_template.md index 054a1e3ec..7152ee268 100644 --- a/src/llama_stack/distributions/nvidia/doc_template.md +++ b/src/llama_stack/distributions/nvidia/doc_template.md @@ -128,8 +128,8 @@ docker run \ You can also run the Docker container with a custom run configuration file by mounting it into the container: ```bash -# Set the path to your custom run.yaml file -CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml +# Set the path to your custom config.yaml file +CUSTOM_RUN_CONFIG=/path/to/your/custom-config.yaml LLAMA_STACK_PORT=8321 docker run \ @@ -137,8 +137,8 @@ docker run \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ - -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \ - -e RUN_CONFIG_PATH=/app/custom-run.yaml \ + -v $CUSTOM_RUN_CONFIG:/app/custom-config.yaml \ + -e RUN_CONFIG_PATH=/app/custom-config.yaml \ -e NVIDIA_API_KEY=$NVIDIA_API_KEY \ llamastack/distribution-{{ name }} \ --port $LLAMA_STACK_PORT @@ -162,7 +162,7 @@ INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct llama stack list-deps nvidia | xargs -L1 uv pip install NVIDIA_API_KEY=$NVIDIA_API_KEY \ INFERENCE_MODEL=$INFERENCE_MODEL \ -llama stack run ./run.yaml \ +llama stack run ./config.yaml \ --port 8321 ``` diff --git a/src/llama_stack/distributions/nvidia/nvidia.py b/src/llama_stack/distributions/nvidia/nvidia.py index a92a2e6f8..6959a210d 100644 --- a/src/llama_stack/distributions/nvidia/nvidia.py +++ b/src/llama_stack/distributions/nvidia/nvidia.py @@ -81,7 +81,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate: template_path=Path(__file__).parent / "doc_template.md", providers=providers, run_configs={ - "run.yaml": RunConfigSettings( + "config.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], "datasetio": [datasetio_provider], diff --git a/src/llama_stack/distributions/oci/run.yaml b/src/llama_stack/distributions/oci/config.yaml similarity index 100% rename from src/llama_stack/distributions/oci/run.yaml rename to src/llama_stack/distributions/oci/config.yaml diff --git a/src/llama_stack/distributions/oci/oci.py b/src/llama_stack/distributions/oci/oci.py index 1f21840f1..338dd3661 100644 --- a/src/llama_stack/distributions/oci/oci.py +++ 
b/src/llama_stack/distributions/oci/oci.py @@ -74,7 +74,7 @@ def get_distribution_template(name: str = "oci") -> DistributionTemplate: template_path=Path(__file__).parent / "doc_template.md", providers=providers, run_configs={ - "run.yaml": RunConfigSettings( + "config.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], "vector_io": [vector_io_provider], diff --git a/src/llama_stack/distributions/open-benchmark/run.yaml b/src/llama_stack/distributions/open-benchmark/config.yaml similarity index 100% rename from src/llama_stack/distributions/open-benchmark/run.yaml rename to src/llama_stack/distributions/open-benchmark/config.yaml diff --git a/src/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py index 1f4dbf2c2..7d79231dd 100644 --- a/src/llama_stack/distributions/open-benchmark/open_benchmark.py +++ b/src/llama_stack/distributions/open-benchmark/open_benchmark.py @@ -261,7 +261,7 @@ def get_distribution_template() -> DistributionTemplate: providers=providers, available_models_by_provider=available_models, run_configs={ - "run.yaml": RunConfigSettings( + "config.yaml": RunConfigSettings( provider_overrides={ "inference": inference_providers, "vector_io": vector_io_providers, diff --git a/src/llama_stack/distributions/postgres-demo/run.yaml b/src/llama_stack/distributions/postgres-demo/config.yaml similarity index 100% rename from src/llama_stack/distributions/postgres-demo/run.yaml rename to src/llama_stack/distributions/postgres-demo/config.yaml diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/config.yaml similarity index 100% rename from src/llama_stack/distributions/starter-gpu/run.yaml rename to src/llama_stack/distributions/starter-gpu/config.yaml diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/config.yaml similarity index 100% rename from src/llama_stack/distributions/starter/run.yaml rename to src/llama_stack/distributions/starter/config.yaml diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index 8a0efdf1f..1576721ab 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -276,7 +276,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: template_path=None, providers=providers, run_configs={ - "run.yaml": base_run_settings, + "config.yaml": base_run_settings, "run-with-postgres-store.yaml": postgres_run_settings, }, run_config_env_vars={ diff --git a/src/llama_stack/distributions/watsonx/run.yaml b/src/llama_stack/distributions/watsonx/config.yaml similarity index 100% rename from src/llama_stack/distributions/watsonx/run.yaml rename to src/llama_stack/distributions/watsonx/config.yaml diff --git a/src/llama_stack/distributions/watsonx/watsonx.py b/src/llama_stack/distributions/watsonx/watsonx.py index d79aea872..edc011a6c 100644 --- a/src/llama_stack/distributions/watsonx/watsonx.py +++ b/src/llama_stack/distributions/watsonx/watsonx.py @@ -69,7 +69,7 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: template_path=None, providers=providers, run_configs={ - "run.yaml": RunConfigSettings( + "config.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], "files": [files_provider], diff --git a/src/llama_stack/log.py b/src/llama_stack/log.py index 
c11c2c06f..0bc59deaf 100644 --- a/src/llama_stack/log.py +++ b/src/llama_stack/log.py @@ -92,10 +92,10 @@ def config_to_category_levels(category: str, level: str): def parse_yaml_config(yaml_config: LoggingConfig) -> dict[str, int]: """ - Helper function to parse a yaml logging configuration found in the run.yaml + Helper function to parse a yaml logging configuration found in the config.yaml Parameters: - yaml_config (Logging): the logger config object found in the run.yaml + yaml_config (Logging): the logger config object found in the config.yaml Returns: Dict[str, int]: A dictionary mapping categories to their log levels. diff --git a/src/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py index ec6e8bfe8..189133a4b 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/config.py @@ -16,7 +16,7 @@ class MetaReferenceInferenceConfig(BaseModel): # this is a placeholder to indicate inference model id # the actual inference model id is dtermined by the moddel id in the request # Note: you need to register the model before using it for inference - # models in the resouce list in the run.yaml config will be registered automatically + # models in the resouce list in the config.yaml config will be registered automatically model: str | None = None torch_seed: int | None = None max_seq_len: int = 4096 diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py index 5dc8c33f7..976df921b 100644 --- a/src/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py @@ -51,7 +51,9 @@ class _HfAdapter(OpenAIMixin): class TGIAdapter(_HfAdapter): async def initialize(self, config: TGIImplConfig) -> None: if not config.base_url: - raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.") + raise ValueError( + "You must provide a URL in config.yaml (or via the TGI_URL environment variable) to use TGI." + ) log.info(f"Initializing TGI client with url={config.base_url}") # Extract base URL without /v1 for HF client initialization base_url_str = str(config.base_url).rstrip("/") diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py index 6664ca36b..45d9176aa 100644 --- a/src/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py @@ -46,7 +46,7 @@ class VLLMInferenceAdapter(OpenAIMixin): async def initialize(self) -> None: if not self.config.base_url: raise ValueError( - "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM." + "You must provide a URL in config.yaml (or via the VLLM_URL environment variable) to use vLLM." ) async def health(self) -> HealthResponse: diff --git a/tests/README.md b/tests/README.md index c00829d3e..5cf9d95af 100644 --- a/tests/README.md +++ b/tests/README.md @@ -35,7 +35,7 @@ For running integration tests, you must provide a few things: - **`server:`** - automatically start a server with the given config (e.g., `server:starter`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running. 
- **`server::`** - same as above but with a custom port (e.g., `server:starter:8322`) - a URL which points to a Llama Stack distribution server - - a distribution name (e.g., `starter`) or a path to a `run.yaml` file + - a distribution name (e.g., `starter`) or a path to a `config.yaml` file - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface. - Any API keys you need to use should be set in the environment, or can be passed in with the --env option. diff --git a/tests/backward_compat/test_run_config.py b/tests/backward_compat/test_run_config.py index ccc18c84f..bd832df10 100644 --- a/tests/backward_compat/test_run_config.py +++ b/tests/backward_compat/test_run_config.py @@ -5,10 +5,10 @@ # the root directory of this source tree. """ -Backward compatibility test for run.yaml files. +Backward compatibility test for config.yaml files. This test ensures that changes to StackRunConfig don't break -existing run.yaml files from previous versions. +existing config.yaml files from previous versions. """ import os @@ -36,10 +36,10 @@ def get_test_configs(): else: # Local mode: test current distribution configs repo_root = Path(__file__).parent.parent.parent - config_files = sorted((repo_root / "src" / "llama_stack" / "distributions").glob("*/run.yaml")) + config_files = sorted((repo_root / "src" / "llama_stack" / "distributions").glob("*/config.yaml")) if not config_files: - pytest.skip("No run.yaml files found in distributions/") + pytest.skip("No config.yaml files found in distributions/") return config_files diff --git a/tests/integration/README.md b/tests/integration/README.md index 3559b785c..48f511261 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -25,7 +25,7 @@ Here are the most important options: - **`server:`** - automatically start a server with the given config (e.g., `server:starter`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running. - **`server::`** - same as above but with a custom port (e.g., `server:starter:8322`) - a URL which points to a Llama Stack distribution server - - a distribution name (e.g., `starter`) or a path to a `run.yaml` file + - a distribution name (e.g., `starter`) or a path to a `config.yaml` file - a comma-separated list of api=provider pairs, e.g. `inference=ollama,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface. - `--env`: set environment variables, e.g. --env KEY=value. this is a utility option to set environment variables required by various providers. diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 0d0af687f..9854eedc6 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -151,7 +151,7 @@ def pytest_addoption(parser): """ a 'pointer' to the stack. this can be either be: (a) a template name like `starter`, or - (b) a path to a run.yaml file, or + (b) a path to a config.yaml file, or (c) an adhoc config spec, e.g. 
`inference=fireworks,safety=llama-guard,agents=meta-reference`, or (d) a server config like `server:ci-tests`, or (e) a docker config like `docker:ci-tests` (builds and runs container) diff --git a/tests/integration/test_persistence_integration.py b/tests/integration/test_persistence_integration.py index ff42b451f..1de737db2 100644 --- a/tests/integration/test_persistence_integration.py +++ b/tests/integration/test_persistence_integration.py @@ -17,7 +17,7 @@ from llama_stack.core.storage.datatypes import ( def test_starter_distribution_config_loads_and_resolves(): """Integration: Actual starter config should parse and have correct storage structure.""" - with open("llama_stack/distributions/starter/run.yaml") as f: + with open("llama_stack/distributions/starter/config.yaml") as f: config_dict = yaml.safe_load(f) config = StackConfig(**config_dict) @@ -47,7 +47,7 @@ def test_starter_distribution_config_loads_and_resolves(): def test_postgres_demo_distribution_config_loads(): """Integration: Postgres demo should use Postgres backend for all stores.""" - with open("llama_stack/distributions/postgres-demo/run.yaml") as f: + with open("llama_stack/distributions/postgres-demo/config.yaml") as f: config_dict = yaml.safe_load(f) config = StackConfig(**config_dict) diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py index 6aefac003..a82d30805 100644 --- a/tests/unit/cli/test_stack_config.py +++ b/tests/unit/cli/test_stack_config.py @@ -302,7 +302,7 @@ def test_providers_flag_generates_config_with_api_keys(): # Read the generated config file from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR - config_file = DISTRIBS_BASE_DIR / "providers-run" / "run.yaml" + config_file = DISTRIBS_BASE_DIR / "providers-run" / "config.yaml" with open(config_file) as f: config_dict = yaml.safe_load(f) diff --git a/tests/unit/distribution/test_stack_list.py b/tests/unit/distribution/test_stack_list.py index 725ce3410..7a51ee7e6 100644 --- a/tests/unit/distribution/test_stack_list.py +++ b/tests/unit/distribution/test_stack_list.py @@ -32,7 +32,7 @@ def mock_distribs_base_dir(tmp_path): starter_custom = custom_dir / "starter" starter_custom.mkdir() (starter_custom / "starter-build.yaml").write_text("# build config") - (starter_custom / "starter-run.yaml").write_text("# run config") + (starter_custom / "starter-config.yaml").write_text("# run config") return custom_dir @@ -48,7 +48,7 @@ def mock_distro_dir(tmp_path): distro_path = distro_dir / distro_name distro_path.mkdir() (distro_path / "build.yaml").write_text("# build config") - (distro_path / "run.yaml").write_text("# run config") + (distro_path / "config.yaml").write_text("# run config") return distro_dir From f05d5138e867987f9ab22b4e4ee85edf2987d8ec Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Wed, 26 Nov 2025 15:52:21 -0500 Subject: [PATCH 06/14] fix: rename some more usages change some more run.yaml into config.yaml references, alter some parameter names, etc Signed-off-by: Charlie Doern --- .github/workflows/test-external.yml | 6 +++--- docs/docs/concepts/apis/external.mdx | 4 ++-- src/llama_stack/cli/stack/_list_deps.py | 10 +++++----- src/llama_stack/core/conversations/conversations.py | 4 ++-- src/llama_stack/core/inspect.py | 8 ++++---- src/llama_stack/core/prompts/prompts.py | 4 ++-- src/llama_stack/core/providers.py | 4 ++-- src/llama_stack/core/stack.py | 10 +++++----- tests/external/{run-byoa.yaml => config.yaml} | 0 tests/unit/prompts/prompts/conftest.py | 2 +- 10 files changed, 26 insertions(+), 
26 deletions(-) rename tests/external/{run-byoa.yaml => config.yaml} (100%) diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml index fed3967ee..02544a9cd 100644 --- a/.github/workflows/test-external.yml +++ b/.github/workflows/test-external.yml @@ -44,14 +44,14 @@ jobs: - name: Print distro dependencies run: | - uv run --no-sync llama stack list-deps tests/external/run-byoa.yaml + uv run --no-sync llama stack list-deps tests/external/config.yaml - name: Build distro from config file run: | uv venv ci-test source ci-test/bin/activate uv pip install -e . - LLAMA_STACK_LOGGING=all=CRITICAL llama stack list-deps tests/external/run-byoa.yaml | xargs -L1 uv pip install + LLAMA_STACK_LOGGING=all=CRITICAL llama stack list-deps tests/external/config.yaml | xargs -L1 uv pip install - name: Start Llama Stack server in background if: ${{ matrix.image-type }} == 'venv' @@ -62,7 +62,7 @@ jobs: # Use the virtual environment created by the build step (name comes from build config) source ci-test/bin/activate uv pip list - nohup llama stack run tests/external/run-byoa.yaml > server.log 2>&1 & + nohup llama stack run tests/external/config.yaml > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/docs/docs/concepts/apis/external.mdx b/docs/docs/concepts/apis/external.mdx index 005b85647..4c2c92410 100644 --- a/docs/docs/concepts/apis/external.mdx +++ b/docs/docs/concepts/apis/external.mdx @@ -337,7 +337,7 @@ uv pip install -e . 7. Configure Llama Stack to use the provider: ```yaml -# ~/.llama/run-byoa.yaml +# ~/.llama/config.yaml version: "2" image_name: "llama-stack-api-weather" apis: @@ -356,7 +356,7 @@ server: 8. Run the server: ```bash -llama stack run ~/.llama/run-byoa.yaml +llama stack run ~/.llama/config.yaml ``` 9. 
Test the API: diff --git a/src/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py index 80b67ce62..dbfb1bb51 100644 --- a/src/llama_stack/cli/stack/_list_deps.py +++ b/src/llama_stack/cli/stack/_list_deps.py @@ -78,7 +78,7 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None: with open(config_file) as f: try: contents = yaml.safe_load(f) - run_config = StackConfig(**contents) + config = StackConfig(**contents) except Exception as e: cprint( f"Could not parse config file {config_file}: {e}", @@ -119,16 +119,16 @@ def run_stack_list_deps_command(args: argparse.Namespace) -> None: file=sys.stderr, ) sys.exit(1) - run_config = StackConfig(providers=provider_list, image_name="providers-run") + config = StackConfig(providers=provider_list, image_name="providers-run") - normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(run_config) + normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(config) normal_deps += SERVER_DEPENDENCIES # Add external API dependencies - if run_config.external_apis_dir: + if config.external_apis_dir: from llama_stack.core.external import load_external_apis - external_apis = load_external_apis(run_config) + external_apis = load_external_apis(config) if external_apis: for _, api_spec in external_apis.items(): normal_deps.extend(api_spec.pip_packages) diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py index 3e867721e..812156e09 100644 --- a/src/llama_stack/core/conversations/conversations.py +++ b/src/llama_stack/core/conversations/conversations.py @@ -36,7 +36,7 @@ class ConversationServiceConfig(BaseModel): :param policy: Access control rules """ - run_config: StackConfig + config: StackConfig policy: list[AccessRule] = [] @@ -56,7 +56,7 @@ class ConversationServiceImpl(Conversations): self.policy = config.policy # Use conversations store reference from run config - conversations_ref = config.run_config.storage.stores.conversations + conversations_ref = config.config.storage.stores.conversations if not conversations_ref: raise ValueError("storage.stores.conversations must be configured in run config") diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py index 3b60027f0..f14326f2d 100644 --- a/src/llama_stack/core/inspect.py +++ b/src/llama_stack/core/inspect.py @@ -22,7 +22,7 @@ from llama_stack_api import ( class DistributionInspectConfig(BaseModel): - run_config: StackConfig + config: StackConfig async def get_provider_impl(config, deps): @@ -40,7 +40,7 @@ class DistributionInspectImpl(Inspect): pass async def list_routes(self, api_filter: str | None = None) -> ListRoutesResponse: - run_config: StackConfig = self.config.run_config + config: StackConfig = self.config.config # Helper function to determine if a route should be included based on api_filter def should_include_route(webmethod) -> bool: @@ -55,7 +55,7 @@ class DistributionInspectImpl(Inspect): return not webmethod.deprecated and webmethod.level == api_filter ret = [] - external_apis = load_external_apis(run_config) + external_apis = load_external_apis(config) all_endpoints = get_all_api_routes(external_apis) for api, endpoints in all_endpoints.items(): # Always include provider and inspect APIs, filter others based on run config @@ -72,7 +72,7 @@ class DistributionInspectImpl(Inspect): ] ) else: - providers = run_config.providers.get(api.value, []) + providers = config.providers.get(api.value, []) if providers: 
# Only process if there are providers for this API ret.extend( [ diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py index 40539f342..44e560091 100644 --- a/src/llama_stack/core/prompts/prompts.py +++ b/src/llama_stack/core/prompts/prompts.py @@ -20,7 +20,7 @@ class PromptServiceConfig(BaseModel): :param run_config: Stack run configuration containing distribution info """ - run_config: StackConfig + config: StackConfig async def get_provider_impl(config: PromptServiceConfig, deps: dict[Any, Any]): @@ -40,7 +40,7 @@ class PromptServiceImpl(Prompts): async def initialize(self) -> None: # Use prompts store reference from run config - prompts_ref = self.config.run_config.storage.stores.prompts + prompts_ref = self.config.config.storage.stores.prompts if not prompts_ref: raise ValueError("storage.stores.prompts must be configured in run config") self.kvstore = await kvstore_impl(prompts_ref) diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py index 1f0ecae6f..c758c65bc 100644 --- a/src/llama_stack/core/providers.py +++ b/src/llama_stack/core/providers.py @@ -19,7 +19,7 @@ logger = get_logger(name=__name__, category="core") class ProviderImplConfig(BaseModel): - run_config: StackConfig + config: StackConfig async def get_provider_impl(config, deps): @@ -41,7 +41,7 @@ class ProviderImpl(Providers): pass async def list_providers(self) -> ListProvidersResponse: - run_config = self.config.run_config + run_config = self.config.config safe_config = StackConfig(**redact_sensitive_fields(run_config.model_dump())) providers_health = await self.get_providers_health() ret = [] diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 96f9eb8b9..9310bce41 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -341,7 +341,7 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]: return config_dict -def add_internal_implementations(impls: dict[Api, Any], run_config: StackConfig) -> None: +def add_internal_implementations(impls: dict[Api, Any], config: StackConfig) -> None: """Add internal implementations (inspect and providers) to the implementations dictionary.
Args: @@ -349,25 +349,25 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackConfig) run_config: Stack run configuration """ inspect_impl = DistributionInspectImpl( - DistributionInspectConfig(run_config=run_config), + DistributionInspectConfig(config=config), deps=impls, ) impls[Api.inspect] = inspect_impl providers_impl = ProviderImpl( - ProviderImplConfig(run_config=run_config), + ProviderImplConfig(config=config), deps=impls, ) impls[Api.providers] = providers_impl prompts_impl = PromptServiceImpl( - PromptServiceConfig(run_config=run_config), + PromptServiceConfig(config=config), deps=impls, ) impls[Api.prompts] = prompts_impl conversations_impl = ConversationServiceImpl( - ConversationServiceConfig(run_config=run_config), + ConversationServiceConfig(config=config), deps=impls, ) impls[Api.conversations] = conversations_impl diff --git a/tests/external/run-byoa.yaml b/tests/external/config.yaml similarity index 100% rename from tests/external/run-byoa.yaml rename to tests/external/config.yaml diff --git a/tests/unit/prompts/prompts/conftest.py b/tests/unit/prompts/prompts/conftest.py index 8ed5b429a..bd3fcc785 100644 --- a/tests/unit/prompts/prompts/conftest.py +++ b/tests/unit/prompts/prompts/conftest.py @@ -47,7 +47,7 @@ async def temp_prompt_store(tmp_path_factory): providers={}, storage=storage, ) - config = PromptServiceConfig(run_config=mock_run_config) + config = PromptServiceConfig(config=mock_run_config) store = PromptServiceImpl(config, deps={}) register_kvstore_backends({"kv_test": storage.backends["kv_test"]}) From 0424afb7ed6cc9d0f98aa7d10b1fae75baea8fe2 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Fri, 21 Nov 2025 12:06:47 -0500 Subject: [PATCH 07/14] feat: remove BuildConfig and its usage Signed-off-by: Charlie Doern --- src/llama_stack/cli/stack/utils.py | 127 ---------------------- src/llama_stack/core/build.py | 2 +- src/llama_stack/core/datatypes.py | 32 ------ src/llama_stack/core/distribution.py | 13 +-- src/llama_stack/core/external.py | 4 +- src/llama_stack/distributions/template.py | 33 ------ 6 files changed, 9 insertions(+), 202 deletions(-) diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py index e02448e04..51e92f3df 100644 --- a/src/llama_stack/cli/stack/utils.py +++ b/src/llama_stack/cli/stack/utils.py @@ -4,36 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import json -import sys from enum import Enum -from functools import lru_cache from pathlib import Path -import yaml -from termcolor import cprint - -from llama_stack.core.datatypes import ( - BuildConfig, - Provider, - StackConfig, - StorageConfig, -) -from llama_stack.core.distribution import get_provider_registry -from llama_stack.core.resolver import InvalidProviderError -from llama_stack.core.storage.datatypes import ( - InferenceStoreReference, - KVStoreReference, - ServerStoresConfig, - SqliteKVStoreConfig, - SqliteSqlStoreConfig, - SqlStoreReference, -) -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR -from llama_stack.core.utils.dynamic import instantiate_class_type -from llama_stack.core.utils.image_types import LlamaStackImageType -from llama_stack_api import Api - TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions" @@ -49,103 +22,3 @@ def print_subcommand_description(parser, subparsers): description = subcommand.description description_text += f" {name:<21} {description}\n" parser.epilog = description_text - - -def generate_run_config( - build_config: BuildConfig, - build_dir: Path, - image_name: str, -) -> Path: - """ - Generate a config.yaml template file for user to edit from a build.yaml file - """ - apis = list(build_config.distribution_spec.providers.keys()) - distro_dir = DISTRIBS_BASE_DIR / image_name - run_config = StackConfig( - container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), - image_name=image_name, - apis=apis, - providers={}, - storage=StorageConfig( - backends={ - "kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")), - "sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")), - }, - stores=ServerStoresConfig( - metadata=KVStoreReference(backend="kv_default", namespace="registry"), - inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), - conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"), - ), - ), - external_providers_dir=build_config.external_providers_dir - if build_config.external_providers_dir - else EXTERNAL_PROVIDERS_DIR, - ) - # build providers dict - provider_registry = get_provider_registry(build_config) - for api in apis: - run_config.providers[api] = [] - providers = build_config.distribution_spec.providers[api] - - for provider in providers: - pid = provider.provider_type.split("::")[-1] - - p = provider_registry[Api(api)][provider.provider_type] - if p.deprecation_error: - raise InvalidProviderError(p.deprecation_error) - - try: - config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class) - except (ModuleNotFoundError, ValueError) as exc: - # HACK ALERT: - # This code executes after building is done, the import cannot work since the - # package is either available in the venv or container - not available on the host. 
- # TODO: use a "is_external" flag in ProviderSpec to check if the provider is - # external - cprint( - f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}", - color="yellow", - file=sys.stderr, - ) - # Set config_type to None to avoid UnboundLocalError - config_type = None - - if config_type is not None and hasattr(config_type, "sample_run_config"): - config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}") - else: - config = {} - - p_spec = Provider( - provider_id=pid, - provider_type=provider.provider_type, - config=config, - module=provider.module, - ) - run_config.providers[api].append(p_spec) - - run_config_file = build_dir / f"{image_name}-config.yaml" - - with open(run_config_file, "w") as f: - to_write = json.loads(run_config.model_dump_json()) - f.write(yaml.dump(to_write, sort_keys=False)) - - # Only print this message for non-container builds since it will be displayed before the - # container is built - # For non-container builds, the config.yaml is generated at the very end of the build process so it - # makes sense to display this message - if build_config.image_type != LlamaStackImageType.CONTAINER.value: - cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr) - return run_config_file - - -@lru_cache -def available_templates_specs() -> dict[str, BuildConfig]: - import yaml - - template_specs = {} - for p in TEMPLATES_PATH.rglob("*build.yaml"): - template_name = p.parent.name - with open(p) as f: - build_config = BuildConfig(**yaml.safe_load(f)) - template_specs[template_name] = build_config - return template_specs diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index 4e6ccc9f7..6c53e1439 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -46,7 +46,7 @@ def get_provider_dependencies( deps = [] external_provider_deps = [] - registry = get_provider_registry(config) + registry = get_provider_registry(config, True) for api_str, provider_or_providers in providers.items(): providers_for_api = registry[Api(api_str)] diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 5ab2b43dc..05f1d3690 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -607,35 +607,3 @@ can be instantiated multiple times (with different configs) if necessary. _ensure_backend(stores.responses, sql_backends, "storage.stores.responses") _ensure_backend(stores.prompts, kv_backends, "storage.stores.prompts") return self - - -class BuildConfig(BaseModel): - version: int = LLAMA_STACK_BUILD_CONFIG_VERSION - - distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ") - image_type: str = Field( - default="venv", - description="Type of package to build (container | venv)", - ) - image_name: str | None = Field( - default=None, - description="Name of the distribution to build", - ) - external_providers_dir: Path | None = Field( - default=None, - description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. " - "pip_packages MUST contain the provider package name.", - ) - external_apis_dir: Path | None = Field( - default=None, - description="Path to directory containing external API implementations. 
The APIs code and dependencies must be installed on the system.", - ) - - @field_validator("external_providers_dir") - @classmethod - def validate_external_providers_dir(cls, v): - if v is None: - return None - if isinstance(v, str): - return Path(v) - return v diff --git a/src/llama_stack/core/distribution.py b/src/llama_stack/core/distribution.py index 658c75ef2..554b99ddb 100644 --- a/src/llama_stack/core/distribution.py +++ b/src/llama_stack/core/distribution.py @@ -12,7 +12,7 @@ from typing import Any import yaml from pydantic import BaseModel -from llama_stack.core.datatypes import BuildConfig, DistributionSpec +from llama_stack.core.datatypes import StackConfig from llama_stack.core.external import load_external_apis from llama_stack.log import get_logger from llama_stack_api import ( @@ -85,7 +85,9 @@ def _load_inline_provider_spec(spec_data: dict[str, Any], api: Api, provider_nam return spec -def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]: +def get_provider_registry( + config: StackConfig | None = None, building: bool = False +) -> dict[Api, dict[str, ProviderSpec]]: """Get the provider registry, optionally including external providers. This function loads both built-in providers and external providers from YAML files or from their provided modules. @@ -161,7 +163,7 @@ def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]: registry = get_external_providers_from_module( registry=registry, config=config, - building=(isinstance(config, BuildConfig) or isinstance(config, DistributionSpec)), + building=building, ) return registry @@ -223,10 +225,7 @@ def get_external_providers_from_module( registry: dict[Api, dict[str, ProviderSpec]], config, building: bool ) -> dict[Api, dict[str, ProviderSpec]]: provider_list = None - if isinstance(config, BuildConfig): - provider_list = config.distribution_spec.providers.items() - else: - provider_list = config.providers.items() + provider_list = config.providers.items() if provider_list is None: logger.warning("Could not get list of providers from config") return registry diff --git a/src/llama_stack/core/external.py b/src/llama_stack/core/external.py index aa2a0c2c9..94f8d7525 100644 --- a/src/llama_stack/core/external.py +++ b/src/llama_stack/core/external.py @@ -7,14 +7,14 @@ import yaml -from llama_stack.core.datatypes import BuildConfig, StackConfig +from llama_stack.core.datatypes import StackConfig from llama_stack.log import get_logger from llama_stack_api import Api, ExternalApiSpec logger = get_logger(name=__name__, category="core") -def load_external_apis(config: StackConfig | BuildConfig | None) -> dict[Api, ExternalApiSpec]: +def load_external_apis(config: StackConfig | None) -> dict[Api, ExternalApiSpec]: """Load external API specifications from the configured directory. 
Args: diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py index a8e5f2839..afeb8ee92 100644 --- a/src/llama_stack/distributions/template.py +++ b/src/llama_stack/distributions/template.py @@ -16,10 +16,8 @@ from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, Api, BenchmarkInput, - BuildConfig, BuildProvider, DatasetInput, - DistributionSpec, ModelInput, Provider, SafetyConfig, @@ -38,7 +36,6 @@ from llama_stack.core.storage.datatypes import ( from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig from llama_stack.core.storage.sqlstore.sqlstore import SqliteSqlStoreConfig from llama_stack.core.utils.dynamic import instantiate_class_type -from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry from llama_stack_api import DatasetPurpose, ModelType @@ -320,28 +317,6 @@ class DistributionTemplate(BaseModel): available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None - def build_config(self) -> BuildConfig: - # Create minimal providers for build config (without runtime configs) - build_providers = {} - for api, providers in self.providers.items(): - build_providers[api] = [] - for provider in providers: - # Create a minimal build provider object with only essential build information - build_provider = BuildProvider( - provider_type=provider.provider_type, - module=provider.module, - ) - build_providers[api].append(build_provider) - - return BuildConfig( - distribution_spec=DistributionSpec( - description=self.description, - container_image=self.container_image, - providers=build_providers, - ), - image_type=LlamaStackImageType.VENV.value, # default to venv - ) - def generate_markdown_docs(self) -> str: providers_table = "| API | Provider(s) |\n" providers_table += "|-----|-------------|\n" @@ -413,14 +388,6 @@ class DistributionTemplate(BaseModel): for output_dir in [yaml_output_dir, doc_output_dir]: output_dir.mkdir(parents=True, exist_ok=True) - build_config = self.build_config() - with open(yaml_output_dir / "build.yaml", "w") as f: - yaml.safe_dump( - filter_empty_values(build_config.model_dump(exclude_none=True)), - f, - sort_keys=False, - ) - for yaml_pth, settings in self.run_configs.items(): run_config = settings.run_config(self.name, self.providers, self.container_image) with open(yaml_output_dir / yaml_pth, "w") as f: From 7a57957f13e2c5ea3cc87edb22b90f605cd87ffa Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Mon, 1 Dec 2025 11:10:05 -0500 Subject: [PATCH 08/14] fix: fixup config_resolution.py fix some config resolution logic that still had "Modes" (build or run) and was using run.yaml Signed-off-by: Charlie Doern --- .github/workflows/backward-compat.yml | 2 +- src/llama_stack/cli/stack/_list_deps.py | 4 +-- src/llama_stack/cli/stack/run.py | 8 ++--- src/llama_stack/core/server/server.py | 4 +-- .../core/utils/config_resolution.py | 34 +++++++------------ 5 files changed, 22 insertions(+), 30 deletions(-) diff --git a/.github/workflows/backward-compat.yml b/.github/workflows/backward-compat.yml index 199c37a6b..1eab4c2e5 100644 --- a/.github/workflows/backward-compat.yml +++ b/.github/workflows/backward-compat.yml @@ -1,6 +1,6 @@ name: Backward Compatibility Check -run-name: Check backward compatibility for config.yaml configs +run-name: Check backward compatibility for config.yaml files on: pull_request: diff --git a/src/llama_stack/cli/stack/_list_deps.py 
b/src/llama_stack/cli/stack/_list_deps.py index dbfb1bb51..fb1680259 100644 --- a/src/llama_stack/cli/stack/_list_deps.py +++ b/src/llama_stack/cli/stack/_list_deps.py @@ -64,9 +64,9 @@ def format_output_deps_only( def run_stack_list_deps_command(args: argparse.Namespace) -> None: if args.config: try: - from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro + from llama_stack.core.utils.config_resolution import resolve_config_or_distro - config_file = resolve_config_or_distro(args.config, Mode.RUN) + config_file = resolve_config_or_distro(args.config) except ValueError as e: cprint( f"Could not parse config file {args.config}: {e}", diff --git a/src/llama_stack/cli/stack/run.py b/src/llama_stack/cli/stack/run.py index 247ae666d..9bd5b269e 100644 --- a/src/llama_stack/cli/stack/run.py +++ b/src/llama_stack/cli/stack/run.py @@ -30,7 +30,7 @@ from llama_stack.core.storage.datatypes import ( StorageConfig, ) from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR -from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro +from llama_stack.core.utils.config_resolution import resolve_config_or_distro from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import LoggingConfig, get_logger @@ -108,9 +108,9 @@ class StackRun(Subcommand): if args.config: try: - from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro + from llama_stack.core.utils.config_resolution import resolve_config_or_distro - config_file = resolve_config_or_distro(args.config, Mode.RUN) + config_file = resolve_config_or_distro(args.config) except ValueError as e: self.parser.error(str(e)) elif args.providers: @@ -187,7 +187,7 @@ class StackRun(Subcommand): if not config_file: self.parser.error("Config file is required") - config_file = resolve_config_or_distro(str(config_file), Mode.RUN) + config_file = resolve_config_or_distro(str(config_file)) with open(config_file) as fp: config_contents = yaml.safe_load(fp) if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")): diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py index 0ab7ce260..7c84ee56f 100644 --- a/src/llama_stack/core/server/server.py +++ b/src/llama_stack/core/server/server.py @@ -53,7 +53,7 @@ from llama_stack.core.stack import ( from llama_stack.core.telemetry import Telemetry from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, setup_logger from llama_stack.core.utils.config import redact_sensitive_fields -from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro +from llama_stack.core.utils.config_resolution import resolve_config_or_distro from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.log import LoggingConfig, get_logger, setup_logging from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError @@ -374,7 +374,7 @@ def create_app() -> StackApp: if config_file is None: raise ValueError("LLAMA_STACK_CONFIG environment variable is required") - config_file = resolve_config_or_distro(config_file, Mode.RUN) + config_file = resolve_config_or_distro(config_file) # Load and process configuration logger_config = None diff --git a/src/llama_stack/core/utils/config_resolution.py b/src/llama_stack/core/utils/config_resolution.py index 2a85837b6..438f6f0ed 100644 --- a/src/llama_stack/core/utils/config_resolution.py +++ b/src/llama_stack/core/utils/config_resolution.py 
@@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import StrEnum from pathlib import Path from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR @@ -16,21 +15,14 @@ logger = get_logger(name=__name__, category="core") DISTRO_DIR = Path(__file__).parent.parent.parent.parent / "llama_stack" / "distributions" -class Mode(StrEnum): - RUN = "run" - BUILD = "build" - - def resolve_config_or_distro( config_or_distro: str, - mode: Mode = Mode.RUN, ) -> Path: """ Resolve a config/distro argument to a concrete config file path. Args: config_or_distro: User input (file path, distribution name, or built distribution) - mode: Mode resolving for ("run", "build", "server") Returns: Path to the resolved config file @@ -47,7 +39,7 @@ def resolve_config_or_distro( # Strategy 2: Try as distribution name (if no .yaml extension) if not config_or_distro.endswith(".yaml"): - distro_config = _get_distro_config_path(config_or_distro, mode) + distro_config = _get_distro_config_path(config_or_distro) if distro_config.exists(): logger.debug(f"Using distribution: {distro_config}") return distro_config @@ -63,34 +55,34 @@ def resolve_config_or_distro( return distro_config # Strategy 4: Try as built distribution name - distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml" + distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-config.yaml" if distrib_config.exists(): logger.debug(f"Using built distribution: {distrib_config}") return distrib_config - distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml" + distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / "config.yaml" if distrib_config.exists(): logger.debug(f"Using built distribution: {distrib_config}") return distrib_config # Strategy 5: Failed - provide helpful error - raise ValueError(_format_resolution_error(config_or_distro, mode)) + raise ValueError(_format_resolution_error(config_or_distro)) -def _get_distro_config_path(distro_name: str, mode: str) -> Path: +def _get_distro_config_path(distro_name: str, path: str | None = None) -> Path: """Get the config file path for a distro.""" - if not mode.endswith(".yaml"): - mode = f"{mode}.yaml" - return DISTRO_DIR / distro_name / mode + if not path or not path.endswith(".yaml"): + path = "config.yaml" + return DISTRO_DIR / distro_name / path -def _format_resolution_error(config_or_distro: str, mode: Mode) -> str: +def _format_resolution_error(config_or_distro: str) -> str: """Format a helpful error message for resolution failures.""" from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR - distro_path = _get_distro_config_path(config_or_distro, mode) - distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml" - distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml" + distro_path = _get_distro_config_path(config_or_distro) + distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-config.yaml" + distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-config.yaml" available_distros = _get_available_distros() distros_str = ", ".join(available_distros) if available_distros else "none found" @@ -111,7 +103,7 @@ Did you mean one of these distributions? 
def _get_available_distros() -> list[str]: """Get list of available distro names.""" - if not DISTRO_DIR.exists() and not DISTRIBS_BASE_DIR.exists(): + if not DISTRO_DIR.exists() or not DISTRIBS_BASE_DIR.exists(): return [] return list( From ec1a308dac5cf9c96befc102090ec1f6de8c17c2 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Mon, 1 Dec 2025 11:49:41 -0500 Subject: [PATCH 09/14] fix: remove all build.yaml files Signed-off-by: Charlie Doern --- .../distributions/ci-tests/build.yaml | 55 ------------------ src/llama_stack/distributions/dell/build.yaml | 30 ---------- .../meta-reference-gpu/build.yaml | 29 ---------- .../distributions/nvidia/build.yaml | 26 --------- src/llama_stack/distributions/oci/build.yaml | 32 ----------- .../distributions/open-benchmark/build.yaml | 33 ----------- .../distributions/starter-gpu/build.yaml | 56 ------------------- .../distributions/starter/build.yaml | 56 ------------------- .../distributions/watsonx/build.yaml | 30 ---------- tests/external/build.yaml | 10 ---- tests/external/ramalama-stack/build.yaml | 10 ---- 11 files changed, 367 deletions(-) delete mode 100644 src/llama_stack/distributions/ci-tests/build.yaml delete mode 100644 src/llama_stack/distributions/dell/build.yaml delete mode 100644 src/llama_stack/distributions/meta-reference-gpu/build.yaml delete mode 100644 src/llama_stack/distributions/nvidia/build.yaml delete mode 100644 src/llama_stack/distributions/oci/build.yaml delete mode 100644 src/llama_stack/distributions/open-benchmark/build.yaml delete mode 100644 src/llama_stack/distributions/starter-gpu/build.yaml delete mode 100644 src/llama_stack/distributions/starter/build.yaml delete mode 100644 src/llama_stack/distributions/watsonx/build.yaml delete mode 100644 tests/external/build.yaml delete mode 100644 tests/external/ramalama-stack/build.yaml diff --git a/src/llama_stack/distributions/ci-tests/build.yaml b/src/llama_stack/distributions/ci-tests/build.yaml deleted file mode 100644 index a4a9b6354..000000000 --- a/src/llama_stack/distributions/ci-tests/build.yaml +++ /dev/null @@ -1,55 +0,0 @@ -version: 2 -distribution_spec: - description: CI tests for Llama Stack - providers: - inference: - - provider_type: remote::cerebras - - provider_type: remote::ollama - - provider_type: remote::vllm - - provider_type: remote::tgi - - provider_type: remote::fireworks - - provider_type: remote::together - - provider_type: remote::bedrock - - provider_type: remote::nvidia - - provider_type: remote::openai - - provider_type: remote::anthropic - - provider_type: remote::gemini - - provider_type: remote::vertexai - - provider_type: remote::groq - - provider_type: remote::sambanova - - provider_type: remote::azure - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: inline::faiss - - provider_type: inline::sqlite-vec - - provider_type: inline::milvus - - provider_type: remote::chromadb - - provider_type: remote::pgvector - - provider_type: remote::qdrant - - provider_type: remote::weaviate - files: - - provider_type: inline::localfs - safety: - - provider_type: inline::llama-guard - - provider_type: inline::code-scanner - agents: - - provider_type: inline::meta-reference - post_training: - - provider_type: inline::torchtune-cpu - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: 
remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol - batches: - - provider_type: inline::reference -image_type: venv diff --git a/src/llama_stack/distributions/dell/build.yaml b/src/llama_stack/distributions/dell/build.yaml deleted file mode 100644 index 2c809e0fe..000000000 --- a/src/llama_stack/distributions/dell/build.yaml +++ /dev/null @@ -1,30 +0,0 @@ -version: 2 -distribution_spec: - description: Dell's distribution of Llama Stack. TGI inference via Dell's custom - container - providers: - inference: - - provider_type: remote::tgi - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: inline::faiss - - provider_type: remote::chromadb - - provider_type: remote::pgvector - safety: - - provider_type: inline::llama-guard - agents: - - provider_type: inline::meta-reference - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime -image_type: venv diff --git a/src/llama_stack/distributions/meta-reference-gpu/build.yaml b/src/llama_stack/distributions/meta-reference-gpu/build.yaml deleted file mode 100644 index 62a845b83..000000000 --- a/src/llama_stack/distributions/meta-reference-gpu/build.yaml +++ /dev/null @@ -1,29 +0,0 @@ -version: 2 -distribution_spec: - description: Use Meta Reference for running LLM inference - providers: - inference: - - provider_type: inline::meta-reference - vector_io: - - provider_type: inline::faiss - - provider_type: remote::chromadb - - provider_type: remote::pgvector - safety: - - provider_type: inline::llama-guard - agents: - - provider_type: inline::meta-reference - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol -image_type: venv diff --git a/src/llama_stack/distributions/nvidia/build.yaml b/src/llama_stack/distributions/nvidia/build.yaml deleted file mode 100644 index 86e085575..000000000 --- a/src/llama_stack/distributions/nvidia/build.yaml +++ /dev/null @@ -1,26 +0,0 @@ -version: 2 -distribution_spec: - description: Use NVIDIA NIM for running LLM inference, evaluation and safety - providers: - inference: - - provider_type: remote::nvidia - vector_io: - - provider_type: inline::faiss - safety: - - provider_type: remote::nvidia - agents: - - provider_type: inline::meta-reference - eval: - - provider_type: remote::nvidia - post_training: - - provider_type: remote::nvidia - datasetio: - - provider_type: inline::localfs - - provider_type: remote::nvidia - scoring: - - provider_type: inline::basic - tool_runtime: - - provider_type: inline::rag-runtime - files: - - provider_type: inline::localfs -image_type: venv diff --git a/src/llama_stack/distributions/oci/build.yaml b/src/llama_stack/distributions/oci/build.yaml deleted file mode 100644 index ac9fe967d..000000000 --- a/src/llama_stack/distributions/oci/build.yaml 
+++ /dev/null @@ -1,32 +0,0 @@ -version: 2 -distribution_spec: - description: Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM - inference with scalable cloud services - providers: - inference: - - provider_type: remote::oci - vector_io: - - provider_type: inline::faiss - - provider_type: remote::chromadb - - provider_type: remote::pgvector - safety: - - provider_type: inline::llama-guard - agents: - - provider_type: inline::meta-reference - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol - files: - - provider_type: inline::localfs -image_type: venv diff --git a/src/llama_stack/distributions/open-benchmark/build.yaml b/src/llama_stack/distributions/open-benchmark/build.yaml deleted file mode 100644 index 9613ef60d..000000000 --- a/src/llama_stack/distributions/open-benchmark/build.yaml +++ /dev/null @@ -1,33 +0,0 @@ -version: 2 -distribution_spec: - description: Distribution for running open benchmarks - providers: - inference: - - provider_type: remote::openai - - provider_type: remote::anthropic - - provider_type: remote::gemini - - provider_type: remote::groq - - provider_type: remote::together - vector_io: - - provider_type: inline::sqlite-vec - - provider_type: remote::chromadb - - provider_type: remote::pgvector - safety: - - provider_type: inline::llama-guard - agents: - - provider_type: inline::meta-reference - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol -image_type: venv diff --git a/src/llama_stack/distributions/starter-gpu/build.yaml b/src/llama_stack/distributions/starter-gpu/build.yaml deleted file mode 100644 index 57ba12dc4..000000000 --- a/src/llama_stack/distributions/starter-gpu/build.yaml +++ /dev/null @@ -1,56 +0,0 @@ -version: 2 -distribution_spec: - description: Quick start template for running Llama Stack with several popular providers. - This distribution is intended for GPU-enabled environments. 
- providers: - inference: - - provider_type: remote::cerebras - - provider_type: remote::ollama - - provider_type: remote::vllm - - provider_type: remote::tgi - - provider_type: remote::fireworks - - provider_type: remote::together - - provider_type: remote::bedrock - - provider_type: remote::nvidia - - provider_type: remote::openai - - provider_type: remote::anthropic - - provider_type: remote::gemini - - provider_type: remote::vertexai - - provider_type: remote::groq - - provider_type: remote::sambanova - - provider_type: remote::azure - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: inline::faiss - - provider_type: inline::sqlite-vec - - provider_type: inline::milvus - - provider_type: remote::chromadb - - provider_type: remote::pgvector - - provider_type: remote::qdrant - - provider_type: remote::weaviate - files: - - provider_type: inline::localfs - safety: - - provider_type: inline::llama-guard - - provider_type: inline::code-scanner - agents: - - provider_type: inline::meta-reference - post_training: - - provider_type: inline::huggingface-gpu - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol - batches: - - provider_type: inline::reference -image_type: venv diff --git a/src/llama_stack/distributions/starter/build.yaml b/src/llama_stack/distributions/starter/build.yaml deleted file mode 100644 index 9fe7ccd14..000000000 --- a/src/llama_stack/distributions/starter/build.yaml +++ /dev/null @@ -1,56 +0,0 @@ -version: 2 -distribution_spec: - description: Quick start template for running Llama Stack with several popular providers. - This distribution is intended for CPU-only environments. 
- providers: - inference: - - provider_type: remote::cerebras - - provider_type: remote::ollama - - provider_type: remote::vllm - - provider_type: remote::tgi - - provider_type: remote::fireworks - - provider_type: remote::together - - provider_type: remote::bedrock - - provider_type: remote::nvidia - - provider_type: remote::openai - - provider_type: remote::anthropic - - provider_type: remote::gemini - - provider_type: remote::vertexai - - provider_type: remote::groq - - provider_type: remote::sambanova - - provider_type: remote::azure - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: inline::faiss - - provider_type: inline::sqlite-vec - - provider_type: inline::milvus - - provider_type: remote::chromadb - - provider_type: remote::pgvector - - provider_type: remote::qdrant - - provider_type: remote::weaviate - files: - - provider_type: inline::localfs - safety: - - provider_type: inline::llama-guard - - provider_type: inline::code-scanner - agents: - - provider_type: inline::meta-reference - post_training: - - provider_type: inline::torchtune-cpu - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol - batches: - - provider_type: inline::reference -image_type: venv diff --git a/src/llama_stack/distributions/watsonx/build.yaml b/src/llama_stack/distributions/watsonx/build.yaml deleted file mode 100644 index 8fc27c8c8..000000000 --- a/src/llama_stack/distributions/watsonx/build.yaml +++ /dev/null @@ -1,30 +0,0 @@ -version: 2 -distribution_spec: - description: Use watsonx for running LLM inference - providers: - inference: - - provider_type: remote::watsonx - - provider_type: inline::sentence-transformers - vector_io: - - provider_type: inline::faiss - safety: - - provider_type: inline::llama-guard - agents: - - provider_type: inline::meta-reference - eval: - - provider_type: inline::meta-reference - datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs - scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust - tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol - files: - - provider_type: inline::localfs -image_type: venv diff --git a/tests/external/build.yaml b/tests/external/build.yaml deleted file mode 100644 index fde6cb178..000000000 --- a/tests/external/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -version: '2' -distribution_spec: - description: Custom distro for CI tests - providers: - weather: - - provider_type: remote::kaze -image_type: venv -image_name: ci-test -external_providers_dir: ~/.llama/providers.d -external_apis_dir: ~/.llama/apis.d diff --git a/tests/external/ramalama-stack/build.yaml b/tests/external/ramalama-stack/build.yaml deleted file mode 100644 index 4403339f1..000000000 --- a/tests/external/ramalama-stack/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -version: 2 -distribution_spec: - description: Use (an external) Ramalama server for running LLM inference - container_image: null - providers: - inference: - - provider_type: 
remote::ramalama - module: ramalama_stack==0.3.0a0 -image_type: venv -image_name: ramalama-stack-test From 457482d7a067a5e5589e4785eeaae8ea8053c617 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Mon, 1 Dec 2025 13:51:25 -0500 Subject: [PATCH 10/14] fix: reference ProviderImpl config properly self.config.run_config -> self.config.config Signed-off-by: Charlie Doern --- src/llama_stack/core/providers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py index c758c65bc..85f2f9221 100644 --- a/src/llama_stack/core/providers.py +++ b/src/llama_stack/core/providers.py @@ -41,7 +41,7 @@ class ProviderImpl(Providers): pass async def list_providers(self) -> ListProvidersResponse: - run_config = self.config + run_config = self.config.config safe_config = StackConfig(**redact_sensitive_fields(run_config.model_dump())) providers_health = await self.get_providers_health() ret = [] From 94506126ae0456903b021d64d975bac650baf70c Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Mon, 1 Dec 2025 14:07:17 -0500 Subject: [PATCH 11/14] fix: tests should not use BuildConfig test_distribution and test_conversations were using BuildConfig and improper StackConfig semantics Signed-off-by: Charlie Doern --- .github/workflows/providers-build.yml | 6 +- .../unit/conversations/test_conversations.py | 8 +- tests/unit/distribution/test_distribution.py | 90 +++++++++---------- 3 files changed, 48 insertions(+), 56 deletions(-) diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index b4dae2c29..523a2d01c 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -113,7 +113,7 @@ jobs: - name: Build container image run: | - BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml) + BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/config.yaml) BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests" BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE" BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/config.yaml" @@ -155,11 +155,11 @@ jobs: run: | yq -i ' .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest" - ' src/llama_stack/distributions/ci-tests/build.yaml + ' src/llama_stack/distributions/ci-tests/config.yaml - name: Build UBI9 container image run: | - BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml) + BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/config.yaml) BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests" BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE" BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/config.yaml" diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index b481be63c..1aeb61a04 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -44,9 +44,9 @@ async def service(): ), ) register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) - run_config = StackConfig(image_name="test", 
apis=[], providers={}, storage=storage) + stack_config = StackConfig(image_name="test", apis=[], providers={}, storage=storage) - config = ConversationServiceConfig(run_config=run_config, policy=[]) + config = ConversationServiceConfig(config=stack_config, policy=[]) service = ConversationServiceImpl(config, {}) await service.initialize() yield service @@ -151,9 +151,9 @@ async def test_policy_configuration(): ), ) register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) - run_config = StackConfig(image_name="test", apis=[], providers={}, storage=storage) + stack_config = StackConfig(image_name="test", apis=[], providers={}, storage=storage) - config = ConversationServiceConfig(run_config=run_config, policy=restrictive_policy) + config = ConversationServiceConfig(config=stack_config, policy=restrictive_policy) service = ConversationServiceImpl(config, {}) await service.initialize() diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py index b8ff484a7..762d8219f 100644 --- a/tests/unit/distribution/test_distribution.py +++ b/tests/unit/distribution/test_distribution.py @@ -395,27 +395,25 @@ pip_packages: def test_external_provider_from_module_building(self, mock_providers): """Test loading an external provider from a module during build (building=True, partial spec).""" - from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec + from llama_stack.core.datatypes import StackConfig from llama_stack_api import Api - # No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec - build_config = BuildConfig( - version=2, - image_type="container", + # No importlib patch needed, should not import module when building + config = StackConfig( image_name="test_image", - distribution_spec=DistributionSpec( - description="test", - providers={ - "inference": [ - BuildProvider( - provider_type="external_test", - module="external_test", - ) - ] - }, - ), + apis=[], + providers={ + "inference": [ + Provider( + provider_id="external_test", + provider_type="external_test", + config={}, + module="external_test", + ) + ] + }, ) - registry = get_provider_registry(build_config) + registry = get_provider_registry(config, building=True) assert Api.inference in registry assert "external_test" in registry[Api.inference] provider = registry[Api.inference]["external_test"] @@ -492,31 +490,29 @@ class TestGetExternalProvidersFromModule: assert result[Api.inference]["versioned_test"].module == "versioned_test==1.0.0" def test_buildconfig_does_not_import_module(self, mock_providers): - """Test that BuildConfig does not import the module (building=True).""" - from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec + """Test that StackConfig does not import the module when building (building=True).""" + from llama_stack.core.datatypes import StackConfig from llama_stack.core.distribution import get_external_providers_from_module - build_config = BuildConfig( - version=2, - image_type="container", + config = StackConfig( image_name="test_image", - distribution_spec=DistributionSpec( - description="test", - providers={ - "inference": [ - BuildProvider( - provider_type="build_test", - module="build_test==1.0.0", - ) - ] - }, - ), + apis=[], + providers={ + "inference": [ + Provider( + provider_id="build_test", + provider_type="build_test", + config={}, + module="build_test==1.0.0", + ) + ] + }, ) # Should not call import_module at all when building with 
patch("importlib.import_module") as mock_import: registry = {Api.inference: {}} - result = get_external_providers_from_module(registry, build_config, building=True) + result = get_external_providers_from_module(registry, config, building=True) # Verify module was NOT imported mock_import.assert_not_called() @@ -530,28 +526,24 @@ class TestGetExternalProvidersFromModule: assert provider.api == Api.inference def test_buildconfig_multiple_providers(self, mock_providers): - """Test BuildConfig with multiple providers for the same API.""" - from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec + """Test StackConfig with multiple providers for the same API.""" + from llama_stack.core.datatypes import StackConfig from llama_stack.core.distribution import get_external_providers_from_module - build_config = BuildConfig( - version=2, - image_type="container", + config = StackConfig( image_name="test_image", - distribution_spec=DistributionSpec( - description="test", - providers={ - "inference": [ - BuildProvider(provider_type="provider1", module="provider1"), - BuildProvider(provider_type="provider2", module="provider2"), - ] - }, - ), + apis=[], + providers={ + "inference": [ + Provider(provider_id="provider1", provider_type="provider1", config={}, module="provider1"), + Provider(provider_id="provider2", provider_type="provider2", config={}, module="provider2"), + ] + }, ) with patch("importlib.import_module") as mock_import: registry = {Api.inference: {}} - result = get_external_providers_from_module(registry, build_config, building=True) + result = get_external_providers_from_module(registry, config, building=True) mock_import.assert_not_called() assert "provider1" in result[Api.inference] From b2ab5c28073e29f59eba875fc2c69ef853522b69 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Mon, 1 Dec 2025 14:26:35 -0500 Subject: [PATCH 12/14] fix: fix `llama stack list` llama stack list was still using build.yaml, convert cmd and tests to just use config.yaml Signed-off-by: Charlie Doern --- src/llama_stack/cli/stack/list_stacks.py | 16 +++++++--------- tests/unit/distribution/test_stack_list.py | 8 +++----- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/llama_stack/cli/stack/list_stacks.py b/src/llama_stack/cli/stack/list_stacks.py index 0153b3391..761332374 100644 --- a/src/llama_stack/cli/stack/list_stacks.py +++ b/src/llama_stack/cli/stack/list_stacks.py @@ -59,19 +59,17 @@ class StackListBuilds(Subcommand): print("No distributions found") return - headers = ["Stack Name", "Source", "Path", "Build Config", "Run Config"] + headers = ["Stack Name", "Source", "Path", "Config"] rows = [] for name, (path, source_type) in sorted(distributions.items()): row = [name, source_type, str(path)] - # Check for build and run config files - # For built-in distributions, configs are named build.yaml and config.yaml - # For custom distributions, configs are named {name}-build.yaml and {name}-config.yaml + # Check for config files + # For built-in distributions, configs are named config.yaml + # For custom distributions, configs are named {name}-config.yaml if source_type == "built-in": - build_config = "Yes" if (path / "build.yaml").exists() else "No" - run_config = "Yes" if (path / "config.yaml").exists() else "No" + config = "Yes" if (path / "config.yaml").exists() else "No" else: - build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No" - run_config = "Yes" if (path / f"{name}-config.yaml").exists() else "No" - row.extend([build_config, 
run_config]) + config = "Yes" if (path / f"{name}-config.yaml").exists() else "No" + row.extend([config]) rows.append(row) print_table(rows, headers, separate_rows=True) diff --git a/tests/unit/distribution/test_stack_list.py b/tests/unit/distribution/test_stack_list.py index 7a51ee7e6..eef39bbbc 100644 --- a/tests/unit/distribution/test_stack_list.py +++ b/tests/unit/distribution/test_stack_list.py @@ -31,8 +31,7 @@ def mock_distribs_base_dir(tmp_path): # Create a custom distribution starter_custom = custom_dir / "starter" starter_custom.mkdir() - (starter_custom / "starter-build.yaml").write_text("# build config") - (starter_custom / "starter-config.yaml").write_text("# run config") + (starter_custom / "starter-config.yaml").write_text("# config") return custom_dir @@ -47,8 +46,7 @@ def mock_distro_dir(tmp_path): for distro_name in ["starter", "nvidia", "dell"]: distro_path = distro_dir / distro_name distro_path.mkdir() - (distro_path / "build.yaml").write_text("# build config") - (distro_path / "config.yaml").write_text("# run config") + (distro_path / "config.yaml").write_text("# config") return distro_dir @@ -112,7 +110,7 @@ class TestStackList: # Add a hidden directory hidden_dir = mock_distro_dir / ".hidden" hidden_dir.mkdir() - (hidden_dir / "build.yaml").write_text("# build") + (hidden_dir / "config.yaml").write_text("# config") # Add a __pycache__ directory pycache_dir = mock_distro_dir / "__pycache__" From b064251f6a88fd8262a69b7970229d0c26122f3d Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Mon, 1 Dec 2025 14:27:05 -0500 Subject: [PATCH 13/14] fix: new provider docs remove reference of build.yaml from new provider docs Signed-off-by: Charlie Doern --- docs/docs/contributing/new_api_provider.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/contributing/new_api_provider.mdx b/docs/docs/contributing/new_api_provider.mdx index b2e6b4d18..511763fe3 100644 --- a/docs/docs/contributing/new_api_provider.mdx +++ b/docs/docs/contributing/new_api_provider.mdx @@ -14,7 +14,7 @@ This guide will walk you through the process of adding a new API provider to Lla - Begin by reviewing the [core concepts](../concepts/) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.) - Determine the provider type ([Remote](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote) or [Inline](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline)). Remote providers make requests to external services, while inline providers execute implementation locally. - Add your provider to the appropriate [Registry](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/registry/). Specify pip dependencies necessary. -- Update any distribution [Templates](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/) `build.yaml` and `config.yaml` files if they should include your provider by default. Run [./scripts/distro_codegen.py](https://github.com/meta-llama/llama-stack/blob/main/scripts/distro_codegen.py) if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation. 
+- Update any distribution [Templates](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/) `config.yaml` files if they should include your provider by default. Run [./scripts/distro_codegen.py](https://github.com/meta-llama/llama-stack/blob/main/scripts/distro_codegen.py) if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation. Here are some example PRs to help you get started: From 0f4790f531339544c63399eef34ce627f1f8a930 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Tue, 2 Dec 2025 11:59:14 -0500 Subject: [PATCH 14/14] fix: address review comments 1. building: bool is now listing: bool 2. self.config.conf is now self.stack_config Signed-off-by: Charlie Doern --- src/llama_stack/core/build.py | 2 +- src/llama_stack/core/distribution.py | 18 ++++---- src/llama_stack/core/inspect.py | 4 +- src/llama_stack/core/prompts/prompts.py | 4 +- src/llama_stack/core/providers.py | 4 +- tests/unit/distribution/test_distribution.py | 48 ++++++++++---------- 6 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index 6c53e1439..52478472c 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -46,7 +46,7 @@ def get_provider_dependencies( deps = [] external_provider_deps = [] - registry = get_provider_registry(config, True) + registry = get_provider_registry(config=config, listing=True) for api_str, provider_or_providers in providers.items(): providers_for_api = registry[Api(api_str)] diff --git a/src/llama_stack/core/distribution.py b/src/llama_stack/core/distribution.py index 554b99ddb..97f1edcd5 100644 --- a/src/llama_stack/core/distribution.py +++ b/src/llama_stack/core/distribution.py @@ -86,7 +86,7 @@ def _load_inline_provider_spec(spec_data: dict[str, Any], api: Api, provider_nam def get_provider_registry( - config: StackConfig | None = None, building: bool = False + config: StackConfig | None = None, listing: bool = False ) -> dict[Api, dict[str, ProviderSpec]]: """Get the provider registry, optionally including external providers. @@ -111,13 +111,13 @@ def get_provider_registry( safety/ llama-guard.yaml - This method is overloaded in that it can be called from a variety of places: during build, during run, during stack construction. - So when building external providers from a module, there are scenarios where the pip package required to import the module might not be available yet. + This method is overloaded in that it can be called from a variety of places: during list-deps, during run, during stack construction. + So when listing external providers from a module, there are scenarios where the pip package required to import the module might not be available yet. There is special handling for all of the potential cases this method can be called from. 

     Args:
         config: Optional object containing the external providers directory path
-        building: Optional bool delineating whether or not this is being called from a build process
+        listing: Optional bool delineating whether or not this is being called from a list-deps process

     Returns:
         A dictionary mapping APIs to their available providers
@@ -163,7 +163,7 @@ def get_provider_registry(
         registry = get_external_providers_from_module(
             registry=registry,
             config=config,
-            building=building,
+            listing=listing,
         )
     return registry

@@ -222,7 +222,7 @@ def get_external_providers_from_dir(


 def get_external_providers_from_module(
-    registry: dict[Api, dict[str, ProviderSpec]], config, building: bool
+    registry: dict[Api, dict[str, ProviderSpec]], config, listing: bool
 ) -> dict[Api, dict[str, ProviderSpec]]:
     provider_list = None
     provider_list = config.providers.items()
@@ -235,14 +235,14 @@ def get_external_providers_from_module(
             continue
         # get provider using module
         try:
-            if not building:
+            if not listing:
                 package_name = provider.module.split("==")[0]
                 module = importlib.import_module(f"{package_name}.provider")
                 # if config class is wrong you will get an error saying module could not be imported
                 spec = module.get_provider_spec()
             else:
-                # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon build and run
-                # in the case we are building we CANNOT import this module of course because it has not been installed.
+                # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon list-deps and run
+                # in the case we are listing we CANNOT import this module of course because it has not been installed.
                 spec = ProviderSpec(
                     api=Api(provider_api),
                     provider_type=provider.provider_type,
diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py
index f14326f2d..d6d89a82c 100644
--- a/src/llama_stack/core/inspect.py
+++ b/src/llama_stack/core/inspect.py
@@ -33,14 +33,14 @@ async def get_provider_impl(config, deps):

 class DistributionInspectImpl(Inspect):
     def __init__(self, config: DistributionInspectConfig, deps):
-        self.config = config
+        self.stack_config = config.config
         self.deps = deps

     async def initialize(self) -> None:
         pass

     async def list_routes(self, api_filter: str | None = None) -> ListRoutesResponse:
-        config: StackConfig = self.config.config
+        config: StackConfig = self.stack_config

         # Helper function to determine if a route should be included based on api_filter
         def should_include_route(webmethod) -> bool:
diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py
index 44e560091..f2a604b37 100644
--- a/src/llama_stack/core/prompts/prompts.py
+++ b/src/llama_stack/core/prompts/prompts.py
@@ -34,13 +34,13 @@ class PromptServiceImpl(Prompts):
     """Built-in prompt service implementation using KVStore."""

     def __init__(self, config: PromptServiceConfig, deps: dict[Any, Any]):
-        self.config = config
+        self.stack_config = config.config
         self.deps = deps
         self.kvstore: KVStore

     async def initialize(self) -> None:
         # Use prompts store reference from run config
-        prompts_ref = self.config.config.storage.stores.prompts
+        prompts_ref = self.stack_config.storage.stores.prompts
         if not prompts_ref:
             raise ValueError("storage.stores.prompts must be configured in run config")
         self.kvstore = await kvstore_impl(prompts_ref)
diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py
index 85f2f9221..2514e8775 100644
--- a/src/llama_stack/core/providers.py
+++ b/src/llama_stack/core/providers.py
@@ -30,7 +30,7 @@ async def get_provider_impl(config, deps):

 class ProviderImpl(Providers):
     def __init__(self, config, deps):
-        self.config = config
+        self.stack_config = config.config
         self.deps = deps

     async def initialize(self) -> None:
@@ -41,7 +41,7 @@ class ProviderImpl(Providers):
         pass

     async def list_providers(self) -> ListProvidersResponse:
-        run_config = self.config.config
+        run_config = self.stack_config
         safe_config = StackConfig(**redact_sensitive_fields(run_config.model_dump()))
         providers_health = await self.get_providers_health()
         ret = []
diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py
index 762d8219f..4884f70ba 100644
--- a/tests/unit/distribution/test_distribution.py
+++ b/tests/unit/distribution/test_distribution.py
@@ -270,7 +270,7 @@ class TestProviderRegistry:
             external_providers_dir="/nonexistent/dir",
         )
         with pytest.raises(FileNotFoundError):
-            get_provider_registry(config)
+            get_provider_registry(config=config)

     def test_empty_api_directory(self, api_directories, mock_providers, base_config):
         """Test handling of empty API directory."""
@@ -339,7 +339,7 @@ pip_packages:
                 ]
             },
         )
-        registry = get_provider_registry(config)
+        registry = get_provider_registry(config=config)
         assert Api.inference in registry
         assert "external_test" in registry[Api.inference]
         provider = registry[Api.inference]["external_test"]
@@ -368,7 +368,7 @@ pip_packages:
             },
         )
         with pytest.raises(ValueError) as exc_info:
-            get_provider_registry(config)
+            get_provider_registry(config=config)
         assert "get_provider_spec not found" in str(exc_info.value)

     def test_external_provider_from_module_missing_get_provider_spec(self, mock_providers):
@@ -391,14 +391,14 @@ pip_packages:
             },
         )
         with pytest.raises(AttributeError):
-            get_provider_registry(config)
+            get_provider_registry(config=config)

-    def test_external_provider_from_module_building(self, mock_providers):
-        """Test loading an external provider from a module during build (building=True, partial spec)."""
+    def test_external_provider_from_module_listing(self, mock_providers):
+        """Test loading an external provider from a module during list-deps (listing=True, partial spec)."""
         from llama_stack.core.datatypes import StackConfig
         from llama_stack_api import Api

-        # No importlib patch needed, should not import module when building
+        # No importlib patch needed, should not import module when listing
         config = StackConfig(
             image_name="test_image",
             apis=[],
@@ -413,7 +413,7 @@
                 ]
             },
         )
-        registry = get_provider_registry(config, building=True)
+        registry = get_provider_registry(config=config, listing=True)
         assert Api.inference in registry
         assert "external_test" in registry[Api.inference]
         provider = registry[Api.inference]["external_test"]
@@ -446,7 +446,7 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)

         # Should not add anything to registry
         assert len(result[Api.inference]) == 0
@@ -485,12 +485,12 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)

         assert "versioned_test" in result[Api.inference]
         assert result[Api.inference]["versioned_test"].module == "versioned_test==1.0.0"

     def test_buildconfig_does_not_import_module(self, mock_providers):
-        """Test that StackConfig does not import the module when building (building=True)."""
+        """Test that StackConfig does not import the module when listing (listing=True)."""
         from llama_stack.core.datatypes import StackConfig
         from llama_stack.core.distribution import get_external_providers_from_module
@@ -509,10 +509,10 @@ class TestGetExternalProvidersFromModule:
             },
         )

-        # Should not call import_module at all when building
+        # Should not call import_module at all when listing
         with patch("importlib.import_module") as mock_import:
             registry = {Api.inference: {}}
-            result = get_external_providers_from_module(registry, config, building=True)
+            result = get_external_providers_from_module(registry, config, listing=True)

             # Verify module was NOT imported
             mock_import.assert_not_called()
@@ -543,14 +543,14 @@ class TestGetExternalProvidersFromModule:
         with patch("importlib.import_module") as mock_import:
             registry = {Api.inference: {}}
-            result = get_external_providers_from_module(registry, config, building=True)
+            result = get_external_providers_from_module(registry, config, listing=True)

             mock_import.assert_not_called()

             assert "provider1" in result[Api.inference]
             assert "provider2" in result[Api.inference]

     def test_distributionspec_does_not_import_module(self, mock_providers):
-        """Test that DistributionSpec does not import the module (building=True)."""
+        """Test that DistributionSpec does not import the module (listing=True)."""
         from llama_stack.core.datatypes import BuildProvider, DistributionSpec
         from llama_stack.core.distribution import get_external_providers_from_module
@@ -566,10 +566,10 @@ class TestGetExternalProvidersFromModule:
             },
         )

-        # Should not call import_module at all when building
+        # Should not call import_module at all when listing
         with patch("importlib.import_module") as mock_import:
             registry = {Api.inference: {}}
-            result = get_external_providers_from_module(registry, dist_spec, building=True)
+            result = get_external_providers_from_module(registry, dist_spec, listing=True)

             # Verify module was NOT imported
             mock_import.assert_not_called()
@@ -623,7 +623,7 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)

         # Only the matching provider_type should be added
         assert "list_test" in result[Api.inference]
@@ -671,7 +671,7 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)

         # Only the matching provider_type should be added
         assert "wanted" in result[Api.inference]
@@ -726,7 +726,7 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)

         # Both provider types should be added to registry
         assert "remote::ollama" in result[Api.inference]
@@ -760,7 +760,7 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}}
         with pytest.raises(ValueError) as exc_info:
-            get_external_providers_from_module(registry, config, building=False)
+            get_external_providers_from_module(registry, config, listing=False)

         assert "get_provider_spec not found" in str(exc_info.value)
@@ -797,7 +797,7 @@ class TestGetExternalProvidersFromModule:
         registry = {Api.inference: {}}

         with pytest.raises(RuntimeError) as exc_info:
-            get_external_providers_from_module(registry, config, building=False)
+            get_external_providers_from_module(registry, config, listing=False)

         assert "Something went wrong" in str(exc_info.value)
@@ -810,7 +810,7 @@
             providers={},
         )
         registry = {Api.inference: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)

         # Should return registry unchanged
         assert result == registry
@@ -866,7 +866,7 @@ class TestGetExternalProvidersFromModule:
             },
         )
         registry = {Api.inference: {}, Api.safety: {}}
-        result = get_external_providers_from_module(registry, config, building=False)
+        result = get_external_providers_from_module(registry, config, listing=False)

         assert "inf_test" in result[Api.inference]
         assert "safe_test" in result[Api.safety]
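
To make the effect of the `listing` flag concrete, here is a small illustrative sketch (not part of the patch series) of how a list-deps style caller can resolve the provider registry for a module-based external provider before that module is installed. The provider id, provider type, and module pin are hypothetical placeholders, and the `StackConfig`/`Provider` constructor fields are assumed to match the shapes used in the patched tests.

```python
# Illustrative sketch only -- not part of this patch series.
# Assumptions: StackConfig/Provider accept the fields shown in the patched tests,
# and the registry is keyed by provider_type for module-based providers.
from llama_stack.core.datatypes import Provider, StackConfig
from llama_stack.core.distribution import get_provider_registry
from llama_stack_api import Api

config = StackConfig(
    image_name="deps-example",
    apis=[],
    providers={
        "inference": [
            Provider(
                provider_id="example",
                provider_type="remote::example",
                config={},
                module="example_stack==0.1.0",  # hypothetical pin; may not be installed yet
            )
        ]
    },
)

# listing=True: the provider module is NOT imported; a partial ProviderSpec is
# synthesized so dependencies can be listed before anything is installed.
registry = get_provider_registry(config=config, listing=True)
spec = registry[Api.inference]["remote::example"]
print(spec.module)  # e.g. "example_stack==0.1.0"
```

With `listing=False` (the default), the same call would attempt `importlib.import_module("example_stack.provider")` and invoke its `get_provider_spec()`, which requires the package to already be installed; that is why the list-deps code path passes `listing=True`.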