feat: convert provider config to a file path

Currently, provider.config must be an inline dictionary. Introduce the ability to specify either a path to a provider config file or the current in-file dictionary. Allowing users to specify a file path enables more robust config management, letting stack administrators swap in different provider configs seamlessly.

Signed-off-by: Charlie Doern <cdoern@redhat.com>
2025-12-25 23:52:03 +00:00 · 2025-06-23 11:20:54 -04:00 · 2025-06-23 11:20:54 -04:00 · 31cc971503
commit 31cc971503
parent 4d0d2d685f
14 changed files with 226 additions and 174 deletions
--- a/llama_stack/distribution/configure.py
+++ b/llama_stack/distribution/configure.py
@ -16,6 +16,7 @@ from llama_stack.distribution.datatypes import (
 from llama_stack.distribution.distribution import (
    builtin_automatically_routed_apis,
    get_provider_registry,
+    resolve_config,
 )
 from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
@ -30,7 +31,7 @@ def configure_single_provider(registry: dict[str, ProviderSpec], provider: Provi
    config_type = instantiate_class_type(provider_spec.config_class)
    try:
        if provider.config:
-            existing = config_type(**provider.config)
+            existing = resolve_config(provider=provider, provider_spec=provider_spec)
        else:
            existing = None
    except Exception:
--- a/llama_stack/distribution/datatypes.py
+++ b/llama_stack/distribution/datatypes.py
@ -150,7 +150,10 @@ class Provider(BaseModel):
    # when the provider is enabled via a conditional environment variable
    provider_id: str | None
    provider_type: str
-    config: dict[str, Any]
+    config: str | dict[str, Any] | None = Field(
+        default=None,
+        description="Provider configuration dictionary or path to provider configuration file",
+    )


 class LoggingConfig(BaseModel):
--- a/llama_stack/distribution/distribution.py
+++ b/llama_stack/distribution/distribution.py
@ -7,11 +7,15 @@
 import glob
 import importlib
 import os
+from pathlib import Path
 from typing import Any

 import yaml
 from pydantic import BaseModel

+from llama_stack.distribution.datatypes import Provider
+from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.distribution.utils.env import replace_env_vars
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
    AdapterSpec,
@ -188,3 +192,22 @@ def get_provider_registry(
                        logger.error(f"Failed to load provider spec from {spec_path}: {e}")
                        raise e
    return ret
+
+
+def resolve_config(provider: Provider, provider_spec: ProviderSpec | None = None, api: str | None = None):
+    if not provider_spec:
+        if not api:
+            raise ValueError("In order to get provider spec, must have API")
+        registry = get_provider_registry()
+        provider_spec = registry[Api(api)][provider.provider_type]
+    config_type = instantiate_class_type(provider_spec.config_class)
+    try:
+        if provider.config and isinstance(provider.config, str):
+            with open(Path(provider.config).expanduser().resolve(), encoding="utf-8") as f:
+                config: dict[str, Any] = yaml.safe_load(f) or {}  # empty file parses to None
+                replaced = replace_env_vars(config)
+                return config_type(**replaced)
+        elif provider.config is not None:
+            return config_type(**provider.config)
+    except Exception as e:
+        raise ValueError(f"Error getting provider config for provider type '{provider.provider_type}'") from e
--- a/llama_stack/distribution/providers.py
+++ b/llama_stack/distribution/providers.py
@ -10,6 +10,7 @@ from typing import Any
 from pydantic import BaseModel

 from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
+from llama_stack.distribution.distribution import resolve_config
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import HealthResponse, HealthStatus

@ -51,12 +52,13 @@ class ProviderImpl(Providers):
                # Skip providers that are not enabled
                if p.provider_id is None:
                    continue
+                config = resolve_config(provider=p, api=api)
                ret.append(
                    ProviderInfo(
                        api=api,
                        provider_id=p.provider_id,
                        provider_type=p.provider_type,
-                        config=p.config,
+                        config=dict(config) if config is not None else {},
                        health=providers_health.get(api, {}).get(
                            p.provider_id,
                            HealthResponse(
--- a/llama_stack/distribution/resolver.py
+++ b/llama_stack/distribution/resolver.py
@ -34,9 +34,8 @@ from llama_stack.distribution.datatypes import (
    RoutingTableProviderSpec,
    StackRunConfig,
 )
-from llama_stack.distribution.distribution import builtin_automatically_routed_apis
+from llama_stack.distribution.distribution import builtin_automatically_routed_apis, resolve_config
 from llama_stack.distribution.store import DistributionRegistry
-from llama_stack.distribution.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
    Api,
@ -156,7 +155,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
            "__builtin__": ProviderWithSpec(
                provider_id="__routing_table__",
                provider_type="__routing_table__",
-                config={},
+                config=None,
                spec=RoutingTableProviderSpec(
                    api=info.routing_table_api,
                    router_api=info.router_api,
@ -171,7 +170,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
            "__builtin__": ProviderWithSpec(
                provider_id="__autorouted__",
                provider_type="__autorouted__",
-                config={},
+                config=None,
                spec=AutoRoutedProviderSpec(
                    api=info.router_api,
                    module="llama_stack.distribution.routers",
@ -329,8 +328,7 @@ async def instantiate_provider(
    module = importlib.import_module(provider_spec.module)
    args = []
    if isinstance(provider_spec, RemoteProviderSpec):
-        config_type = instantiate_class_type(provider_spec.config_class)
-        config = config_type(**provider.config)
+        config = resolve_config(provider=provider, provider_spec=provider_spec)

        method = "get_adapter_impl"
        args = [config, deps]
@ -348,8 +346,7 @@ async def instantiate_provider(
    else:
        method = "get_provider_impl"

-        config_type = instantiate_class_type(provider_spec.config_class)
-        config = config_type(**provider.config)
+        config = resolve_config(provider=provider, provider_spec=provider_spec)
        args = [config, deps]
        if "policy" in inspect.signature(getattr(module, method)).parameters:
            args.append(policy)
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@ -42,11 +42,10 @@ from llama_stack.distribution.server.routes import (
 )
 from llama_stack.distribution.stack import (
    construct_stack,
-    replace_env_vars,
-    validate_env_pair,
 )
 from llama_stack.distribution.utils.config import redact_sensitive_fields
 from llama_stack.distribution.utils.context import preserve_contexts_async_generator
+from llama_stack.distribution.utils.env import replace_env_vars, validate_env_pair
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
 from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
@ -408,9 +407,10 @@ def main(args: argparse.Namespace | None = None):
        log_line = f"Using config file: {config_file}"
    elif args.template:
        config_file = Path(REPO_ROOT) / "llama_stack" / "templates" / args.template / "run.yaml"
-        if not config_file.exists():
+        provider_configs = Path(REPO_ROOT) / "llama_stack" / "templates" / args.template / "provider_configs"
+        if not config_file.exists() or not provider_configs.exists():
            raise ValueError(f"Template {args.template} does not exist")
-        log_line = f"Using template {args.template} config file: {config_file}"
+        log_line = f"Using template {args.template} config file: {config_file} and provider_config directory: {provider_configs}"
    else:
        raise ValueError("Either --config or --template must be provided")

--- a/llama_stack/distribution/stack.py
+++ b/llama_stack/distribution/stack.py
@ -5,8 +5,6 @@
 # the root directory of this source tree.

 import importlib.resources
-import os
-import re
 import tempfile
 from typing import Any

@ -40,6 +38,7 @@ from llama_stack.distribution.providers import ProviderImpl, ProviderImplConfig
 from llama_stack.distribution.resolver import ProviderRegistry, resolve_impls
 from llama_stack.distribution.store.registry import create_dist_registry
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.distribution.utils.env import replace_env_vars
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api

@ -123,136 +122,6 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
            )


-class EnvVarError(Exception):
-    def __init__(self, var_name: str, path: str = ""):
-        self.var_name = var_name
-        self.path = path
-        super().__init__(
-            f"Environment variable '{var_name}' not set or empty {f'at {path}' if path else ''}. "
-            f"Use ${{env.{var_name}:=default_value}} to provide a default value, "
-            f"${{env.{var_name}:+value_if_set}} to make the field conditional, "
-            f"or ensure the environment variable is set."
-        )
-
-
-def replace_env_vars(config: Any, path: str = "") -> Any:
-    if isinstance(config, dict):
-        result = {}
-        for k, v in config.items():
-            try:
-                result[k] = replace_env_vars(v, f"{path}.{k}" if path else k)
-            except EnvVarError as e:
-                raise EnvVarError(e.var_name, e.path) from None
-        return result
-
-    elif isinstance(config, list):
-        result = []
-        for i, v in enumerate(config):
-            try:
-                result.append(replace_env_vars(v, f"{path}[{i}]"))
-            except EnvVarError as e:
-                raise EnvVarError(e.var_name, e.path) from None
-        return result
-
-    elif isinstance(config, str):
-        # Pattern supports bash-like syntax: := for default and :+ for conditional and a optional value
-        pattern = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}"
-
-        def get_env_var(match: re.Match):
-            env_var = match.group(1)
-            operator = match.group(2)  # '=' for default, '+' for conditional
-            value_expr = match.group(3)
-
-            env_value = os.environ.get(env_var)
-
-            if operator == "=":  # Default value syntax: ${env.FOO:=default}
-                # If the env is set like ${env.FOO:=default} then use the env value when set
-                if env_value:
-                    value = env_value
-                else:
-                    # If the env is not set, look for a default value
-                    # value_expr returns empty string (not None) when not matched
-                    # This means ${env.FOO:=} and it's accepted and returns empty string - just like bash
-                    if value_expr == "":
-                        return ""
-                    else:
-                        value = value_expr
-
-            elif operator == "+":  # Conditional value syntax: ${env.FOO:+value_if_set}
-                # If the env is set like ${env.FOO:+value_if_set} then use the value_if_set
-                if env_value:
-                    if value_expr:
-                        value = value_expr
-                    # This means ${env.FOO:+}
-                    else:
-                        # Just like bash, this doesn't care whether the env is set or not and applies
-                        # the value, in this case the empty string
-                        return ""
-                else:
-                    # Just like bash, this doesn't care whether the env is set or not, since it's not set
-                    # we return an empty string
-                    value = ""
-            else:  # No operator case: ${env.FOO}
-                if not env_value:
-                    raise EnvVarError(env_var, path)
-                value = env_value
-
-            # expand "~" from the values
-            return os.path.expanduser(value)
-
-        try:
-            result = re.sub(pattern, get_env_var, config)
-            return _convert_string_to_proper_type(result)
-        except EnvVarError as e:
-            raise EnvVarError(e.var_name, e.path) from None
-
-    return config
-
-
-def _convert_string_to_proper_type(value: str) -> Any:
-    # This might be tricky depending on what the config type is, if  'str | None' we are
-    # good, if 'str' we need to keep the empty string... 'str | None' is more common and
-    # providers config should be typed this way.
-    # TODO: we could try to load the config class and see if the config has a field with type 'str | None'
-    # and then convert the empty string to None or not
-    if value == "":
-        return None
-
-    lowered = value.lower()
-    if lowered == "true":
-        return True
-    elif lowered == "false":
-        return False
-
-    try:
-        return int(value)
-    except ValueError:
-        pass
-
-    try:
-        return float(value)
-    except ValueError:
-        pass
-
-    return value
-
-
-def validate_env_pair(env_pair: str) -> tuple[str, str]:
-    """Validate and split an environment variable key-value pair."""
-    try:
-        key, value = env_pair.split("=", 1)
-        key = key.strip()
-        if not key:
-            raise ValueError(f"Empty key in environment variable pair: {env_pair}")
-        if not all(c.isalnum() or c == "_" for c in key):
-            raise ValueError(f"Key must contain only alphanumeric characters and underscores: {key}")
-        return key, value
-    except ValueError as e:
-        raise ValueError(
-            f"Invalid environment variable format '{env_pair}': {str(e)}. Expected format: KEY=value"
-        ) from e
-
-
 def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig) -> None:
    """Add internal implementations (inspect and providers) to the implementations dictionary.

--- a/llama_stack/distribution/utils/env.py
+++ b/llama_stack/distribution/utils/env.py
@ -0,0 +1,143 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+import re
+from typing import Any
+
+from llama_stack.log import get_logger
+
+logger = get_logger(name=__name__, category="core")
+
+
+class EnvVarError(Exception):
+    def __init__(self, var_name: str, path: str = ""):
+        self.var_name = var_name
+        self.path = path
+        super().__init__(
+            f"Environment variable '{var_name}' not set or empty {f'at {path}' if path else ''}. "
+            f"Use ${{env.{var_name}:=default_value}} to provide a default value, "
+            f"${{env.{var_name}:+value_if_set}} to make the field conditional, "
+            f"or ensure the environment variable is set."
+        )
+
+
+def replace_env_vars(config: Any, path: str = "") -> Any:
+    if isinstance(config, dict):
+        result_dict = {}
+        for k, v in config.items():
+            try:
+                result_dict[k] = replace_env_vars(v, f"{path}.{k}" if path else k)
+            except EnvVarError as e:
+                raise EnvVarError(e.var_name, e.path) from None
+        return result_dict
+
+    elif isinstance(config, list):
+        result_list = []
+        for i, v in enumerate(config):
+            try:
+                result_list.append(replace_env_vars(v, f"{path}[{i}]"))
+            except EnvVarError as e:
+                raise EnvVarError(e.var_name, e.path) from None
+        return result_list
+
+    elif isinstance(config, str):
+        # Pattern supports bash-like syntax: := for default and :+ for conditional and a optional value
+        pattern = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}"
+
+        def get_env_var(match: re.Match):
+            env_var = match.group(1)
+            operator = match.group(2)  # '=' for default, '+' for conditional
+            value_expr = match.group(3)
+
+            env_value = os.environ.get(env_var)
+
+            if operator == "=":  # Default value syntax: ${env.FOO:=default}
+                # If the env is set like ${env.FOO:=default} then use the env value when set
+                if env_value:
+                    value = env_value
+                else:
+                    # If the env is not set, look for a default value
+                    # value_expr returns empty string (not None) when not matched
+                    # This means ${env.FOO:=} and it's accepted and returns empty string - just like bash
+                    if value_expr == "":
+                        return ""
+                    else:
+                        value = value_expr
+
+            elif operator == "+":  # Conditional value syntax: ${env.FOO:+value_if_set}
+                # If the env is set like ${env.FOO:+value_if_set} then use the value_if_set
+                if env_value:
+                    if value_expr:
+                        value = value_expr
+                    # This means ${env.FOO:+}
+                    else:
+                        # Just like bash, this doesn't care whether the env is set or not and applies
+                        # the value, in this case the empty string
+                        return ""
+                else:
+                    # Just like bash, this doesn't care whether the env is set or not, since it's not set
+                    # we return an empty string
+                    value = ""
+            else:  # No operator case: ${env.FOO}
+                if not env_value:
+                    raise EnvVarError(env_var, path)
+                value = env_value
+
+            # expand "~" from the values
+            return os.path.expanduser(value)
+
+        try:
+            result = re.sub(pattern, get_env_var, config)
+            return _convert_string_to_proper_type(result)
+        except EnvVarError as e:
+            raise EnvVarError(e.var_name, e.path) from None
+
+    return config
+
+
+def _convert_string_to_proper_type(value: str) -> Any:
+    # This might be tricky depending on what the config type is, if  'str | None' we are
+    # good, if 'str' we need to keep the empty string... 'str | None' is more common and
+    # providers config should be typed this way.
+    # TODO: we could try to load the config class and see if the config has a field with type 'str | None'
+    # and then convert the empty string to None or not
+    if value == "":
+        return None
+
+    lowered = value.lower()
+    if lowered == "true":
+        return True
+    elif lowered == "false":
+        return False
+
+    try:
+        return int(value)
+    except ValueError:
+        pass
+
+    try:
+        return float(value)
+    except ValueError:
+        pass
+
+    return value
+
+
+def validate_env_pair(env_pair: str) -> tuple[str, str]:
+    """Validate and split an environment variable key-value pair."""
+    try:
+        key, value = env_pair.split("=", 1)
+        key = key.strip()
+        if not key:
+            raise ValueError(f"Empty key in environment variable pair: {env_pair}")
+        if not all(c.isalnum() or c == "_" for c in key):
+            raise ValueError(f"Key must contain only alphanumeric characters and underscores: {key}")
+        return key, value
+    except ValueError as e:
+        raise ValueError(
+            f"Invalid environment variable format '{env_pair}': {str(e)}. Expected format: KEY=value"
+        ) from e