feat: refactor external providers dir (#2049)

# What does this PR do?

Currently the "default" dir for external providers is
`/etc/llama-stack/providers.d`.

This dir is not created or used anywhere.

Switch to the friendlier `~/.llama/providers.d/`.

This allows external providers to actually create this dir and/or
populate it upon installation; `pip` cannot create directories under `/etc`.

If a user does not specify a dir, default to this one.

see https://github.com/containers/ramalama-stack/issues/36
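
As a rough illustration of the new default (a sketch assumed from the paths in this PR, not code taken from it), the home-relative dir resolves per-user and is creatable without root, unlike `/etc/llama-stack`:

```python
import os
from pathlib import Path

# Hypothetical helper: resolve the new default external providers dir
# by expanding "~" to the current user's home directory.
def default_providers_dir() -> Path:
    return Path(os.path.expanduser("~/.llama/providers.d"))

d = default_providers_dir()
# e.g. /home/alice/.llama/providers.d -- a pip post-install step can
# mkdir this without elevated privileges, which /etc does not allow.
```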

Signed-off-by: Charlie Doern <cdoern@redhat.com>
Author: Charlie Doern <cdoern@redhat.com>
Committed: 2025-05-15 14:17:03 -04:00 by GitHub
Commit: e46de23be6 (parent: 7e25c8df28)
GPG key ID: B5690EEEBB952194

14 changed files with 166 additions and 90 deletions

@@ -47,8 +47,8 @@ jobs:
       - name: Create provider configuration
         run: |
-          mkdir -p /tmp/providers.d/remote/inference
-          cp tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml /tmp/providers.d/remote/inference/custom_ollama.yaml
+          mkdir -p /home/runner/.llama/providers.d/remote/inference
+          cp tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml

       - name: Build distro from config file
         run: |
@@ -66,7 +66,7 @@ jobs:
       - name: Wait for Llama Stack server to be ready
         run: |
           for i in {1..30}; do
-            if ! grep -q "remote::custom_ollama from /tmp/providers.d/remote/inference/custom_ollama.yaml" server.log; then
+            if ! grep -q "remote::custom_ollama from /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml" server.log; then
               echo "Waiting for Llama Stack server to load the provider..."
               sleep 1
             else

@@ -178,7 +178,7 @@ image_name: ollama
 image_type: conda
 # If some providers are external, you can specify the path to the implementation
-external_providers_dir: /etc/llama-stack/providers.d
+external_providers_dir: ~/.llama/providers.d
 ```
 ```
@@ -206,7 +206,7 @@ distribution_spec:
 image_type: container
 image_name: ci-test
 # Path to external provider implementations
-external_providers_dir: /etc/llama-stack/providers.d
+external_providers_dir: ~/.llama/providers.d
 ```
 Here's an example for a custom Ollama provider:

@@ -10,7 +10,7 @@ Llama Stack supports external providers that live outside of the main codebase.
 To enable external providers, you need to configure the `external_providers_dir` in your Llama Stack configuration. This directory should contain your external provider specifications:
 ```yaml
-external_providers_dir: /etc/llama-stack/providers.d/
+external_providers_dir: ~/.llama/providers.d/
 ```
 ## Directory Structure
@@ -182,7 +182,7 @@ dependencies = ["llama-stack", "pydantic", "ollama", "aiohttp"]
 3. Create the provider specification:
 ```yaml
-# /etc/llama-stack/providers.d/remote/inference/custom_ollama.yaml
+# ~/.llama/providers.d/remote/inference/custom_ollama.yaml
 adapter:
   adapter_type: custom_ollama
   pip_packages: ["ollama", "aiohttp"]
@@ -201,7 +201,7 @@ uv pip install -e .
 5. Configure Llama Stack to use external providers:
 ```yaml
-external_providers_dir: /etc/llama-stack/providers.d/
+external_providers_dir: ~/.llama/providers.d/
 ```
 The provider will now be available in Llama Stack with the type `remote::custom_ollama`.
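
The doc hunk above places spec files at `<external_providers_dir>/<remote|inline>/<api>/<name>.yaml`. A minimal sketch of how a registry might discover them (the helper name and exact walk are assumptions, not this repo's API):

```python
from pathlib import Path

# Hypothetical discovery helper: find provider spec YAMLs two levels deep,
# e.g. remote/inference/custom_ollama.yaml, under the providers dir.
def list_provider_specs(providers_dir: str) -> list[Path]:
    root = Path(providers_dir).expanduser()
    if not root.is_dir():
        return []  # dir absent: no external providers configured
    return sorted(root.glob("*/*/*.yaml"))
```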

@@ -36,7 +36,8 @@ from llama_stack.distribution.datatypes import (
 )
 from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.resolver import InvalidProviderError
-from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
+from llama_stack.distribution.stack import replace_env_vars
+from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
 from llama_stack.distribution.utils.exec import formulate_run_args, run_command
 from llama_stack.distribution.utils.image_types import LlamaStackImageType
@@ -202,7 +203,9 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
     else:
         with open(args.config) as f:
             try:
-                build_config = BuildConfig(**yaml.safe_load(f))
+                contents = yaml.safe_load(f)
+                contents = replace_env_vars(contents)
+                build_config = BuildConfig(**contents)
             except Exception as e:
                 cprint(
                     f"Could not parse config file {args.config}: {e}",
@@ -248,6 +251,8 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
         run_config = Path(run_config)
         config_dict = yaml.safe_load(run_config.read_text())
         config = parse_and_maybe_upgrade_config(config_dict)
+        if not os.path.exists(str(config.external_providers_dir)):
+            os.makedirs(str(config.external_providers_dir), exist_ok=True)
         run_args = formulate_run_args(args.image_type, args.image_name, config, args.template)
         run_args.extend([run_config, str(os.getenv("LLAMA_STACK_PORT", 8321))])
         run_command(run_args)
@@ -267,7 +272,9 @@ def _generate_run_config(
         image_name=image_name,
         apis=apis,
         providers={},
-        external_providers_dir=build_config.external_providers_dir if build_config.external_providers_dir else None,
+        external_providers_dir=build_config.external_providers_dir
+        if build_config.external_providers_dir
+        else EXTERNAL_PROVIDERS_DIR,
     )
     # build providers dict
     provider_registry = get_provider_registry(build_config)

@@ -33,7 +33,8 @@ class StackRun(Subcommand):
         self.parser.add_argument(
             "config",
             type=str,
-            help="Path to config file to use for the run",
+            nargs="?",  # Make it optional
+            help="Path to config file to use for the run. Required for venv and conda environments.",
         )
         self.parser.add_argument(
             "--port",
@@ -82,6 +83,13 @@ class StackRun(Subcommand):
         from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
         from llama_stack.distribution.utils.exec import formulate_run_args, run_command

+        image_type, image_name = self._get_image_type_and_name(args)
+
+        # Check if config is required based on image type
+        if (image_type in [ImageType.CONDA.value, ImageType.VENV.value]) and not args.config:
+            self.parser.error("Config file is required for venv and conda environments")
+
+        if args.config:
             config_file = Path(args.config)
             has_yaml_suffix = args.config.endswith(".yaml")
             template_name = None
@@ -115,10 +123,14 @@ class StackRun(Subcommand):
             try:
                 config = parse_and_maybe_upgrade_config(config_dict)
+                if not os.path.exists(str(config.external_providers_dir)):
+                    os.makedirs(str(config.external_providers_dir), exist_ok=True)
             except AttributeError as e:
                 self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
-            image_type, image_name = self._get_image_type_and_name(args)
+        else:
+            config = None
+            config_file = None
+            template_name = None

         # If neither image type nor image name is provided, assume the server should be run directly
         # using the current environment packages.
@@ -141,7 +153,10 @@ class StackRun(Subcommand):
         else:
             run_args = formulate_run_args(image_type, image_name, config, template_name)

-            run_args.extend([str(config_file), str(args.port)])
+            run_args.extend([str(args.port)])
+            if config_file:
+                run_args.extend(["--config", str(config_file)])

             if args.env:
                 for env_var in args.env:
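
The argparse change above can be sketched in isolation: with `nargs="?"` the positional becomes optional and defaults to `None` when omitted (a standalone sketch, not the full `StackRun` parser):

```python
import argparse

# Minimal sketch of the CLI change: "config" becomes an optional positional,
# so container runs can omit it while venv/conda runs must pass one.
parser = argparse.ArgumentParser(prog="llama stack run")
parser.add_argument(
    "config",
    type=str,
    nargs="?",  # optional positional; None when absent
    help="Path to config file to use for the run. Required for venv and conda environments.",
)

no_config = parser.parse_args([])           # config omitted
with_config = parser.parse_args(["run.yaml"])
```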

@@ -154,6 +154,12 @@ get_python_cmd() {
   fi
 }

+# Add other required item commands generic to all containers
+add_to_container << EOF
+# Allows running as non-root user
+RUN mkdir -p /.llama/providers.d /.cache
+EOF
+
 if [ -n "$run_config" ]; then
   # Copy the run config to the build context since it's an absolute path
   cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml"
@@ -166,17 +172,19 @@ EOF
   # and update the configuration to reference the new container path
   python_cmd=$(get_python_cmd)
   external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')")
-  if [ -n "$external_providers_dir" ]; then
+  external_providers_dir=$(eval echo "$external_providers_dir")
+  if [ -n "$external_providers_dir" ] && [ -d "$external_providers_dir" ]; then
     echo "Copying external providers directory: $external_providers_dir"
+    cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d"
     add_to_container << EOF
-COPY $external_providers_dir /app/providers.d
+COPY providers.d /.llama/providers.d
 EOF
-    # Edit the run.yaml file to change the external_providers_dir to /app/providers.d
+    # Edit the run.yaml file to change the external_providers_dir to /.llama/providers.d
     if [ "$(uname)" = "Darwin" ]; then
-      sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /app/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
+      sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
       rm -f "$BUILD_CONTEXT_DIR/run.yaml.bak"
     else
-      sed -i 's|external_providers_dir:.*|external_providers_dir: /app/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
+      sed -i 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
     fi
   fi
 fi
@@ -255,9 +263,6 @@ fi
 # Add other require item commands genearic to all containers
 add_to_container << EOF
-# Allows running as non-root user
-RUN mkdir -p /.llama /.cache
 RUN chmod -R g+rw /app /.llama /.cache
 EOF

@@ -17,6 +17,7 @@ from llama_stack.distribution.distribution import (
     builtin_automatically_routed_apis,
     get_provider_registry,
 )
+from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
 from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
 from llama_stack.providers.datatypes import Api, ProviderSpec
@@ -170,4 +171,7 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig:
     config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION

+    if not config_dict.get("external_providers_dir", None):
+        config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR
+
     return StackRunConfig(**config_dict)

@@ -5,9 +5,10 @@
 # the root directory of this source tree.

 from enum import Enum
+from pathlib import Path
 from typing import Annotated, Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator

 from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
 from llama_stack.apis.datasetio import DatasetIO
@@ -312,11 +313,20 @@ a default SQLite store will be used.""",
         description="Configuration for the HTTP(S) server",
     )

-    external_providers_dir: str | None = Field(
+    external_providers_dir: Path | None = Field(
         default=None,
         description="Path to directory containing external provider implementations. The providers code and dependencies must be installed on the system.",
     )

+    @field_validator("external_providers_dir")
+    @classmethod
+    def validate_external_providers_dir(cls, v):
+        if v is None:
+            return None
+        if isinstance(v, str):
+            return Path(v)
+        return v
+

 class BuildConfig(BaseModel):
     version: str = LLAMA_STACK_BUILD_CONFIG_VERSION
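
The coercion the new `field_validator` performs can be shown without pydantic (a standalone sketch of the same logic, not the model itself): `None` passes through, strings become `Path` objects, and existing `Path` values are untouched.

```python
from pathlib import Path

# Plain-function equivalent of validate_external_providers_dir above:
# normalize str inputs to Path, leave None and Path values as-is.
def coerce_external_providers_dir(v):
    if v is None:
        return None
    if isinstance(v, str):
        return Path(v)
    return v
```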

@@ -145,7 +145,7 @@ def get_provider_registry(
     # Check if config has the external_providers_dir attribute
     if config and hasattr(config, "external_providers_dir") and config.external_providers_dir:
-        external_providers_dir = os.path.abspath(config.external_providers_dir)
+        external_providers_dir = os.path.abspath(os.path.expanduser(config.external_providers_dir))
         if not os.path.exists(external_providers_dir):
             raise FileNotFoundError(f"External providers directory not found: {external_providers_dir}")
         logger.info(f"Loading external providers from {external_providers_dir}")
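
The `expanduser` addition matters because `os.path.abspath` alone treats `~` as a literal directory name relative to the current working directory (a quick sketch of the difference):

```python
import os

# Without expanduser, "~" stays literal and resolves under the CWD;
# with it, the path lands under the user's real home directory.
raw = "~/.llama/providers.d"
without_expand = os.path.abspath(raw)                       # ".../~/.llama/providers.d"
with_expand = os.path.abspath(os.path.expanduser(raw))      # "/home/<user>/.llama/providers.d"
```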

@@ -29,7 +29,7 @@ error_handler() {
 trap 'error_handler ${LINENO}' ERR

 if [ $# -lt 3 ]; then
-  echo "Usage: $0 <env_type> <env_path_or_name> <yaml_config> <port> <script_args...>"
+  echo "Usage: $0 <env_type> <env_path_or_name> <port> [--config <yaml_config>] [--env KEY=VALUE]..."
   exit 1
 fi

@@ -40,23 +40,30 @@ env_path_or_name="$1"
 container_image="localhost/$env_path_or_name"
 shift

-yaml_config="$1"
-shift

 port="$1"
 shift

 SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
 source "$SCRIPT_DIR/common.sh"

-# Initialize env_vars as an string
+# Initialize variables
+yaml_config=""
 env_vars=""
 other_args=""

-# Process environment variables from --env arguments
+# Process remaining arguments
 while [[ $# -gt 0 ]]; do
   case "$1" in
+    --config|--yaml-config)
+      if [[ -n "$2" ]]; then
+        yaml_config="$2"
+        shift 2
+      else
+        echo -e "${RED}Error: $1 requires a CONFIG argument${NC}" >&2
+        exit 1
+      fi
+      ;;
     --env)
       if [[ -n "$2" ]]; then
         env_vars="$env_vars --env $2"
         shift 2
@@ -71,6 +78,13 @@ while [[ $# -gt 0 ]]; do
       ;;
   esac
 done

+# Check if yaml_config is required based on env_type
+if [[ "$env_type" == "venv" || "$env_type" == "conda" ]] && [ -z "$yaml_config" ]; then
+  echo -e "${RED}Error: --config is required for venv and conda environments${NC}" >&2
+  exit 1
+fi
+
 PYTHON_BINARY="python"
 case "$env_type" in
   "venv")
@@ -106,8 +120,14 @@ esac

 if [[ "$env_type" == "venv" || "$env_type" == "conda" ]]; then
     set -x
+
+    if [ -n "$yaml_config" ]; then
+        yaml_config_arg="--yaml-config $yaml_config"
+    else
+        yaml_config_arg=""
+    fi
+
     $PYTHON_BINARY -m llama_stack.distribution.server.server \
-    --yaml-config "$yaml_config" \
+    $yaml_config_arg \
     --port "$port" \
     $env_vars \
     $other_args
@@ -149,15 +169,26 @@ elif [[ "$env_type" == "container" ]]; then
         version_tag=$(curl -s $URL | jq -r '.info.version')
     fi

-    $CONTAINER_BINARY run $CONTAINER_OPTS -it \
+    # Build the command with optional yaml config
+    cmd="$CONTAINER_BINARY run $CONTAINER_OPTS -it \
     -p $port:$port \
     $env_vars \
-    -v "$yaml_config:/app/config.yaml" \
     $mounts \
     --env LLAMA_STACK_PORT=$port \
     --entrypoint python \
     $container_image:$version_tag \
-    -m llama_stack.distribution.server.server \
-    --yaml-config /app/config.yaml \
-    $other_args
+    -m llama_stack.distribution.server.server"
+
+    # Add yaml config if provided, otherwise use default
+    if [ -n "$yaml_config" ]; then
+        cmd="$cmd -v $yaml_config:/app/run.yaml --yaml-config /app/run.yaml"
+    else
+        cmd="$cmd --yaml-config /app/run.yaml"
+    fi
+
+    # Add any other args
+    cmd="$cmd $other_args"
+
+    # Execute the command
+    eval $cmd
 fi

@@ -14,3 +14,5 @@ DISTRIBS_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "distributions"
 DEFAULT_CHECKPOINT_DIR = LLAMA_STACK_CONFIG_DIR / "checkpoints"

 RUNTIME_BASE_DIR = LLAMA_STACK_CONFIG_DIR / "runtime"
+
+EXTERNAL_PROVIDERS_DIR = LLAMA_STACK_CONFIG_DIR / "providers.d"

@@ -22,8 +22,10 @@ from llama_stack.distribution.utils.image_types import LlamaStackImageType

 def formulate_run_args(image_type, image_name, config, template_name) -> list:
     env_name = ""
-    if image_type == LlamaStackImageType.CONTAINER.value or config.container_image:
-        env_name = f"distribution-{template_name}" if template_name else config.container_image
+    if image_type == LlamaStackImageType.CONTAINER.value:
+        env_name = (
+            f"distribution-{template_name}" if template_name else (config.container_image if config else image_name)
+        )
     elif image_type == LlamaStackImageType.CONDA.value:
         current_conda_env = os.environ.get("CONDA_DEFAULT_ENV")
         env_name = image_name or current_conda_env

@@ -6,4 +6,4 @@ distribution_spec:
   - remote::custom_ollama
   image_type: container
   image_name: ci-test
-external_providers_dir: /tmp/providers.d
+external_providers_dir: ~/.llama/providers.d

@@ -91,4 +91,4 @@ tool_groups:
     provider_id: wolfram-alpha
 server:
   port: 8321
-external_providers_dir: /tmp/providers.d
+external_providers_dir: ~/.llama/providers.d