refactor: install external provider via module

Using `module` in the provider class, together with the fact that the `build` and `run` configs BOTH use the `class Provider` now, enables us to point to an external provider via a `module`.

For example, say this is in your build config:

```
- provider_id: ramalama
  provider_type: remote::ramalama
  module: ramalama_stack
```

During build (in the various scripts), in addition to installing any pip dependencies, we will also install this module and use the `get_provider_spec` method to retrieve the ProviderSpec that is currently specified using `providers.d`.

Most (if not all) external providers today have a `get_provider_spec` method that sits unused. Utilizing this method rather than the `providers.d` route allows for a much easier installation process for external providers and limits the amount of extra configuration a regular user has to do to get their stack off the ground.

In production so far, providing instructions for installing external providers has been difficult for users: they need to install the module as a prerequisite, create the `providers.d` directory, copy in the provider spec, and also copy in the necessary build/run YAML files. Using the module is a more seamless discovery method.

Signed-off-by: Charlie Doern <cdoern@redhat.com>
2025-07-25 21:57:45 +00:00 · 2025-07-06 20:00:58 -04:00 · 2025-07-06 20:00:58 -04:00 · dcc6b1eee9
commit dcc6b1eee9
parent 233f8c81bf
6 changed files with 508 additions and 232 deletions
--- a/llama_stack/cli/stack/_build.py
+++ b/llama_stack/cli/stack/_build.py
@ -94,7 +94,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
            )
            sys.exit(1)
    elif args.providers:
-        providers_list: dict[str, str | list[str]] = dict()
+        provider_list: dict[str, list[Provider]] = dict()
        for api_provider in args.providers.split(","):
            if "=" not in api_provider:
                cprint(
@ -103,7 +103,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
                    file=sys.stderr,
                )
                sys.exit(1)
-            api, provider = api_provider.split("=")
+            api, provider_type = api_provider.split("=")
            providers_for_api = get_provider_registry().get(Api(api), None)
            if providers_for_api is None:
                cprint(
@ -112,16 +112,14 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
                    file=sys.stderr,
                )
                sys.exit(1)
-            if provider in providers_for_api:
-                if api not in providers_list:
-                    providers_list[api] = []
-                # Use type guarding to ensure we have a list
-                provider_value = providers_list[api]
-                if isinstance(provider_value, list):
-                    provider_value.append(provider)
-                else:
-                    # Convert string to list and append
-                    providers_list[api] = [provider_value, provider]
+            if provider_type in providers_for_api:
+                provider = Provider(
+                    provider_type=provider_type,
+                    provider_id=provider_type.split("::")[1],
+                    config={},
+                    module=None,
+                )
+                provider_list.setdefault(api, []).append(provider)
            else:
                cprint(
                    f"{provider} is not a valid provider for the {api} API.",
@ -130,7 +128,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
                )
                sys.exit(1)
        distribution_spec = DistributionSpec(
-            providers=providers_list,
+            providers=provider_list,
            description=",".join(args.providers),
        )
        if not args.image_type:
@ -191,7 +189,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:

        cprint("Tip: use <TAB> to see options for the providers.\n", color="green", file=sys.stderr)

-        providers: dict[str, str | list[str]] = dict()
+        providers: dict[str, list[Provider]] = dict()
        for api, providers_for_api in get_provider_registry().items():
            available_providers = [x for x in providers_for_api.keys() if x not in ("remote", "remote::sample")]
            if not available_providers:
@ -237,11 +235,13 @@ def run_stack_build_command(args: argparse.Namespace) -> None:

    if args.print_deps_only:
        print(f"# Dependencies for {args.template or args.config or image_name}")
-        normal_deps, special_deps = get_provider_dependencies(build_config)
+        normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
        normal_deps += SERVER_DEPENDENCIES
        print(f"uv pip install {' '.join(normal_deps)}")
        for special_dep in special_deps:
            print(f"uv pip install {special_dep}")
+        for external_dep in external_provider_dependencies:
+            print(f"uv pip install {external_dep}")
        return

    try:
@ -304,27 +304,25 @@ def _generate_run_config(
    provider_registry = get_provider_registry(build_config)
    for api in apis:
        run_config.providers[api] = []
-        provider_types = build_config.distribution_spec.providers[api]
-        if isinstance(provider_types, str):
-            provider_types = [provider_types]
+        providers = build_config.distribution_spec.providers[api]

-        for i, provider_type in enumerate(provider_types):
-            pid = provider_type.split("::")[-1]
+        for provider in providers:
+            pid = provider.provider_id

-            p = provider_registry[Api(api)][provider_type]
+            p = provider_registry[Api(api)][provider.provider_type]
            if p.deprecation_error:
                raise InvalidProviderError(p.deprecation_error)

            try:
-                config_type = instantiate_class_type(provider_registry[Api(api)][provider_type].config_class)
-            except ModuleNotFoundError:
+                config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
+            except (ModuleNotFoundError, ValueError) as exc:
                # HACK ALERT:
                # This code executes after building is done, the import cannot work since the
                # package is either available in the venv or container - not available on the host.
                # TODO: use a "is_external" flag in ProviderSpec to check if the provider is
                # external
                cprint(
-                    f"Failed to import provider {provider_type} for API {api} - assuming it's external, skipping",
+                    f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
                    color="yellow",
                    file=sys.stderr,
                )
@ -337,9 +335,10 @@ def _generate_run_config(
                config = {}

            p_spec = Provider(
-                provider_id=f"{pid}-{i}" if len(provider_types) > 1 else pid,
-                provider_type=provider_type,
+                provider_id=pid,
+                provider_type=provider.provider_type,
                config=config,
+                module=provider.module,
            )
            run_config.providers[api].append(p_spec)

--- a/llama_stack/distribution/build.py
+++ b/llama_stack/distribution/build.py
@ -42,7 +42,7 @@ class ApiInput(BaseModel):

 def get_provider_dependencies(
    config: BuildConfig | DistributionTemplate,
-) -> tuple[list[str], list[str]]:
+) -> tuple[list[str], list[str], list[str]]:
    """Get normal and special dependencies from provider configuration."""
    if isinstance(config, DistributionTemplate):
        config = config.build_config()
@ -51,6 +51,7 @@ def get_provider_dependencies(
    additional_pip_packages = config.additional_pip_packages

    deps = []
+    external_provider_deps = []
    registry = get_provider_registry(config)
    for api_str, provider_or_providers in providers.items():
        providers_for_api = registry[Api(api_str)]
@ -65,8 +66,16 @@ def get_provider_dependencies(
                raise ValueError(f"Provider `{provider}` is not available for API `{api_str}`")

            provider_spec = providers_for_api[provider_type]
-            deps.extend(provider_spec.pip_packages)
-            if provider_spec.container_image:
+            if hasattr(provider_spec, "is_external") and provider_spec.is_external:
+                # this ensures we install the top level module for our external providers
+                if provider_spec.module:
+                    if isinstance(provider_spec.module, str):
+                        external_provider_deps.append(provider_spec.module)
+                    else:
+                        external_provider_deps.extend(provider_spec.module)
+            if hasattr(provider_spec, "pip_packages"):
+                deps.extend(provider_spec.pip_packages)
+            if hasattr(provider_spec, "container_image") and provider_spec.container_image:
                raise ValueError("A stack's dependencies cannot have a container image")

    normal_deps = []
@ -79,7 +88,7 @@ def get_provider_dependencies(

    normal_deps.extend(additional_pip_packages or [])

-    return list(set(normal_deps)), list(set(special_deps))
+    return list(set(normal_deps)), list(set(special_deps)), list(set(external_provider_deps))


 def print_pip_install_help(config: BuildConfig):
@ -104,7 +113,7 @@ def build_image(
 ):
    container_base = build_config.distribution_spec.container_image or "python:3.12-slim"

-    normal_deps, special_deps = get_provider_dependencies(build_config)
+    normal_deps, special_deps, external_provider_deps = get_provider_dependencies(build_config)
    normal_deps += SERVER_DEPENDENCIES
    if build_config.external_apis_dir:
        external_apis = load_external_apis(build_config)
@ -116,34 +125,47 @@ def build_image(
        script = str(importlib.resources.files("llama_stack") / "distribution/build_container.sh")
        args = [
            script,
+            "--template-or-config",
            template_or_config,
+            "--image-name",
            image_name,
+            "--container-base",
            container_base,
+            "--normal-deps",
            " ".join(normal_deps),
        ]
-
        # When building from a config file (not a template), include the run config path in the
        # build arguments
        if run_config is not None:
-            args.append(run_config)
+            args.extend(["--run-config", run_config])
    elif build_config.image_type == LlamaStackImageType.CONDA.value:
        script = str(importlib.resources.files("llama_stack") / "distribution/build_conda_env.sh")
        args = [
            script,
+            "--env-name",
            str(image_name),
+            "--build-file-path",
            str(build_file_path),
+            "--normal-deps",
            " ".join(normal_deps),
        ]
    elif build_config.image_type == LlamaStackImageType.VENV.value:
        script = str(importlib.resources.files("llama_stack") / "distribution/build_venv.sh")
        args = [
            script,
+            "--env-name",
            str(image_name),
+            "--normal-deps",
            " ".join(normal_deps),
        ]

+    # Optional flags: each one is appended only when it has a value (the build scripts parse named flags, not positional arguments)
    if special_deps:
-        args.append("#".join(special_deps))
+        args.extend(["--optional-deps", "#".join(special_deps)])
+    if external_provider_deps:
+        args.extend(
+            ["--external-provider-deps", "#".join(external_provider_deps)]
+        )  # the script will install external provider module, get its deps, and install those too.

    return_code = run_command(args)

--- a/llama_stack/distribution/build_conda_env.sh
+++ b/llama_stack/distribution/build_conda_env.sh
@ -9,10 +9,91 @@
 LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
 LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
 TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
+PYPI_VERSION=${PYPI_VERSION:-}
 # This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
 UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}

+set -euo pipefail
+
+# Define color codes
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+NC='\033[0m' # No Color
+
+SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
+source "$SCRIPT_DIR/common.sh"
+
+# Usage function
+usage() {
+  echo "Usage: $0 --env-name <conda_env_name> --build-file-path <build_file_path> --normal-deps <pip_dependencies> [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
+  echo "Example: $0 --env-name my-conda-env --build-file-path ./my-stack-build.yaml --normal-deps 'numpy pandas scipy' --external-provider-deps 'foo' --optional-deps 'bar'"
+  exit 1
+}
+
+# Parse arguments
+env_name=""
+build_file_path=""
+normal_deps=""
+external_provider_deps=""
+optional_deps=""
+
+while [[ $# -gt 0 ]]; do
+  key="$1"
+  case "$key" in
+    --env-name)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --env-name requires a string value" >&2
+        usage
+      fi
+      env_name="$2"
+      shift 2
+      ;;
+    --build-file-path)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --build-file-path requires a string value" >&2
+        usage
+      fi
+      build_file_path="$2"
+      shift 2
+      ;;
+    --normal-deps)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --normal-deps requires a string value" >&2
+        usage
+      fi
+      normal_deps="$2"
+      shift 2
+      ;;
+    --external-provider-deps)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --external-provider-deps requires a string value" >&2
+        usage
+      fi
+      external_provider_deps="$2"
+      shift 2
+      ;;
+    --optional-deps)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --optional-deps requires a string value" >&2
+        usage
+      fi
+      optional_deps="$2"
+      shift 2
+      ;;
+    *)
+      echo "Unknown option: $1" >&2
+      usage
+      ;;
+  esac
+done
+
+# Check required arguments
+if [[ -z "$env_name" || -z "$build_file_path" || -z "$normal_deps" ]]; then
+  echo "Error: --env-name, --build-file-path, and --normal-deps are required." >&2
+  usage
+fi
+
 if [ -n "$LLAMA_STACK_DIR" ]; then
  echo "Using llama-stack-dir=$LLAMA_STACK_DIR"
 fi
@ -20,50 +101,18 @@ if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
  echo "Using llama-stack-client-dir=$LLAMA_STACK_CLIENT_DIR"
 fi

-if [ "$#" -lt 3 ]; then
-  echo "Usage: $0 <distribution_type> <conda_env_name> <build_file_path> <pip_dependencies> [<special_pip_deps>]" >&2
-  echo "Example: $0 <distribution_type> my-conda-env ./my-stack-build.yaml 'numpy pandas scipy'" >&2
-  exit 1
-fi
-
-special_pip_deps="$4"
-
-set -euo pipefail
-
-env_name="$1"
-build_file_path="$2"
-pip_dependencies="$3"
-
-# Define color codes
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-NC='\033[0m' # No Color
-
-# this is set if we actually create a new conda in which case we need to clean up
-ENVNAME=""
-
-SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
-source "$SCRIPT_DIR/common.sh"
-
 ensure_conda_env_python310() {
-  local env_name="$1"
-  local pip_dependencies="$2"
-  local special_pip_deps="$3"
+  # Use only global variables set by flag parser
  local python_version="3.12"

-  # Check if conda command is available
  if ! is_command_available conda; then
    printf "${RED}Error: conda command not found. Is Conda installed and in your PATH?${NC}" >&2
    exit 1
  fi

-  # Check if the environment exists
  if conda env list | grep -q "^${env_name} "; then
    printf "Conda environment '${env_name}' exists. Checking Python version...\n"
-
-    # Check Python version in the environment
    current_version=$(conda run -n "${env_name}" python --version 2>&1 | cut -d' ' -f2 | cut -d'.' -f1,2)
-
    if [ "$current_version" = "$python_version" ]; then
      printf "Environment '${env_name}' already has Python ${python_version}. No action needed.\n"
    else
@ -73,37 +122,37 @@ ensure_conda_env_python310() {
  else
    printf "Conda environment '${env_name}' does not exist. Creating with Python ${python_version}...\n"
    conda create -n "${env_name}" python="${python_version}" -y
-
-    ENVNAME="${env_name}"
-    # setup_cleanup_handlers
  fi

  eval "$(conda shell.bash hook)"
  conda deactivate && conda activate "${env_name}"
-
  "$CONDA_PREFIX"/bin/pip install uv

  if [ -n "$TEST_PYPI_VERSION" ]; then
-    # these packages are damaged in test-pypi, so install them first
    uv pip install fastapi libcst
    uv pip install --extra-index-url https://test.pypi.org/simple/ \
      llama-stack=="$TEST_PYPI_VERSION" \
-      "$pip_dependencies"
-    if [ -n "$special_pip_deps" ]; then
-      IFS='#' read -ra parts <<<"$special_pip_deps"
+      "$normal_deps"
+    if [ -n "$optional_deps" ]; then
+      IFS='#' read -ra parts <<<"$optional_deps"
+      for part in "${parts[@]}"; do
+        echo "$part"
+        uv pip install $part
+      done
+    fi
+    if [ -n "$external_provider_deps" ]; then
+      IFS='#' read -ra parts <<<"$external_provider_deps"
      for part in "${parts[@]}"; do
        echo "$part"
        uv pip install "$part"
      done
    fi
  else
-    # Re-installing llama-stack in the new conda environment
    if [ -n "$LLAMA_STACK_DIR" ]; then
      if [ ! -d "$LLAMA_STACK_DIR" ]; then
        printf "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: $LLAMA_STACK_DIR${NC}\n" >&2
        exit 1
      fi
-
      printf "Installing from LLAMA_STACK_DIR: $LLAMA_STACK_DIR\n"
      uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"
    else
@ -115,31 +164,44 @@ ensure_conda_env_python310() {
      fi
      uv pip install --no-cache-dir "$SPEC_VERSION"
    fi
-
    if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
      if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then
        printf "${RED}Warning: LLAMA_STACK_CLIENT_DIR is set but directory does not exist: $LLAMA_STACK_CLIENT_DIR${NC}\n" >&2
        exit 1
      fi
-
      printf "Installing from LLAMA_STACK_CLIENT_DIR: $LLAMA_STACK_CLIENT_DIR\n"
      uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"
    fi
-
-    # Install pip dependencies
    printf "Installing pip dependencies\n"
-    uv pip install $pip_dependencies
-    if [ -n "$special_pip_deps" ]; then
-      IFS='#' read -ra parts <<<"$special_pip_deps"
+    uv pip install $normal_deps
+    if [ -n "$optional_deps" ]; then
+      IFS='#' read -ra parts <<<"$optional_deps"
      for part in "${parts[@]}"; do
        echo "$part"
        uv pip install $part
      done
    fi
+    if [ -n "$external_provider_deps" ]; then
+      IFS='#' read -ra parts <<<"$external_provider_deps"
+      for part in "${parts[@]}"; do
+        echo "Getting provider spec for module: $part and installing dependencies"
+        package_name=$(echo "$part" | sed 's/[<>=!].*//')
+        python3 -c "
+import importlib
+import sys
+try:
+    module = importlib.import_module(f'$package_name.provider')
+    spec = module.get_provider_spec()
+    if hasattr(spec, 'pip_packages') and spec.pip_packages:
+        print('\\n'.join(spec.pip_packages))
+except Exception as e:
+    print(f'Error getting provider spec for $package_name: {e}', file=sys.stderr)
+" | uv pip install -r -
+      done
+    fi
  fi
-
  mv "$build_file_path" "$CONDA_PREFIX"/llamastack-build.yaml
  echo "Build spec configuration saved at $CONDA_PREFIX/llamastack-build.yaml"
 }

-ensure_conda_env_python310 "$env_name" "$pip_dependencies" "$special_pip_deps"
+ensure_conda_env_python310 "$env_name" "$build_file_path" "$normal_deps" "$optional_deps" "$external_provider_deps"
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/distribution/build_container.sh
@ -27,52 +27,103 @@ RUN_CONFIG_PATH=/app/run.yaml

 BUILD_CONTEXT_DIR=$(pwd)

-if [ "$#" -lt 4 ]; then
-  # This only works for templates
-  echo "Usage: $0 <template_or_config> <image_name> <container_base> <pip_dependencies> [<run_config>] [<special_pip_deps>]" >&2
-  exit 1
-fi
 set -euo pipefail

-template_or_config="$1"
-shift
-image_name="$1"
-shift
-container_base="$1"
-shift
-pip_dependencies="$1"
-shift
-
-# Handle optional arguments
-run_config=""
-special_pip_deps=""
-
-# Check if there are more arguments
-# The logics is becoming cumbersom, we should refactor it if we can do better
-if [ $# -gt 0 ]; then
-  # Check if the argument ends with .yaml
-  if [[ "$1" == *.yaml ]]; then
-    run_config="$1"
-    shift
-    # If there's another argument after .yaml, it must be special_pip_deps
-    if [ $# -gt 0 ]; then
-      special_pip_deps="$1"
-    fi
-  else
-    # If it's not .yaml, it must be special_pip_deps
-    special_pip_deps="$1"
-  fi
-fi
-
 # Define color codes
 RED='\033[0;31m'
 NC='\033[0m' # No Color

+# Usage function
+usage() {
+  echo "Usage: $0 --image-name <image_name> --container-base <container_base> --normal-deps <pip_dependencies> [--run-config <run_config>] [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
+  echo "Example: $0 --image-name llama-stack-img --container-base python:3.12-slim --normal-deps 'numpy pandas' --run-config ./run.yaml --external-provider-deps 'foo' --optional-deps 'bar'"
+  exit 1
+}
+
+# Parse arguments
+image_name=""
+container_base=""
+normal_deps=""
+external_provider_deps=""
+optional_deps=""
+run_config=""
+template_or_config=""
+
+while [[ $# -gt 0 ]]; do
+  key="$1"
+  case "$key" in
+    --image-name)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --image-name requires a string value" >&2
+        usage
+      fi
+      image_name="$2"
+      shift 2
+      ;;
+    --container-base)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --container-base requires a string value" >&2
+        usage
+      fi
+      container_base="$2"
+      shift 2
+      ;;
+    --normal-deps)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --normal-deps requires a string value" >&2
+        usage
+      fi
+      normal_deps="$2"
+      shift 2
+      ;;
+    --external-provider-deps)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --external-provider-deps requires a string value" >&2
+        usage
+      fi
+      external_provider_deps="$2"
+      shift 2
+      ;;
+    --optional-deps)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --optional-deps requires a string value" >&2
+        usage
+      fi
+      optional_deps="$2"
+      shift 2
+      ;;
+    --run-config)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --run-config requires a string value" >&2
+        usage
+      fi
+      run_config="$2"
+      shift 2
+      ;;
+    --template-or-config)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --template-or-config requires a string value" >&2
+        usage
+      fi
+      template_or_config="$2"
+      shift 2
+      ;;
+    *)
+      echo "Unknown option: $1" >&2
+      usage
+      ;;
+  esac
+done
+
+# Check required arguments
+if [[ -z "$image_name" || -z "$container_base" || -z "$normal_deps" ]]; then
+  echo "Error: --image-name, --container-base, and --normal-deps are required." >&2
+  usage
+fi
+
 CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
 CONTAINER_OPTS=${CONTAINER_OPTS:---progress=plain}
-
 TEMP_DIR=$(mktemp -d)
-
 SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
 source "$SCRIPT_DIR/common.sh"

@ -81,18 +132,15 @@ add_to_container() {
  if [ -t 0 ]; then
    printf '%s\n' "$1" >>"$output_file"
  else
-    # If stdin is not a terminal, read from it (heredoc)
    cat >>"$output_file"
  fi
 }

-# Check if container command is available
 if ! is_command_available "$CONTAINER_BINARY"; then
  printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
  exit 1
 fi

-# Update and install UBI9 components if UBI9 base image is used
 if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
  add_to_container << EOF
 FROM $container_base
@ -135,16 +183,16 @@ EOF

 # Add pip dependencies first since llama-stack is what will change most often
 # so we can reuse layers.
-if [ -n "$pip_dependencies" ]; then
-  read -ra pip_args <<< "$pip_dependencies"
+if [ -n "$normal_deps" ]; then
+  read -ra pip_args <<<  "$normal_deps"
  quoted_deps=$(printf " %q" "${pip_args[@]}")
  add_to_container << EOF
-RUN $MOUNT_CACHE uv pip install $quoted_deps
+RUN $MOUNT_CACHE uv pip install --no-cache $quoted_deps
 EOF
 fi

-if [ -n "$special_pip_deps" ]; then
-  IFS='#' read -ra parts <<<"$special_pip_deps"
+if [ -n "$optional_deps" ]; then
+  IFS='#' read -ra parts <<<"$optional_deps"
  for part in "${parts[@]}"; do
    read -ra pip_args <<< "$part"
    quoted_deps=$(printf " %q" "${pip_args[@]}")
@ -154,7 +202,31 @@ EOF
  done
 fi

-# Function to get Python command
+if [ -n "$external_provider_deps" ]; then
+  IFS='#' read -ra parts <<<"$external_provider_deps"
+  for part in "${parts[@]}"; do
+    add_to_container <<EOF
+RUN uv pip install --no-cache "$part"
+EOF
+    add_to_container <<EOF
+RUN python3 - <<PYTHON | uv pip install --no-cache -r -
+import importlib
+import sys
+
+try:
+    package_name = '$part'.split('==')[0].split('>=')[0].split('<=')[0].split('!=')[0].split('<')[0].split('>')[0]
+    module = importlib.import_module(f'{package_name}.provider')
+    spec = module.get_provider_spec()
+    if hasattr(spec, 'pip_packages') and spec.pip_packages:
+        if isinstance(spec.pip_packages, (list, tuple)):
+            print('\n'.join(spec.pip_packages))
+except Exception as e:
+    print(f'Error getting provider spec for {package_name}: {e}', file=sys.stderr)
+PYTHON
+EOF
+  done
+fi
+
 get_python_cmd() {
    if is_command_available python; then
        echo "python"
@ -167,12 +239,7 @@ get_python_cmd() {
 }

 if [ -n "$run_config" ]; then
-  # Copy the run config to the build context since it's an absolute path
  cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml"
-
-  # Parse the run.yaml configuration to identify external provider directories
-  # If external providers are specified, copy their directory to the container
-  # and update the configuration to reference the new container path
  python_cmd=$(get_python_cmd)
  external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')")
  external_providers_dir=$(eval echo "$external_providers_dir")
--- a/llama_stack/distribution/build_venv.sh
+++ b/llama_stack/distribution/build_venv.sh
@ -18,6 +18,76 @@ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
 UV_SYSTEM_PYTHON=${UV_SYSTEM_PYTHON:-}
 VIRTUAL_ENV=${VIRTUAL_ENV:-}

+set -euo pipefail
+
+# Define color codes
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
+source "$SCRIPT_DIR/common.sh"
+
+# Usage function
+usage() {
+  echo "Usage: $0 --env-name <env_name> --normal-deps <pip_dependencies> [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
+  echo "Example: $0 --env-name mybuild --normal-deps 'numpy pandas scipy' --external-provider-deps 'foo' --optional-deps 'bar'"
+  exit 1
+}
+
+# Parse arguments
+env_name=""
+normal_deps=""
+external_provider_deps=""
+optional_deps=""
+
+while [[ $# -gt 0 ]]; do
+  key="$1"
+  case "$key" in
+    --env-name)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --env-name requires a string value" >&2
+        usage
+      fi
+      env_name="$2"
+      shift 2
+      ;;
+    --normal-deps)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --normal-deps requires a string value" >&2
+        usage
+      fi
+      normal_deps="$2"
+      shift 2
+      ;;
+    --external-provider-deps)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --external-provider-deps requires a string value" >&2
+        usage
+      fi
+      external_provider_deps="$2"
+      shift 2
+      ;;
+    --optional-deps)
+      if [[ -z "$2" || "$2" == --* ]]; then
+        echo "Error: --optional-deps requires a string value" >&2
+        usage
+      fi
+      optional_deps="$2"
+      shift 2
+      ;;
+    *)
+      echo "Unknown option: $1" >&2
+      usage
+      ;;
+  esac
+done
+
+# Check required arguments
+if [[ -z "$env_name" || -z "$normal_deps" ]]; then
+  echo "Error: --env-name and --normal-deps are required." >&2
+  usage
+fi
+
 if [ -n "$LLAMA_STACK_DIR" ]; then
  echo "Using llama-stack-dir=$LLAMA_STACK_DIR"
 fi
@ -25,29 +95,6 @@ if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
  echo "Using llama-stack-client-dir=$LLAMA_STACK_CLIENT_DIR"
 fi

-if [ "$#" -lt 2 ]; then
-  echo "Usage: $0 <env_name> <pip_dependencies> [<special_pip_deps>]" >&2
-  echo "Example: $0 mybuild ./my-stack-build.yaml 'numpy pandas scipy'" >&2
-  exit 1
-fi
-
-special_pip_deps="$3"
-
-set -euo pipefail
-
-env_name="$1"
-pip_dependencies="$2"
-
-# Define color codes
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-# this is set if we actually create a new conda in which case we need to clean up
-ENVNAME=""
-
-SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
-source "$SCRIPT_DIR/common.sh"
-
 # pre-run checks to make sure we can proceed with the installation
 pre_run_checks() {
  local env_name="$1"
@ -71,49 +118,44 @@ pre_run_checks() {
 }

 run() {
-  local env_name="$1"
-  local pip_dependencies="$2"
-  local special_pip_deps="$3"
-
+  # Use only global variables set by flag parser
  if [ -n "$UV_SYSTEM_PYTHON" ] || [ "$env_name" == "__system__" ]; then
    echo "Installing dependencies in system Python environment"
-    # if env == __system__, ensure we set UV_SYSTEM_PYTHON
    export UV_SYSTEM_PYTHON=1
  elif [ "$VIRTUAL_ENV" == "$env_name" ]; then
    echo "Virtual environment $env_name is already active"
  else
    echo "Using virtual environment $env_name"
    uv venv "$env_name"
-    # shellcheck source=/dev/null
    source "$env_name/bin/activate"
  fi

  if [ -n "$TEST_PYPI_VERSION" ]; then
-    # these packages are damaged in test-pypi, so install them first
    uv pip install fastapi libcst
-    # shellcheck disable=SC2086
-    # we are building a command line so word splitting is expected
    uv pip install --extra-index-url https://test.pypi.org/simple/ \
      --index-strategy unsafe-best-match \
      llama-stack=="$TEST_PYPI_VERSION" \
-      $pip_dependencies
-    if [ -n "$special_pip_deps" ]; then
-      IFS='#' read -ra parts <<<"$special_pip_deps"
+      $normal_deps
+    if [ -n "$optional_deps" ]; then
+      IFS='#' read -ra parts <<<"$optional_deps"
      for part in "${parts[@]}"; do
        echo "$part"
-        # shellcheck disable=SC2086
-        # we are building a command line so word splitting is expected
        uv pip install $part
      done
    fi
+    if [ -n "$external_provider_deps" ]; then
+      IFS='#' read -ra parts <<<"$external_provider_deps"
+      for part in "${parts[@]}"; do
+        echo "$part"
+        uv pip install "$part"
+      done
+    fi
  else
-    # Re-installing llama-stack in the new virtual environment
    if [ -n "$LLAMA_STACK_DIR" ]; then
      if [ ! -d "$LLAMA_STACK_DIR" ]; then
        printf "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_DIR" >&2
        exit 1
      fi
-
      printf "Installing from LLAMA_STACK_DIR: %s\n"  "$LLAMA_STACK_DIR"
      uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"
    else
@ -125,27 +167,41 @@ run() {
        printf "${RED}Warning: LLAMA_STACK_CLIENT_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_CLIENT_DIR" >&2
        exit 1
      fi
-
      printf "Installing from LLAMA_STACK_CLIENT_DIR: %s\n" "$LLAMA_STACK_CLIENT_DIR"
      uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"
    fi

-    # Install pip dependencies
    printf "Installing pip dependencies\n"
-    # shellcheck disable=SC2086
-    # we are building a command line so word splitting is expected
-    uv pip install $pip_dependencies
-    if [ -n "$special_pip_deps" ]; then
-      IFS='#' read -ra parts <<<"$special_pip_deps"
+    uv pip install $normal_deps
+    if [ -n "$optional_deps" ]; then
+      IFS='#' read -ra parts <<<"$optional_deps"
      for part in "${parts[@]}"; do
-        echo "$part"
-        # shellcheck disable=SC2086
-        # we are building a command line so word splitting is expected
+        echo "Installing special provider module: $part"
        uv pip install $part
      done
    fi
+    if [ -n "$external_provider_deps" ]; then
+      IFS='#' read -ra parts <<<"$external_provider_deps"
+      for part in "${parts[@]}"; do
+        echo "Installing external provider module: $part"
+        uv pip install "$part"
+        echo "Getting provider spec for module: $part and installing dependencies"
+        package_name=$(echo "$part" | sed 's/[<>=!].*//')
+        python3 -c "
+import importlib
+import sys
+try:
+    module = importlib.import_module(f'$package_name.provider')
+    spec = module.get_provider_spec()
+    if hasattr(spec, 'pip_packages') and spec.pip_packages:
+        print('\\n'.join(spec.pip_packages))
+except Exception as e:
+    print(f'Error getting provider spec for $package_name: {e}', file=sys.stderr)
+" | uv pip install -r -
+      done
+    fi
  fi
 }

 pre_run_checks "$env_name"
-run "$env_name" "$pip_dependencies" "$special_pip_deps"
+run
--- a/llama_stack/distribution/distribution.py
+++ b/llama_stack/distribution/distribution.py
@ -12,6 +12,7 @@ from typing import Any
 import yaml
 from pydantic import BaseModel

+from llama_stack.distribution.datatypes import BuildConfig, DistributionSpec
 from llama_stack.distribution.external import load_external_apis
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
@ -97,12 +98,10 @@ def _load_inline_provider_spec(spec_data: dict[str, Any], api: Api, provider_nam
    return spec


-def get_provider_registry(
-    config=None,
-) -> dict[Api, dict[str, ProviderSpec]]:
+def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]:
    """Get the provider registry, optionally including external providers.

-    This function loads both built-in providers and external providers from YAML files.
+    This function loads both built-in providers and external providers from YAML files or from their provided modules.
    External providers are loaded from a directory structure like:

    providers.d/
@ -123,8 +122,13 @@ def get_provider_registry(
        safety/
          llama-guard.yaml

+    This function is called from several places: during build, during run, and during stack construction.
+    When an external provider is specified via a module, the pip package required to import that module might not be installed yet (e.g. during build).
+    In that case a partially filled out placeholder spec is registered instead of importing the module.
+
    Args:
        config: Optional object containing the external providers directory path
+            Whether this is a build-time call is not a separate argument: it is inferred from config being a BuildConfig or DistributionSpec.

    Returns:
        A dictionary mapping APIs to their available providers
@ -162,46 +166,112 @@ def get_provider_registry(
                "Install the API package to load any in-tree providers for this API."
            )

-    # Check if config has the external_providers_dir attribute
-    if config and hasattr(config, "external_providers_dir") and config.external_providers_dir:
-        external_providers_dir = os.path.abspath(os.path.expanduser(config.external_providers_dir))
-        if not os.path.exists(external_providers_dir):
-            raise FileNotFoundError(f"External providers directory not found: {external_providers_dir}")
-        logger.info(f"Loading external providers from {external_providers_dir}")
+    # Check if config has external providers
+    if config:
+        if hasattr(config, "external_providers_dir") and config.external_providers_dir:
+            registry = get_external_providers_from_dir(registry, config)
+        # in addition to the directory lookup above, always check each provider for a `module`
+        registry = get_external_providers_from_module(
+            registry=registry,
+            config=config,
+            building=(isinstance(config, BuildConfig) or isinstance(config, DistributionSpec)),
+        )

-        for api in providable_apis():
-            api_name = api.name.lower()
-
-            # Process both remote and inline providers
-            for provider_type in ["remote", "inline"]:
-                api_dir = os.path.join(external_providers_dir, provider_type, api_name)
-                if not os.path.exists(api_dir):
-                    logger.debug(f"No {provider_type} provider directory found for {api_name}")
-                    continue
-
-                # Look for provider spec files in the API directory
-                for spec_path in glob.glob(os.path.join(api_dir, "*.yaml")):
-                    provider_name = os.path.splitext(os.path.basename(spec_path))[0]
-                    logger.info(f"Loading {provider_type} provider spec from {spec_path}")
-
-                    try:
-                        with open(spec_path) as f:
-                            spec_data = yaml.safe_load(f)
-
-                        if provider_type == "remote":
-                            spec = _load_remote_provider_spec(spec_data, api)
-                            provider_type_key = f"remote::{provider_name}"
-                        else:
-                            spec = _load_inline_provider_spec(spec_data, api, provider_name)
-                            provider_type_key = f"inline::{provider_name}"
-                        if provider_type_key in registry[api]:
-                            logger.warning(f"Overriding already registered provider {provider_type_key} for {api.name}")
-                        registry[api][provider_type_key] = spec
-                        logger.info(f"Successfully loaded external provider {provider_type_key}")
-                    except yaml.YAMLError as yaml_err:
-                        logger.error(f"Failed to parse YAML file {spec_path}: {yaml_err}")
-                        raise yaml_err
-                    except Exception as e:
-                        logger.error(f"Failed to load provider spec from {spec_path}: {e}")
-                        raise e
+    return registry
+
+
+def get_external_providers_from_dir(
+    registry: dict[Api, dict[str, ProviderSpec]], config
+) -> dict[Api, dict[str, ProviderSpec]]:
+    logger.warning(
+        "Specifying external providers via `external_providers_dir` is being deprecated. Please specify `module:` in the provider instead."
+    )
+    external_providers_dir = os.path.abspath(os.path.expanduser(config.external_providers_dir))
+    if not os.path.exists(external_providers_dir):
+        raise FileNotFoundError(f"External providers directory not found: {external_providers_dir}")
+    logger.info(f"Loading external providers from {external_providers_dir}")
+
+    for api in providable_apis():
+        api_name = api.name.lower()
+
+        # Process both remote and inline providers
+        for provider_type in ["remote", "inline"]:
+            api_dir = os.path.join(external_providers_dir, provider_type, api_name)
+            if not os.path.exists(api_dir):
+                logger.debug(f"No {provider_type} provider directory found for {api_name}")
+                continue
+
+            # Look for provider spec files in the API directory
+            for spec_path in glob.glob(os.path.join(api_dir, "*.yaml")):
+                provider_name = os.path.splitext(os.path.basename(spec_path))[0]
+                logger.info(f"Loading {provider_type} provider spec from {spec_path}")
+
+                try:
+                    with open(spec_path) as f:
+                        spec_data = yaml.safe_load(f)
+
+                    if provider_type == "remote":
+                        spec = _load_remote_provider_spec(spec_data, api)
+                        provider_type_key = f"remote::{provider_name}"
+                    else:
+                        spec = _load_inline_provider_spec(spec_data, api, provider_name)
+                        provider_type_key = f"inline::{provider_name}"
+
+                    logger.info(f"Loaded {provider_type} provider spec for {provider_type_key} from {spec_path}")
+                    if provider_type_key in registry[api]:
+                        logger.warning(f"Overriding already registered provider {provider_type_key} for {api.name}")
+                    registry[api][provider_type_key] = spec
+                    logger.info(f"Successfully loaded external provider {provider_type_key}")
+                except yaml.YAMLError as yaml_err:
+                    logger.error(f"Failed to parse YAML file {spec_path}: {yaml_err}")
+                    raise yaml_err
+                except Exception as e:
+                    logger.error(f"Failed to load provider spec from {spec_path}: {e}")
+                    raise e
+
+    return registry
+
+
+def get_external_providers_from_module(
+    registry: dict[Api, dict[str, ProviderSpec]], config, building: bool
+) -> dict[Api, dict[str, ProviderSpec]]:
+    provider_list = None
+    if isinstance(config, BuildConfig):
+        provider_list = config.distribution_spec.providers.items()
+    else:
+        provider_list = config.providers.items()
+    if provider_list is None:
+        logger.warning("Could not get list of providers from config")
+        return registry
+    for provider_api, providers in provider_list:
+        for provider in providers:
+            if not hasattr(provider, "module") or provider.module is None:
+                continue
+            # get provider using module
+            try:
+                if not building:
+                    package_name = provider.module.split("==")[0]
+                    module = importlib.import_module(f"{package_name}.provider")
+                    # if config class is wrong you will get an error saying module could not be imported
+                    spec = module.get_provider_spec()
+                else:
+                    # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon build and run
+                    spec = ProviderSpec(
+                        api=Api(provider_api),
+                        provider_type=provider.provider_type,
+                        is_external=True,
+                        module=provider.module,
+                        config_class="",
+                    )
+                provider_type = provider.provider_type
+                # in the case we are building we CANNOT import this module of course because it has not been installed.
+                # return a partially filled out spec that the build script will populate.
+                registry[Api(provider_api)][provider_type] = spec
+            except ModuleNotFoundError as exc:
+                raise ValueError(
+                    "get_provider_spec not found. If specifying an external provider via `module` in the Provider spec, the Provider must have the `provider.get_provider_spec` module available"
+                ) from exc
+            except Exception as e:
+                logger.error(f"Failed to load provider spec from module {provider.module}: {e}")
+                raise e
    return registry