feat: consolidate most distros into "starter" (#2516)

# What does this PR do?

* Removes a bunch of distros
* Removed distros were added into the "starter" distribution
* Doc for "starter" has been added
* Partially reverts https://github.com/meta-llama/llama-stack/pull/2482 since inference providers are disabled by default and can be turned on manually via env variable.
* Disables safety in starter distro

Closes: https://github.com/meta-llama/llama-stack/issues/2502.

~Needs: https://github.com/meta-llama/llama-stack/pull/2482 for Ollama to work properly in the CI.~

TODO:

- [ ] We can only update `install.sh` when we get a new release.
- [x] Update providers documentation
- [ ] Update notebooks to reference starter instead of ollama

Signed-off-by: Sébastien Han <seb@redhat.com>
2025-12-03 09:53:45 +00:00 · 2025-07-04 15:58:03 +02:00 · 2025-07-04 15:58:03 +02:00 · c4349f532b
commit c4349f532b
parent f77d4d91f5
132 changed files with 1009 additions and 10845 deletions
--- a/llama_stack/distribution/providers.py
+++ b/llama_stack/distribution/providers.py
@@ -84,7 +84,13 @@ class ProviderImpl(Providers):
                Each API maps to a dictionary of provider IDs to their health responses.
        """
        providers_health: dict[str, dict[str, HealthResponse]] = {}
-        timeout = 1.0
+
+        # The timeout has to be long enough to allow all the providers to be checked, especially in
+        # the case of the inference router health check since it checks all registered inference
+        # providers.
+        # The timeout must not be equal to the one set by health method for a given implementation,
+        # otherwise we will miss some providers.
+        timeout = 3.0

        async def check_provider_health(impl: Any) -> tuple[str, HealthResponse] | None:
            # Skip special implementations (inspect/providers) that don't have provider specs
--- a/llama_stack/distribution/stack.py
+++ b/llama_stack/distribution/stack.py
@@ -98,6 +98,10 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):

        method = getattr(impls[api], register_method)
        for obj in objects:
+            # Do not register resources on disabled providers
+            if hasattr(obj, "provider_id") and obj.provider_id is not None and obj.provider_id == "__disabled__":
+                logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled provider.")
+                continue
            # In complex templates, like our starter template, we may have dynamic model ids
            # given by environment variables. This allows those environment variables to have
            # a default value of __disabled__ to skip registration of the model if not set.
@@ -106,6 +110,7 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
                and obj.provider_model_id is not None
                and "__disabled__" in obj.provider_model_id
            ):
+                logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled model.")
                continue
            # we want to maintain the type information in arguments to method.
            # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
@@ -149,6 +154,25 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
        result = []
        for i, v in enumerate(config):
            try:
+                # Special handling for providers: first resolve the provider_id to check if provider
+                # is disabled so that we can skip config env variable expansion and avoid validation errors
+                if isinstance(v, dict) and "provider_id" in v:
+                    try:
+                        resolved_provider_id = replace_env_vars(v["provider_id"], f"{path}[{i}].provider_id")
+                        if resolved_provider_id == "__disabled__":
+                            logger.debug(
+                                f"Skipping config env variable expansion for disabled provider: {v.get('provider_id', '')}"
+                            )
+                            # Create a copy with resolved provider_id but original config
+                            disabled_provider = v.copy()
+                            disabled_provider["provider_id"] = resolved_provider_id
+                            result.append(disabled_provider)
+                            continue
+                    except EnvVarError:
+                        # If we can't resolve the provider_id, continue with normal processing
+                        pass
+
+                # Normal processing for non-disabled providers
                result.append(replace_env_vars(v, f"{path}[{i}]"))
            except EnvVarError as e:
                raise EnvVarError(e.var_name, e.path) from None