feat: consolidate most distros into "starter" (#2516)

# What does this PR do?

* Removes a bunch of distros
* Removed distros were added into the "starter" distribution
* Doc for "starter" has been added
* Partially reverts https://github.com/meta-llama/llama-stack/pull/2482
  since inference providers are disabled by default and can be turned on
  manually via env variable.
* Disables safety in starter distro

Closes: https://github.com/meta-llama/llama-stack/issues/2502.

~Needs: https://github.com/meta-llama/llama-stack/pull/2482 for Ollama
to work properly in the CI.~

TODO:

- [ ] We can only update `install.sh` when we get a new release.
- [x] Update providers documentation
- [ ] Update notebooks to reference starter instead of ollama

Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
Sébastien Han 2025-07-04 15:58:03 +02:00 committed by GitHub
parent f77d4d91f5
commit c4349f532b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
132 changed files with 1009 additions and 10845 deletions

View file

@ -84,7 +84,13 @@ class ProviderImpl(Providers):
Each API maps to a dictionary of provider IDs to their health responses.
"""
providers_health: dict[str, dict[str, HealthResponse]] = {}
timeout = 1.0
# The timeout has to be long enough to allow all the providers to be checked, especially in
# the case of the inference router health check since it checks all registered inference
# providers.
# The timeout must not be equal to the one set by health method for a given implementation,
# otherwise we will miss some providers.
timeout = 3.0
async def check_provider_health(impl: Any) -> tuple[str, HealthResponse] | None:
# Skip special implementations (inspect/providers) that don't have provider specs

View file

@ -98,6 +98,10 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
method = getattr(impls[api], register_method)
for obj in objects:
# Do not register models on disabled providers
if hasattr(obj, "provider_id") and obj.provider_id is not None and obj.provider_id == "__disabled__":
logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled provider.")
continue
# In complex templates, like our starter template, we may have dynamic model ids
# given by environment variables. This allows those environment variables to have
# a default value of __disabled__ to skip registration of the model if not set.
@ -106,6 +110,7 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
and obj.provider_model_id is not None
and "__disabled__" in obj.provider_model_id
):
logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled model.")
continue
# we want to maintain the type information in arguments to method.
# instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
@ -149,6 +154,25 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
result = []
for i, v in enumerate(config):
try:
# Special handling for providers: first resolve the provider_id to check if provider
# is disabled so that we can skip config env variable expansion and avoid validation errors
if isinstance(v, dict) and "provider_id" in v:
try:
resolved_provider_id = replace_env_vars(v["provider_id"], f"{path}[{i}].provider_id")
if resolved_provider_id == "__disabled__":
logger.debug(
f"Skipping config env variable expansion for disabled provider: {v.get('provider_id', '')}"
)
# Create a copy with resolved provider_id but original config
disabled_provider = v.copy()
disabled_provider["provider_id"] = resolved_provider_id
result.append(disabled_provider)
continue
except EnvVarError:
# If we can't resolve the provider_id, continue with normal processing
pass
# Normal processing for non-disabled providers
result.append(replace_env_vars(v, f"{path}[{i}]"))
except EnvVarError as e:
raise EnvVarError(e.var_name, e.path) from None