Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-08 06:44:32 +00:00)
feat: consolidate most distros into "starter" (#2516)
# What does this PR do?

* Removes a bunch of distros.
* The removed distros were folded into the "starter" distribution.
* Doc for "starter" has been added.
* Partially reverts https://github.com/meta-llama/llama-stack/pull/2482, since inference providers are now disabled by default and can be turned on manually via env variable.
* Disables safety in the starter distro.

Closes: https://github.com/meta-llama/llama-stack/issues/2502

~Needs: https://github.com/meta-llama/llama-stack/pull/2482 for Ollama to work properly in the CI.~

TODO:

- [ ] We can only update `install.sh` when we get a new release.
- [x] Update providers documentation
- [ ] Update notebooks to reference starter instead of ollama

Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:

parent: f77d4d91f5
commit: c4349f532b
132 changed files with 1009 additions and 10845 deletions
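Most of the hunks below revise `sample_run_config()` helpers, which emit `${env.VAR}` placeholders that the stack substitutes from the environment when a run config is rendered. As a rough sketch of that convention (the `resolve_placeholder` helper below is hypothetical, not llama-stack's actual resolver): `${env.VAR}` is required and should fail when the variable is unset, while `${env.VAR:=default}` falls back to a default, which may be empty.

```python
import os
import re

# Hypothetical sketch of the placeholder convention seen in these configs:
#   ${env.VAR}           -> required: fail if VAR is unset
#   ${env.VAR:=default}  -> optional: fall back to the default (may be empty)
_PATTERN = re.compile(r"\$\{env\.(?P<name>\w+)(?::=(?P<default>[^}]*))?\}")

def resolve_placeholder(value: str) -> str:
    def substitute(match: re.Match) -> str:
        name, default = match.group("name"), match.group("default")
        if name in os.environ:
            return os.environ[name]
        if default is not None:  # the ":=" form was used
            return default
        raise ValueError(f"environment variable {name} is required but not set")

    return _PATTERN.sub(substitute, value)

# resolve_placeholder("${env.OLLAMA_URL:=http://localhost:11434}")
#   -> "http://localhost:11434" when OLLAMA_URL is unset
# resolve_placeholder("${env.TOGETHER_API_KEY}")
#   -> raises ValueError unless TOGETHER_API_KEY is exported
```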
llama_stack/providers/remote/inference/cerebras/config.py

```diff
@@ -26,8 +26,8 @@ class CerebrasImplConfig(BaseModel):
     )
 
     @classmethod
-    def sample_run_config(cls, **kwargs) -> dict[str, Any]:
+    def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]:
         return {
             "base_url": DEFAULT_BASE_URL,
-            "api_key": "${env.CEREBRAS_API_KEY}",
+            "api_key": api_key,
         }
```
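The point of threading `api_key` through the signature is that callers can now override the placeholder instead of always getting the hard-coded env reference. A usage sketch (import path assumed from the repo's provider layout; the same pattern applies to the Passthrough change further down):

```python
# Sketch: consuming the new signature. The default keeps the env
# placeholder; passing a value pins the key in the generated config.
from llama_stack.providers.remote.inference.cerebras.config import CerebrasImplConfig

CerebrasImplConfig.sample_run_config()
# -> {"base_url": <DEFAULT_BASE_URL>, "api_key": "${env.CEREBRAS_API_KEY}"}

CerebrasImplConfig.sample_run_config(api_key="csk-test-key")  # hypothetical key
# -> {"base_url": <DEFAULT_BASE_URL>, "api_key": "csk-test-key"}
```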
llama_stack/providers/remote/inference/ollama/config.py

```diff
@@ -13,13 +13,9 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434"
 
 class OllamaImplConfig(BaseModel):
     url: str = DEFAULT_OLLAMA_URL
-    raise_on_connect_error: bool = True
 
     @classmethod
-    def sample_run_config(
-        cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs
-    ) -> dict[str, Any]:
+    def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]:
         return {
             "url": url,
-            "raise_on_connect_error": raise_on_connect_error,
         }
```
llama_stack/providers/remote/inference/ollama/ollama.py

```diff
@@ -94,7 +94,6 @@ class OllamaInferenceAdapter(
     def __init__(self, config: OllamaImplConfig) -> None:
         self.register_helper = ModelRegistryHelper(MODEL_ENTRIES)
         self.url = config.url
-        self.raise_on_connect_error = config.raise_on_connect_error
 
     @property
     def client(self) -> AsyncClient:
@@ -108,10 +107,7 @@ class OllamaInferenceAdapter(
         logger.debug(f"checking connectivity to Ollama at `{self.url}`...")
         health_response = await self.health()
         if health_response["status"] == HealthStatus.ERROR:
-            if self.raise_on_connect_error:
-                raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
-            else:
-                logger.warning("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
+            raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
 
     async def health(self) -> HealthResponse:
         """
```
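With `raise_on_connect_error` removed, a failed health probe always raises; per the PR description, the starter distro instead keeps a provider disabled until its env variable (here `OLLAMA_URL`) is set. For orientation, a minimal sketch of what such a probe can look like with the `ollama` Python client, as a simplified stand-in for the adapter's real `health()` method:

```python
from ollama import AsyncClient

async def health(client: AsyncClient) -> dict[str, str]:
    """Probe connectivity: ps() lists running models and raises a
    connection error when the Ollama server is unreachable."""
    try:
        await client.ps()
        return {"status": "OK"}
    except Exception as exc:
        return {"status": "ERROR", "message": f"Health check failed: {exc}"}
```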
llama_stack/providers/remote/inference/passthrough/config.py

```diff
@@ -24,8 +24,10 @@ class PassthroughImplConfig(BaseModel):
     )
 
     @classmethod
-    def sample_run_config(cls, **kwargs) -> dict[str, Any]:
+    def sample_run_config(
+        cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
+    ) -> dict[str, Any]:
         return {
-            "url": "${env.PASSTHROUGH_URL}",
-            "api_key": "${env.PASSTHROUGH_API_KEY}",
+            "url": url,
+            "api_key": api_key,
         }
```
llama_stack/providers/remote/inference/runpod/config.py

```diff
@@ -26,5 +26,5 @@ class RunpodImplConfig(BaseModel):
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
             "url": "${env.RUNPOD_URL:=}",
-            "api_token": "${env.RUNPOD_API_TOKEN:=}",
+            "api_token": "${env.RUNPOD_API_TOKEN}",
         }
```
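Note the direction of this change: with the `:=` suffix the placeholder resolved to an empty string when `RUNPOD_API_TOKEN` was unset, whereas the bare form makes the variable required again, failing fast at config-resolution time. The Together hunk below makes the same change for `TOGETHER_API_KEY`, consistent with the partial revert of #2482 noted in the description.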
llama_stack/providers/remote/inference/tgi/config.py

```diff
@@ -17,7 +17,11 @@ class TGIImplConfig(BaseModel):
     )
 
     @classmethod
-    def sample_run_config(cls, url: str = "${env.TGI_URL}", **kwargs):
+    def sample_run_config(
+        cls,
+        url: str = "${env.TGI_URL}",
+        **kwargs,
+    ):
         return {
             "url": url,
         }
```
llama_stack/providers/remote/inference/tgi/tgi.py

```diff
@@ -327,7 +327,6 @@ class InferenceEndpointAdapter(_HfAdapter):
         # Get the inference endpoint details
         api = HfApi(token=config.api_token.get_secret_value())
         endpoint = api.get_inference_endpoint(config.endpoint_name)
-
         # Wait for the endpoint to be ready (if not already)
         endpoint.wait(timeout=60)
 
```
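For context on the code in this hunk: `HfApi.get_inference_endpoint()` returns an `InferenceEndpoint` handle, and `wait()` blocks until the endpoint reports running, raising on timeout. A self-contained sketch against `huggingface_hub` (the token and endpoint name are placeholders):

```python
from huggingface_hub import HfApi

api = HfApi(token="hf_xxx")  # placeholder token
endpoint = api.get_inference_endpoint("my-endpoint")  # placeholder name
endpoint.wait(timeout=60)  # raises InferenceEndpointTimeoutError if not running in 60s
print(endpoint.url)  # resolved URL once the endpoint is up
```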
llama_stack/providers/remote/inference/together/config.py

```diff
@@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel):
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
             "url": "https://api.together.xyz/v1",
-            "api_key": "${env.TOGETHER_API_KEY:=}",
+            "api_key": "${env.TOGETHER_API_KEY}",
         }
```