feat: consolidate most distros into "starter"

* Removes most of the standalone distros
* The removed distros have been folded into the "starter" distribution
* Documentation for "starter" has been added
* Partially reverts https://github.com/meta-llama/llama-stack/pull/2482,
  since inference providers are now disabled by default and can be turned on
  manually via environment variables (see the sketch after this list)
* Disables safety in the starter distro
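
The bullets above rely on the `${env.VAR}` / `${env.VAR:=default}` placeholders used throughout the provider configs changed below: a provider's run config keeps pointing at an environment variable and only becomes usable once that variable is exported. A minimal sketch of that substitution, handling only the default (`:=`) form; this is an illustration of the idea, not llama-stack's actual resolver, which also supports other forms such as `:+`:

import os
import re

# Illustrative resolver for "${env.NAME:=default}" placeholders (":=" form only).
# It shows why a provider stays on its default until its variable is exported.
_PLACEHOLDER = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::=([^}]*))?\}")

def resolve(value: str) -> str:
    def _sub(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        return os.environ.get(name, default if default is not None else "")
    return _PLACEHOLDER.sub(_sub, value)

# Prints the default unless OLLAMA_URL is set in the environment.
print(resolve("${env.OLLAMA_URL:=http://localhost:11434}"))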

Closes: #2502
Signed-off-by: Sébastien Han <seb@redhat.com>
Sébastien Han 2025-06-25 16:09:41 +02:00
parent 0ddb293d77
commit bedfea38c3
127 changed files with 758 additions and 10771 deletions

@@ -26,8 +26,8 @@ class CerebrasImplConfig(BaseModel):
     )
 
     @classmethod
-    def sample_run_config(cls, **kwargs) -> dict[str, Any]:
+    def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]:
         return {
             "base_url": DEFAULT_BASE_URL,
-            "api_key": "${env.CEREBRAS_API_KEY}",
+            "api_key": api_key,
         }
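
For illustration, the effect of the new signature: the env placeholder stays the default, and a caller (e.g. a distribution template) can substitute its own value. The import path below is assumed; only the class body appears in the hunk above.

# Hypothetical usage of the updated classmethod; the module path is assumed.
from llama_stack.providers.remote.inference.cerebras.config import CerebrasImplConfig

# Default: api_key keeps the env placeholder, base_url is the module-level DEFAULT_BASE_URL.
print(CerebrasImplConfig.sample_run_config())

# Override: a distro template can pass its own value (placeholder name is illustrative).
print(CerebrasImplConfig.sample_run_config(api_key="${env.MY_CEREBRAS_KEY:=}"))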

@@ -31,7 +31,7 @@ class LlamaCompatConfig(BaseModel):
     )
 
     @classmethod
-    def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]:
+    def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY:}", **kwargs) -> dict[str, Any]:
         return {
             "openai_compat_api_base": "https://api.llama.com/compat/v1/",
             "api_key": api_key,

@@ -53,9 +53,15 @@ class NVIDIAConfig(BaseModel):
     )
 
     @classmethod
-    def sample_run_config(cls, **kwargs) -> dict[str, Any]:
+    def sample_run_config(
+        cls,
+        url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
+        api_key: str = "${env.NVIDIA_API_KEY:+}",
+        append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}",
+        **kwargs,
+    ) -> dict[str, Any]:
         return {
-            "url": "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
-            "api_key": "${env.NVIDIA_API_KEY:+}",
-            "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:=True}",
+            "url": url,
+            "api_key": api_key,
+            "append_api_version": append_api_version,
         }

@@ -13,13 +13,9 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434"
 
 class OllamaImplConfig(BaseModel):
     url: str = DEFAULT_OLLAMA_URL
-    raise_on_connect_error: bool = True
 
     @classmethod
-    def sample_run_config(
-        cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs
-    ) -> dict[str, Any]:
+    def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]:
         return {
             "url": url,
-            "raise_on_connect_error": raise_on_connect_error,
         }

@@ -91,7 +91,6 @@ class OllamaInferenceAdapter(
     def __init__(self, config: OllamaImplConfig) -> None:
         self.register_helper = ModelRegistryHelper(MODEL_ENTRIES)
         self.url = config.url
-        self.raise_on_connect_error = config.raise_on_connect_error
 
     @property
     def client(self) -> AsyncClient:
@@ -105,10 +104,7 @@
         logger.debug(f"checking connectivity to Ollama at `{self.url}`...")
         health_response = await self.health()
         if health_response["status"] == HealthStatus.ERROR:
-            if self.raise_on_connect_error:
-                raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
-            else:
-                logger.warning("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
+            raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
 
     async def health(self) -> HealthResponse:
         """

@@ -24,8 +24,10 @@ class PassthroughImplConfig(BaseModel):
     )
 
     @classmethod
-    def sample_run_config(cls, **kwargs) -> dict[str, Any]:
+    def sample_run_config(
+        cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
+    ) -> dict[str, Any]:
         return {
-            "url": "${env.PASSTHROUGH_URL}",
-            "api_key": "${env.PASSTHROUGH_API_KEY}",
+            "url": url,
+            "api_key": api_key,
         }

@@ -17,7 +17,11 @@ class TGIImplConfig(BaseModel):
     )
 
     @classmethod
-    def sample_run_config(cls, url: str = "${env.TGI_URL}", **kwargs):
+    def sample_run_config(
+        cls,
+        url: str = "${env.TGI_URL}",
+        **kwargs,
+    ):
         return {
             "url": url,
         }

@@ -327,7 +327,6 @@ class InferenceEndpointAdapter(_HfAdapter):
         # Get the inference endpoint details
         api = HfApi(token=config.api_token.get_secret_value())
         endpoint = api.get_inference_endpoint(config.endpoint_name)
-
         # Wait for the endpoint to be ready (if not already)
         endpoint.wait(timeout=60)