ci: test safety with starter

We are now testing the safety capability with the starter image. This includes a few changes:

* Enable the safety integration test
* Relax the shield model requirements from llama-guard to make it work with llama-guard3:8b coming from Ollama
* Expose a shield for each inference provider in the starter distro. The shield will only be registered if the provider is enabled. Shields will be added if the provider claims to support a safety model
* Missing provider models have been added too
* Pointers to official documentation pages for provider model support have been added

Closes: https://github.com/meta-llama/llama-stack/issues/2528
Signed-off-by: Sébastien Han <seb@redhat.com>
2025-12-27 00:12:01 +00:00 · 2025-07-04 17:07:11 +02:00 · 2025-07-04 17:07:11 +02:00 · 11c912da0a
commit 11c912da0a
parent cd0ad21111
20 changed files with 621 additions and 126 deletions
--- a/llama_stack/providers/remote/inference/runpod/runpod.py
+++ b/llama_stack/providers/remote/inference/runpod/runpod.py
@@ -11,7 +11,7 @@ from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.apis.inference import OpenAIEmbeddingsResponse

 # from llama_stack.providers.datatypes import ModelsProtocolPrivate
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, build_hf_repo_model_entry
 from llama_stack.providers.utils.inference.openai_compat import (
    OpenAIChatCompletionToLlamaStackMixin,
    OpenAICompletionToLlamaStackMixin,
@@ -25,6 +25,8 @@ from llama_stack.providers.utils.inference.prompt_adapter import (

 from .config import RunpodImplConfig

+# https://docs.runpod.io/serverless/vllm/overview#compatible-models
+# https://github.com/runpod-workers/worker-vllm/blob/main/README.md#compatible-model-architectures
 RUNPOD_SUPPORTED_MODELS = {
    "Llama3.1-8B": "meta-llama/Llama-3.1-8B",
    "Llama3.1-70B": "meta-llama/Llama-3.1-70B",
@@ -40,6 +42,14 @@ RUNPOD_SUPPORTED_MODELS = {
    "Llama3.2-3B": "meta-llama/Llama-3.2-3B",
 }

+SAFETY_MODELS_ENTRIES = []  # empty: no safety model entries are declared for Runpod here
+
+# Build MODEL_ENTRIES (one entry per RUNPOD_SUPPORTED_MODELS item, then SAFETY_MODELS_ENTRIES) for compatibility with the starter template
+MODEL_ENTRIES = [
+    build_hf_repo_model_entry(provider_model_id, model_descriptor)
+    for provider_model_id, model_descriptor in RUNPOD_SUPPORTED_MODELS.items()  # NOTE(review): dict keys look like Llama descriptors and values like HF repo ids - confirm argument order
+] + SAFETY_MODELS_ENTRIES
+

 class RunpodInferenceAdapter(
    ModelRegistryHelper,