feat: add api.llama provider, llama-guard-4 model (#2058)

This PR adds a llama-stack inference provider for `api.llama.com`, as well as adds entries for Llama-Guard-4 and updated Prompt-Guard models.
2025-12-03 18:00:36 +00:00 · 2025-04-29 10:07:41 -07:00 · 2025-04-29 10:07:41 -07:00 · 4d0bfbf984
commit 4d0bfbf984
parent 934446ddb4
21 changed files with 1526 additions and 47 deletions
--- a/llama_stack/cli/model/safety_models.py
+++ b/llama_stack/cli/model/safety_models.py
@ -15,11 +15,11 @@ from llama_stack.models.llama.sku_types import CheckpointQuantizationFormat
 class PromptGuardModel(BaseModel):
    """Make a 'fake' Model-like object for Prompt Guard. Eventually this will be removed."""

-    model_id: str = "Prompt-Guard-86M"
+    model_id: str
+    huggingface_repo: str
    description: str = "Prompt Guard. NOTE: this model will not be provided via `llama` CLI soon."
    is_featured: bool = False
-    huggingface_repo: str = "meta-llama/Prompt-Guard-86M"
-    max_seq_length: int = 2048
+    max_seq_length: int = 512
    is_instruct_model: bool = False
    quantization_format: CheckpointQuantizationFormat = CheckpointQuantizationFormat.bf16
    arch_args: Dict[str, Any] = Field(default_factory=dict)
@ -30,18 +30,35 @@ class PromptGuardModel(BaseModel):
    model_config = ConfigDict(protected_namespaces=())


-def prompt_guard_model_sku():
-    return PromptGuardModel()
+def prompt_guard_model_skus():
+    return [
+        PromptGuardModel(model_id="Prompt-Guard-86M", huggingface_repo="meta-llama/Prompt-Guard-86M"),
+        PromptGuardModel(
+            model_id="Llama-Prompt-Guard-2-86M",
+            huggingface_repo="meta-llama/Llama-Prompt-Guard-2-86M",
+        ),
+        PromptGuardModel(
+            model_id="Llama-Prompt-Guard-2-22M",
+            huggingface_repo="meta-llama/Llama-Prompt-Guard-2-22M",
+        ),
+    ]


-def prompt_guard_download_info():
-    return LlamaDownloadInfo(
-        folder="Prompt-Guard",
-        files=[
-            "model.safetensors",
-            "special_tokens_map.json",
-            "tokenizer.json",
-            "tokenizer_config.json",
-        ],
-        pth_size=1,
-    )
+def prompt_guard_model_sku_map() -> Dict[str, Any]:
+    return {model.model_id: model for model in prompt_guard_model_skus()}
+
+
+def prompt_guard_download_info_map() -> Dict[str, LlamaDownloadInfo]:
+    return {
+        model.model_id: LlamaDownloadInfo(
+            folder="Prompt-Guard" if model.model_id == "Prompt-Guard-86M" else model.model_id,
+            files=[
+                "model.safetensors",
+                "special_tokens_map.json",
+                "tokenizer.json",
+                "tokenizer_config.json",
+            ],
+            pth_size=1,
+        )
+        for model in prompt_guard_model_skus()
+    }