feat: add api.llama provider, llama-guard-4 model (#2058)

This PR adds a llama-stack inference provider for `api.llama.com`, along with model entries for Llama-Guard-4 and the updated Prompt-Guard models.
commit 4d0bfbf984 (parent 934446ddb4)
Author: Ashwin Bharambe
Date:   2025-04-29 10:07:41 -07:00
21 changed files with 1526 additions and 47 deletions


@@ -460,15 +460,17 @@ def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
     from llama_stack.models.llama.sku_list import llama_meta_net_info, resolve_model

     from .model.safety_models import (
-        prompt_guard_download_info,
-        prompt_guard_model_sku,
+        prompt_guard_download_info_map,
+        prompt_guard_model_sku_map,
     )

-    prompt_guard = prompt_guard_model_sku()
+    prompt_guard_model_sku_map = prompt_guard_model_sku_map()
+    prompt_guard_download_info_map = prompt_guard_download_info_map()
+
     for model_id in model_ids:
-        if model_id == prompt_guard.model_id:
-            model = prompt_guard
-            info = prompt_guard_download_info()
+        if model_id in prompt_guard_model_sku_map.keys():
+            model = prompt_guard_model_sku_map[model_id]
+            info = prompt_guard_download_info_map[model_id]
         else:
            model = resolve_model(model_id)
            if model is None:
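
The new lookup path can be sanity-checked directly. A minimal sketch, assuming a llama-stack checkout with this PR applied; the absolute import path is inferred from the relative `from .model.safety_models import ...` above:

# Sketch: exercise the map-based Prompt Guard lookup added in this hunk.
from llama_stack.cli.model.safety_models import (
    prompt_guard_download_info_map,
    prompt_guard_model_sku_map,
)

sku_map = prompt_guard_model_sku_map()
info_map = prompt_guard_download_info_map()

# Any of the three Prompt Guard ids defined in safety_models.py below works.
model = sku_map["Llama-Prompt-Guard-2-86M"]
info = info_map["Llama-Prompt-Guard-2-86M"]
print(model.huggingface_repo)  # meta-llama/Llama-Prompt-Guard-2-86M
print(info.folder)             # Llama-Prompt-Guard-2-86M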


@@ -36,11 +36,11 @@ class ModelDescribe(Subcommand):
         )

     def _run_model_describe_cmd(self, args: argparse.Namespace) -> None:
-        from .safety_models import prompt_guard_model_sku
+        from .safety_models import prompt_guard_model_sku_map

-        prompt_guard = prompt_guard_model_sku()
-        if args.model_id == prompt_guard.model_id:
-            model = prompt_guard
+        prompt_guard_model_map = prompt_guard_model_sku_map()
+        if args.model_id in prompt_guard_model_map.keys():
+            model = prompt_guard_model_map[args.model_id]
         else:
             model = resolve_model(args.model_id)


@@ -84,7 +84,7 @@ class ModelList(Subcommand):
         )

     def _run_model_list_cmd(self, args: argparse.Namespace) -> None:
-        from .safety_models import prompt_guard_model_sku
+        from .safety_models import prompt_guard_model_skus

         if args.downloaded:
             return _run_model_list_downloaded_cmd()
@@ -96,7 +96,7 @@ class ModelList(Subcommand):
         ]

         rows = []
-        for model in all_registered_models() + [prompt_guard_model_sku()]:
+        for model in all_registered_models() + prompt_guard_model_skus():
             if not args.show_all and not model.is_featured:
                 continue
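
Since `prompt_guard_model_skus()` returns a plain list, it concatenates directly with `all_registered_models()`. The three SKUs it contributes can be printed via the new helper; a sketch under the same checkout assumption, with values read off the safety_models.py hunk below:

# Sketch: enumerate the Prompt Guard SKUs the list command now includes.
from llama_stack.cli.model.safety_models import prompt_guard_model_skus

for model in prompt_guard_model_skus():
    print(model.model_id, model.huggingface_repo, model.max_seq_length)
# Prompt-Guard-86M          meta-llama/Prompt-Guard-86M          512
# Llama-Prompt-Guard-2-86M  meta-llama/Llama-Prompt-Guard-2-86M  512
# Llama-Prompt-Guard-2-22M  meta-llama/Llama-Prompt-Guard-2-22M  512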


@@ -42,11 +42,12 @@ class ModelRemove(Subcommand):
         )

     def _run_model_remove_cmd(self, args: argparse.Namespace) -> None:
-        from .safety_models import prompt_guard_model_sku
+        from .safety_models import prompt_guard_model_sku_map

-        prompt_guard = prompt_guard_model_sku()
-        if args.model == prompt_guard.model_id:
-            model = prompt_guard
+        prompt_guard_model_map = prompt_guard_model_sku_map()
+
+        if args.model in prompt_guard_model_map.keys():
+            model = prompt_guard_model_map[args.model]
         else:
             model = resolve_model(args.model)


@@ -15,11 +15,11 @@ from llama_stack.models.llama.sku_types import CheckpointQuantizationFormat
 class PromptGuardModel(BaseModel):
     """Make a 'fake' Model-like object for Prompt Guard. Eventually this will be removed."""

-    model_id: str = "Prompt-Guard-86M"
+    model_id: str
+    huggingface_repo: str
     description: str = "Prompt Guard. NOTE: this model will not be provided via `llama` CLI soon."
     is_featured: bool = False
-    huggingface_repo: str = "meta-llama/Prompt-Guard-86M"
-    max_seq_length: int = 2048
+    max_seq_length: int = 512
     is_instruct_model: bool = False
     quantization_format: CheckpointQuantizationFormat = CheckpointQuantizationFormat.bf16
     arch_args: Dict[str, Any] = Field(default_factory=dict)
@@ -30,18 +30,35 @@ class PromptGuardModel(BaseModel):
     model_config = ConfigDict(protected_namespaces=())


-def prompt_guard_model_sku():
-    return PromptGuardModel()
+def prompt_guard_model_skus():
+    return [
+        PromptGuardModel(model_id="Prompt-Guard-86M", huggingface_repo="meta-llama/Prompt-Guard-86M"),
+        PromptGuardModel(
+            model_id="Llama-Prompt-Guard-2-86M",
+            huggingface_repo="meta-llama/Llama-Prompt-Guard-2-86M",
+        ),
+        PromptGuardModel(
+            model_id="Llama-Prompt-Guard-2-22M",
+            huggingface_repo="meta-llama/Llama-Prompt-Guard-2-22M",
+        ),
+    ]


-def prompt_guard_download_info():
-    return LlamaDownloadInfo(
-        folder="Prompt-Guard",
-        files=[
-            "model.safetensors",
-            "special_tokens_map.json",
-            "tokenizer.json",
-            "tokenizer_config.json",
-        ],
-        pth_size=1,
-    )
+def prompt_guard_model_sku_map() -> Dict[str, Any]:
+    return {model.model_id: model for model in prompt_guard_model_skus()}
+
+
+def prompt_guard_download_info_map() -> Dict[str, LlamaDownloadInfo]:
+    return {
+        model.model_id: LlamaDownloadInfo(
+            folder="Prompt-Guard" if model.model_id == "Prompt-Guard-86M" else model.model_id,
+            files=[
+                "model.safetensors",
+                "special_tokens_map.json",
+                "tokenizer.json",
+                "tokenizer_config.json",
+            ],
+            pth_size=1,
+        )
+        for model in prompt_guard_model_skus()
+    }
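
One subtlety in the `folder` ternary above: it appears intended to keep the original `Prompt-Guard-86M` checkpoint in its pre-existing `Prompt-Guard` directory while the new SKUs get folders named after their model ids. A minimal sketch of the resulting mapping, under the same checkout assumption as the earlier examples:

# Sketch: folder names produced by the new download-info map.
from llama_stack.cli.model.safety_models import prompt_guard_download_info_map

for model_id, info in prompt_guard_download_info_map().items():
    print(f"{model_id} -> {info.folder}")
# Prompt-Guard-86M -> Prompt-Guard  (pre-existing folder name kept)
# Llama-Prompt-Guard-2-86M -> Llama-Prompt-Guard-2-86M
# Llama-Prompt-Guard-2-22M -> Llama-Prompt-Guard-2-22M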