feat: add api.llama provider, llama-guard-4 model (#2058)
This PR adds a llama-stack inference provider for `api.llama.com`, along with model entries for Llama-Guard-4 and the updated Prompt-Guard models.
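The provider itself is not shown in the hunks below (they cover the Prompt-Guard CLI changes), but as a purely illustrative sketch of the inference path this PR targets: assuming `api.llama.com` exposes an OpenAI-compatible chat-completions endpoint, a client could exercise it roughly as follows. The base URL, environment variable, and model name here are placeholders, not taken from this PR.

    # Illustrative only -- endpoint URL, env var, and model id are assumptions, not from this PR.
    import os

    from openai import OpenAI

    client = OpenAI(
        base_url="https://api.llama.com/compat/v1/",  # assumed OpenAI-compatible endpoint
        api_key=os.environ["LLAMA_API_KEY"],          # hypothetical environment variable
    )

    resp = client.chat.completions.create(
        model="Llama-4-Maverick-17B-128E-Instruct-FP8",  # placeholder model id
        messages=[{"role": "user", "content": "Hello"}],
    )
    print(resp.choices[0].message.content)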
Parent: 934446ddb4
Commit: 4d0bfbf984
21 changed files with 1526 additions and 47 deletions
@@ -460,15 +460,17 @@ def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
     from llama_stack.models.llama.sku_list import llama_meta_net_info, resolve_model

     from .model.safety_models import (
-        prompt_guard_download_info,
-        prompt_guard_model_sku,
+        prompt_guard_download_info_map,
+        prompt_guard_model_sku_map,
     )

-    prompt_guard = prompt_guard_model_sku()
+    prompt_guard_model_sku_map = prompt_guard_model_sku_map()
+    prompt_guard_download_info_map = prompt_guard_download_info_map()
     for model_id in model_ids:
-        if model_id == prompt_guard.model_id:
-            model = prompt_guard
-            info = prompt_guard_download_info()
+        if model_id in prompt_guard_model_sku_map.keys():
+            model = prompt_guard_model_sku_map[model_id]
+            info = prompt_guard_download_info_map[model_id]
         else:
             model = resolve_model(model_id)
             if model is None:
@@ -36,11 +36,11 @@ class ModelDescribe(Subcommand):
         )

     def _run_model_describe_cmd(self, args: argparse.Namespace) -> None:
-        from .safety_models import prompt_guard_model_sku
+        from .safety_models import prompt_guard_model_sku_map

-        prompt_guard = prompt_guard_model_sku()
-        if args.model_id == prompt_guard.model_id:
-            model = prompt_guard
+        prompt_guard_model_map = prompt_guard_model_sku_map()
+        if args.model_id in prompt_guard_model_map.keys():
+            model = prompt_guard_model_map[args.model_id]
         else:
             model = resolve_model(args.model_id)
@@ -84,7 +84,7 @@ class ModelList(Subcommand):
         )

     def _run_model_list_cmd(self, args: argparse.Namespace) -> None:
-        from .safety_models import prompt_guard_model_sku
+        from .safety_models import prompt_guard_model_skus

         if args.downloaded:
             return _run_model_list_downloaded_cmd()
@@ -96,7 +96,7 @@ class ModelList(Subcommand):
         ]

         rows = []
-        for model in all_registered_models() + [prompt_guard_model_sku()]:
+        for model in all_registered_models() + prompt_guard_model_skus():
             if not args.show_all and not model.is_featured:
                 continue
@@ -42,11 +42,12 @@ class ModelRemove(Subcommand):
         )

     def _run_model_remove_cmd(self, args: argparse.Namespace) -> None:
-        from .safety_models import prompt_guard_model_sku
+        from .safety_models import prompt_guard_model_sku_map

-        prompt_guard = prompt_guard_model_sku()
-        if args.model == prompt_guard.model_id:
-            model = prompt_guard
+        prompt_guard_model_map = prompt_guard_model_sku_map()
+
+        if args.model in prompt_guard_model_map.keys():
+            model = prompt_guard_model_map[args.model]
         else:
             model = resolve_model(args.model)
@@ -15,11 +15,11 @@ from llama_stack.models.llama.sku_types import CheckpointQuantizationFormat
 class PromptGuardModel(BaseModel):
     """Make a 'fake' Model-like object for Prompt Guard. Eventually this will be removed."""

-    model_id: str = "Prompt-Guard-86M"
+    model_id: str
+    huggingface_repo: str
     description: str = "Prompt Guard. NOTE: this model will not be provided via `llama` CLI soon."
     is_featured: bool = False
-    huggingface_repo: str = "meta-llama/Prompt-Guard-86M"
-    max_seq_length: int = 2048
+    max_seq_length: int = 512
     is_instruct_model: bool = False
     quantization_format: CheckpointQuantizationFormat = CheckpointQuantizationFormat.bf16
     arch_args: Dict[str, Any] = Field(default_factory=dict)
@@ -30,18 +30,35 @@ class PromptGuardModel(BaseModel):
     model_config = ConfigDict(protected_namespaces=())


-def prompt_guard_model_sku():
-    return PromptGuardModel()
+def prompt_guard_model_skus():
+    return [
+        PromptGuardModel(model_id="Prompt-Guard-86M", huggingface_repo="meta-llama/Prompt-Guard-86M"),
+        PromptGuardModel(
+            model_id="Llama-Prompt-Guard-2-86M",
+            huggingface_repo="meta-llama/Llama-Prompt-Guard-2-86M",
+        ),
+        PromptGuardModel(
+            model_id="Llama-Prompt-Guard-2-22M",
+            huggingface_repo="meta-llama/Llama-Prompt-Guard-2-22M",
+        ),
+    ]


-def prompt_guard_download_info():
-    return LlamaDownloadInfo(
-        folder="Prompt-Guard",
-        files=[
-            "model.safetensors",
-            "special_tokens_map.json",
-            "tokenizer.json",
-            "tokenizer_config.json",
-        ],
-        pth_size=1,
-    )
+def prompt_guard_model_sku_map() -> Dict[str, Any]:
+    return {model.model_id: model for model in prompt_guard_model_skus()}
+
+
+def prompt_guard_download_info_map() -> Dict[str, LlamaDownloadInfo]:
+    return {
+        model.model_id: LlamaDownloadInfo(
+            folder="Prompt-Guard" if model.model_id == "Prompt-Guard-86M" else model.model_id,
+            files=[
+                "model.safetensors",
+                "special_tokens_map.json",
+                "tokenizer.json",
+                "tokenizer_config.json",
+            ],
+            pth_size=1,
+        )
+        for model in prompt_guard_model_skus()
+    }
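For context, a minimal sketch of how the new helper maps compose, assuming safety_models.py lives under llama_stack.cli.model as the relative imports in this diff suggest; the field accesses (huggingface_repo, folder) mirror the definitions above.

    # Minimal sketch (assumed module path: llama_stack.cli.model.safety_models).
    from llama_stack.cli.model.safety_models import (
        prompt_guard_download_info_map,
        prompt_guard_model_sku_map,
    )

    skus = prompt_guard_model_sku_map()            # {model_id: PromptGuardModel}
    downloads = prompt_guard_download_info_map()   # {model_id: LlamaDownloadInfo}

    for model_id, sku in skus.items():
        info = downloads[model_id]
        # e.g. "Llama-Prompt-Guard-2-22M meta-llama/Llama-Prompt-Guard-2-22M Llama-Prompt-Guard-2-22M"
        print(model_id, sku.huggingface_repo, info.folder)

This is the same keyed lookup the download, describe, and remove commands now perform instead of comparing against a single hard-coded Prompt-Guard SKU.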