feat: add api.llama provider, llama-guard-4 model (#2058)

This PR adds a llama-stack inference provider for `api.llama.com` and adds model
entries for Llama-Guard-4 and the updated Prompt-Guard models.
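
For reference, a minimal sketch of looking up the new Llama-Guard-4 entry from the safety model list; the import paths below are assumptions and may differ from where the SKU list actually lives in this repo:

```python
# Illustrative sketch: the import paths are assumptions, not necessarily the
# modules touched by this PR.
from llama_stack.models.llama.datatypes import CoreModelId   # assumed path
from llama_stack.models.llama.sku_list import safety_models  # assumed path

# Locate the Llama Guard 4 entry added by this PR and inspect its metadata.
guard_4 = next(
    m for m in safety_models()
    if m.core_model_id == CoreModelId.llama_guard_4_12b
)
print(guard_4.description)       # Llama Guard v4 12b system safety model
print(guard_4.huggingface_repo)  # meta-llama/Llama-Guard-4-12B
```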
Ashwin Bharambe committed 2025-04-29 10:07:41 -07:00
commit 4d0bfbf984 (parent 934446ddb4)
21 changed files with 1526 additions and 47 deletions

@@ -792,6 +792,13 @@ def llama3_3_instruct_models() -> List[Model]:
 @lru_cache
 def safety_models() -> List[Model]:
     return [
+        Model(
+            core_model_id=CoreModelId.llama_guard_4_12b,
+            description="Llama Guard v4 12b system safety model",
+            huggingface_repo="meta-llama/Llama-Guard-4-12B",
+            arch_args={},
+            pth_file_count=1,
+        ),
         Model(
             core_model_id=CoreModelId.llama_guard_3_11b_vision,
             description="Llama Guard v3 11b vision system safety model",

@@ -81,6 +81,7 @@ class CoreModelId(Enum):
     llama_guard_2_8b = "Llama-Guard-2-8B"
     llama_guard_3_11b_vision = "Llama-Guard-3-11B-Vision"
     llama_guard_3_1b = "Llama-Guard-3-1B"
+    llama_guard_4_12b = "Llama-Guard-4-12B"


 def is_multimodal(model_id) -> bool:
@@ -148,6 +149,7 @@ def model_family(model_id) -> ModelFamily:
         CoreModelId.llama_guard_2_8b,
         CoreModelId.llama_guard_3_11b_vision,
         CoreModelId.llama_guard_3_1b,
+        CoreModelId.llama_guard_4_12b,
     ]:
         return ModelFamily.safety
     else:
@@ -225,5 +227,7 @@ class Model(BaseModel):
             CoreModelId.llama_guard_3_1b,
         ]:
             return 131072
+        elif self.core_model_id == CoreModelId.llama_guard_4_12b:
+            return 8192
         else:
             raise ValueError(f"Unknown max_seq_len for {self.core_model_id}")