Mirror of https://github.com/meta-llama/llama-stack.git
Make Llama Guard 1B the default
parent cc5029a716
commit 4a75d922a9

5 changed files with 14 additions and 10 deletions
@@ -109,7 +109,7 @@ llama download --source meta --model-id Llama3.2-11B-Vision-Instruct --meta-url
 # llama-agents have safety enabled by default. For this, you will need
 # safety models -- Llama-Guard and Prompt-Guard
 llama download --source meta --model-id Prompt-Guard-86M --meta-url META_URL
-llama download --source meta --model-id Llama-Guard-3-8B --meta-url META_URL
+llama download --source meta --model-id Llama-Guard-3-1B --meta-url META_URL
 ```
 
 #### Downloading from [Hugging Face](https://huggingface.co/meta-llama)
@@ -121,7 +121,7 @@ llama download --source huggingface --model-id Meta-Llama3.1-8B-Instruct --hf-t
 
 llama download --source huggingface --model-id Meta-Llama3.1-70B-Instruct --hf-token <HF_TOKEN>
 
-llama download --source huggingface --model-id Llama-Guard-3-8B --ignore-patterns *original*
+llama download --source huggingface --model-id Llama-Guard-3-1B --ignore-patterns *original*
 llama download --source huggingface --model-id Prompt-Guard-86M --ignore-patterns *original*
 ```
 
@@ -376,7 +376,7 @@ Configuring API: memory (meta-reference-faiss)
 Configuring API: safety (meta-reference)
 Do you want to configure llama_guard_shield? (y/n): y
 Entering sub-configuration for llama_guard_shield:
-Enter value for model (default: Llama-Guard-3-8B) (required):
+Enter value for model (default: Llama-Guard-3-1B) (required):
 Enter value for excluded_categories (default: []) (required):
 Enter value for disable_input_check (default: False) (required):
 Enter value for disable_output_check (default: False) (required):
@ -398,7 +398,7 @@ After this step is successful, you should be able to find a run configuration sp
|
||||||
|
|
||||||
As you can see, we did basic configuration above and configured:
|
As you can see, we did basic configuration above and configured:
|
||||||
- inference to run on model `Meta-Llama3.1-8B-Instruct` (obtained from `llama model list`)
|
- inference to run on model `Meta-Llama3.1-8B-Instruct` (obtained from `llama model list`)
|
||||||
- Llama Guard safety shield with model `Llama-Guard-3-8B`
|
- Llama Guard safety shield with model `Llama-Guard-3-1B`
|
||||||
- Prompt Guard safety shield with model `Prompt-Guard-86M`
|
- Prompt Guard safety shield with model `Prompt-Guard-86M`
|
||||||
|
|
||||||
For how these configurations are stored as yaml, checkout the file printed at the end of the configuration.
|
For how these configurations are stored as yaml, checkout the file printed at the end of the configuration.
|
||||||
|
|
|
@@ -329,7 +329,7 @@ Configuring API: memory (meta-reference-faiss)
 Configuring API: safety (meta-reference)
 Do you want to configure llama_guard_shield? (y/n): y
 Entering sub-configuration for llama_guard_shield:
-Enter value for model (default: Llama-Guard-3-8B) (required):
+Enter value for model (default: Llama-Guard-3-1B) (required):
 Enter value for excluded_categories (default: []) (required):
 Enter value for disable_input_check (default: False) (required):
 Enter value for disable_output_check (default: False) (required):
@@ -351,7 +351,7 @@ After this step is successful, you should be able to find a run configuration sp
 
 As you can see, we did basic configuration above and configured:
 - inference to run on model `Meta-Llama3.1-8B-Instruct` (obtained from `llama model list`)
-- Llama Guard safety shield with model `Llama-Guard-3-8B`
+- Llama Guard safety shield with model `Llama-Guard-3-1B`
 - Prompt Guard safety shield with model `Prompt-Guard-86M`
 
 For how these configurations are stored as yaml, checkout the file printed at the end of the configuration.
@@ -59,7 +59,7 @@ async def run_main(host: str, port: int, stream: bool):
     response = await client.get_model("Meta-Llama3.1-8B-Instruct")
     cprint(f"get_model response={response}", "blue")
 
-    response = await client.get_model("Llama-Guard-3-8B")
+    response = await client.get_model("Llama-Guard-3-1B")
     cprint(f"get_model response={response}", "red")
 
 
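The hunk above points the example client at the 1B model. A minimal, self-contained sketch of the call pattern follows; the stub client below is an illustration only (the real client class lives elsewhere in llama-stack and is not part of this commit):

```python
import asyncio


class StubModelsClient:
    """Stand-in for the example script's client; returns canned metadata."""

    async def get_model(self, identifier: str) -> dict:
        return {"identifier": identifier}


async def run_main() -> None:
    client = StubModelsClient()
    # After this commit, callers look up the 1B Guard model by default.
    response = await client.get_model("Llama-Guard-3-1B")
    print(f"get_model response={response}")


asyncio.run(run_main())
```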
@@ -20,7 +20,7 @@ class MetaReferenceShieldType(Enum):
 
 
 class LlamaGuardShieldConfig(BaseModel):
-    model: str = "Llama-Guard-3-8B"
+    model: str = "Llama-Guard-3-1B"
     excluded_categories: List[str] = []
     disable_input_check: bool = False
     disable_output_check: bool = False
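To see the effect of the new default, the sketch below re-declares the config from the hunk above so it runs standalone; it assumes nothing beyond pydantic itself:

```python
from typing import List

from pydantic import BaseModel


class LlamaGuardShieldConfig(BaseModel):
    model: str = "Llama-Guard-3-1B"
    excluded_categories: List[str] = []
    disable_input_check: bool = False
    disable_output_check: bool = False


config = LlamaGuardShieldConfig()
assert config.model == "Llama-Guard-3-1B"  # new default kicks in
explicit = LlamaGuardShieldConfig(model="Llama-Guard-3-8B")  # 8B still usable
```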
@@ -33,7 +33,11 @@ class LlamaGuardShieldConfig(BaseModel):
             for m in safety_models()
             if (
                 m.core_model_id
-                in {CoreModelId.llama_guard_3_8b, CoreModelId.llama_guard_3_11b_vision}
+                in {
+                    CoreModelId.llama_guard_3_8b,
+                    CoreModelId.llama_guard_3_1b,
+                    CoreModelId.llama_guard_3_11b_vision,
+                }
             )
         ]
         if model not in permitted_models:
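The second hunk in this file widens the set of Guard models the config validator accepts. A standalone sketch of that membership check; `CoreModelId` and `safety_models()` belong to llama-stack, so the enum below merely mirrors the three members named in the diff:

```python
from enum import Enum


class CoreModelId(Enum):  # stand-in mirroring the diff, not the real enum
    llama_guard_3_8b = "Llama-Guard-3-8B"
    llama_guard_3_1b = "Llama-Guard-3-1B"
    llama_guard_3_11b_vision = "Llama-Guard-3-11B-Vision"


PERMITTED = {
    CoreModelId.llama_guard_3_8b,
    CoreModelId.llama_guard_3_1b,  # newly permitted by this commit
    CoreModelId.llama_guard_3_11b_vision,
}


def validate_model(model: str) -> str:
    permitted = [m.value for m in PERMITTED]
    if model not in permitted:
        raise ValueError(f"invalid model {model!r}; expected one of {permitted}")
    return model


validate_model("Llama-Guard-3-1B")  # passes after this change
```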
@@ -44,7 +44,7 @@ routing_table:
   - provider_id: meta-reference
     config:
       llama_guard_shield:
-        model: Llama-Guard-3-8B
+        model: Llama-Guard-3-1B
         excluded_categories: []
         disable_input_check: false
         disable_output_check: false
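Finally, the run configuration template now writes the 1B model into the shield block. A quick PyYAML round-trip over just that block (the surrounding `routing_table` nesting is elided here, as in the hunk) confirms what a consumer would read back:

```python
import yaml  # assumes PyYAML is installed

# Illustrative fragment mirroring the shield block from the hunk above.
SHIELD_BLOCK = """
llama_guard_shield:
  model: Llama-Guard-3-1B
  excluded_categories: []
  disable_input_check: false
  disable_output_check: false
"""

cfg = yaml.safe_load(SHIELD_BLOCK)
assert cfg["llama_guard_shield"]["model"] == "Llama-Guard-3-1B"
```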