diff --git a/docs/cli_reference.md b/docs/cli_reference.md
index feded6bac..28874641f 100644
--- a/docs/cli_reference.md
+++ b/docs/cli_reference.md
@@ -109,7 +109,7 @@ llama download --source meta --model-id Llama3.2-11B-Vision-Instruct --meta-url
 # llama-agents have safety enabled by default. For this, you will need
 # safety models -- Llama-Guard and Prompt-Guard
 llama download --source meta --model-id Prompt-Guard-86M --meta-url META_URL
-llama download --source meta --model-id Llama-Guard-3-8B --meta-url META_URL
+llama download --source meta --model-id Llama-Guard-3-1B --meta-url META_URL
 ```
 
 #### Downloading from [Hugging Face](https://huggingface.co/meta-llama)
@@ -121,7 +121,7 @@ llama download --source huggingface --model-id Meta-Llama3.1-8B-Instruct --hf-t
 
 llama download --source huggingface --model-id Meta-Llama3.1-70B-Instruct --hf-token <HF_TOKEN>
 
-llama download --source huggingface --model-id Llama-Guard-3-8B --ignore-patterns *original*
+llama download --source huggingface --model-id Llama-Guard-3-1B --ignore-patterns *original*
 llama download --source huggingface --model-id Prompt-Guard-86M --ignore-patterns *original*
 ```
 
@@ -376,7 +376,7 @@ Configuring API: memory (meta-reference-faiss)
 Configuring API: safety (meta-reference)
 Do you want to configure llama_guard_shield? (y/n): y
 Entering sub-configuration for llama_guard_shield:
-Enter value for model (default: Llama-Guard-3-8B) (required):
+Enter value for model (default: Llama-Guard-3-1B) (required):
 Enter value for excluded_categories (default: []) (required):
 Enter value for disable_input_check (default: False) (required):
 Enter value for disable_output_check (default: False) (required):
@@ -398,7 +398,7 @@ After this step is successful, you should be able to find a run configuration sp
 
 As you can see, we did basic configuration above and configured:
 - inference to run on model `Meta-Llama3.1-8B-Instruct` (obtained from `llama model list`)
-- Llama Guard safety shield with model `Llama-Guard-3-8B`
+- Llama Guard safety shield with model `Llama-Guard-3-1B`
 - Prompt Guard safety shield with model `Prompt-Guard-86M`
 
 For how these configurations are stored as yaml, checkout the file printed at the end of the configuration.
diff --git a/docs/getting_started.md b/docs/getting_started.md
index af06adee2..4e51bc079 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -329,7 +329,7 @@ Configuring API: memory (meta-reference-faiss)
 Configuring API: safety (meta-reference)
 Do you want to configure llama_guard_shield? (y/n): y
 Entering sub-configuration for llama_guard_shield:
-Enter value for model (default: Llama-Guard-3-8B) (required):
+Enter value for model (default: Llama-Guard-3-1B) (required):
 Enter value for excluded_categories (default: []) (required):
 Enter value for disable_input_check (default: False) (required):
 Enter value for disable_output_check (default: False) (required):
@@ -351,7 +351,7 @@ After this step is successful, you should be able to find a run configuration sp
 
 As you can see, we did basic configuration above and configured:
 - inference to run on model `Meta-Llama3.1-8B-Instruct` (obtained from `llama model list`)
-- Llama Guard safety shield with model `Llama-Guard-3-8B`
+- Llama Guard safety shield with model `Llama-Guard-3-1B`
 - Prompt Guard safety shield with model `Prompt-Guard-86M`
 
 For how these configurations are stored as yaml, checkout the file printed at the end of the configuration.
diff --git a/llama_stack/apis/models/client.py b/llama_stack/apis/models/client.py
index dbd26146d..0c26b1b50 100644
--- a/llama_stack/apis/models/client.py
+++ b/llama_stack/apis/models/client.py
@@ -59,7 +59,7 @@ async def run_main(host: str, port: int, stream: bool):
     response = await client.get_model("Meta-Llama3.1-8B-Instruct")
     cprint(f"get_model response={response}", "blue")
 
-    response = await client.get_model("Llama-Guard-3-8B")
+    response = await client.get_model("Llama-Guard-3-1B")
     cprint(f"get_model response={response}", "red")
 
diff --git a/llama_stack/providers/impls/meta_reference/safety/config.py b/llama_stack/providers/impls/meta_reference/safety/config.py
index 9003aa272..734103412 100644
--- a/llama_stack/providers/impls/meta_reference/safety/config.py
+++ b/llama_stack/providers/impls/meta_reference/safety/config.py
@@ -20,7 +20,7 @@ class MetaReferenceShieldType(Enum):
 
 
 class LlamaGuardShieldConfig(BaseModel):
-    model: str = "Llama-Guard-3-8B"
+    model: str = "Llama-Guard-3-1B"
     excluded_categories: List[str] = []
     disable_input_check: bool = False
     disable_output_check: bool = False
@@ -33,7 +33,11 @@ class LlamaGuardShieldConfig(BaseModel):
             for m in safety_models()
             if (
                 m.core_model_id
-                in {CoreModelId.llama_guard_3_8b, CoreModelId.llama_guard_3_11b_vision}
+                in {
+                    CoreModelId.llama_guard_3_8b,
+                    CoreModelId.llama_guard_3_1b,
+                    CoreModelId.llama_guard_3_11b_vision,
+                }
             )
         ]
         if model not in permitted_models:
diff --git a/tests/examples/local-run.yaml b/tests/examples/local-run.yaml
index cbe36193c..98d105233 100644
--- a/tests/examples/local-run.yaml
+++ b/tests/examples/local-run.yaml
@@ -44,7 +44,7 @@ routing_table:
   - provider_id: meta-reference
     config:
       llama_guard_shield:
-        model: Llama-Guard-3-8B
+        model: Llama-Guard-3-1B
         excluded_categories: []
         disable_input_check: false
         disable_output_check: false
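For context (not part of the patch), the sketch below illustrates the validation behavior this change enables: a Pydantic config whose default shield model is `Llama-Guard-3-1B` and whose permitted set now includes the 1B guard model alongside the 8B and 11B-Vision variants. It is a minimal, hypothetical example: it assumes Pydantic v1-style validators and inlines model descriptors instead of calling the real `safety_models()` / `CoreModelId` helpers.

```python
# Illustrative sketch only -- not the code from this patch.
# Assumes pydantic v1-style validators; permitted descriptors are inlined
# here rather than derived from safety_models()/CoreModelId.
from typing import List

from pydantic import BaseModel, validator


class LlamaGuardShieldConfigSketch(BaseModel):
    model: str = "Llama-Guard-3-1B"
    excluded_categories: List[str] = []
    disable_input_check: bool = False
    disable_output_check: bool = False

    @validator("model")
    def validate_model(cls, model: str) -> str:
        # After this patch, the 1B guard model is accepted in addition to
        # the 8B and 11B-Vision variants.
        permitted_models = [
            "Llama-Guard-3-8B",
            "Llama-Guard-3-1B",
            "Llama-Guard-3-11B-Vision",
        ]
        if model not in permitted_models:
            raise ValueError(
                f"Invalid model: {model}. Must be one of {permitted_models}"
            )
        return model


if __name__ == "__main__":
    # Accepted with the expanded permitted set; an unlisted model would raise.
    print(LlamaGuardShieldConfigSketch(model="Llama-Guard-3-1B").model)
```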