Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-30 23:51:00 +00:00)

Commit 4971113f92 (parent: 15ffceb533)

Update provider_type -> inline::llama-guard in templates, update run.yaml

24 changed files with 121 additions and 98 deletions
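
Every template touched below follows the same pattern: the single meta-reference safety provider with nested llama_guard_shield / prompt_guard_shield blocks is split into two dedicated providers, each with a flat config. A minimal before/after sketch, assembled from the hunks below (surrounding run.yaml fields omitted):

# Before: one meta-reference safety provider with nested shield configs.
safety:
- provider_id: meta0
  provider_type: meta-reference
  config:
    llama_guard_shield:
      model: Llama-Guard-3-1B
      excluded_categories: []
      disable_input_check: false
      disable_output_check: false
    prompt_guard_shield:
      model: Prompt-Guard-86M

# After: one provider per shield, each with a flat config.
safety:
- provider_id: meta0
  provider_type: inline::llama-guard
  config:
    model: Llama-Guard-3-1B
    excluded_categories: []
- provider_id: meta1
  provider_type: inline::prompt-guard
  config:
    model: Prompt-Guard-86M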
@@ -19,15 +19,14 @@ providers:
       url: http://127.0.0.1:80
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -19,16 +19,16 @@ providers:
       url: https://api.fireworks.ai/inference
       # api_key: <ENTER_YOUR_API_KEY>
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -21,7 +21,7 @@ providers:
       gpu_memory_utilization: 0.4
       enforce_eager: true
       max_tokens: 4096
-  - provider_id: vllm-safety
+  - provider_id: vllm-inference-safety
     provider_type: inline::vllm
     config:
       model: Llama-Guard-3-1B

@@ -31,14 +31,15 @@ providers:
       max_tokens: 4096
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-      # Uncomment to use prompt guard
-      # prompt_guard_shield:
-      #   model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  # Uncomment to use prompt guard
+  # - provider_id: meta1
+  #   provider_type: inline::prompt-guard
+  #   config:
+  #     model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -13,7 +13,7 @@ apis:
 - safety
 providers:
   inference:
-  - provider_id: meta-reference-inference
+  - provider_id: inference0
     provider_type: meta-reference
     config:
       model: Llama3.2-3B-Instruct

@@ -21,7 +21,7 @@ providers:
       torch_seed: null
       max_seq_len: 4096
       max_batch_size: 1
-  - provider_id: meta-reference-safety
+  - provider_id: inference1
     provider_type: meta-reference
     config:
       model: Llama-Guard-3-1B

@@ -31,11 +31,14 @@ providers:
       max_batch_size: 1
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
       # Uncomment to use prompt guard
       # prompt_guard_shield:
       #   model: Prompt-Guard-86M

@@ -22,17 +22,25 @@ providers:
       torch_seed: null
       max_seq_len: 2048
       max_batch_size: 1
+  - provider_id: meta1
+    provider_type: meta-reference-quantized
+    config:
+      # not a quantized model !
+      model: Llama-Guard-3-1B
+      quantization: null
+      torch_seed: null
+      max_seq_len: 2048
+      max_batch_size: 1
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -19,15 +19,14 @@ providers:
       url: http://127.0.0.1:14343
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -19,15 +19,14 @@ providers:
       url: http://127.0.0.1:14343
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -19,15 +19,14 @@ providers:
       url: http://127.0.0.1:8000
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -19,15 +19,14 @@ providers:
       url: http://127.0.0.1:5009
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -20,15 +20,14 @@ providers:
       # api_key: <ENTER_YOUR_API_KEY>
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: remote::weaviate

@@ -36,9 +36,9 @@ the provider types (implementations) you want to use for these APIs.
 Tip: use <TAB> to see options for the providers.

 > Enter provider for API inference: meta-reference
-> Enter provider for API safety: meta-reference
+> Enter provider for API safety: inline::llama-guard
 > Enter provider for API agents: meta-reference
-> Enter provider for API memory: meta-reference
+> Enter provider for API memory: inline::faiss
 > Enter provider for API datasetio: meta-reference
 > Enter provider for API scoring: meta-reference
 > Enter provider for API eval: meta-reference

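For reference, answers like the ones above are written out as a build.yaml with the same distribution_spec shape as the files changed below. A plausible sketch (the name, description, and exact field order are assumptions; the provider choices mirror the prompts):

# Sketch of the build.yaml these prompts would generate (illustrative).
name: my-local-stack
distribution_spec:
  description: my local stack
  providers:
    inference: meta-reference
    safety: inline::llama-guard
    agents: meta-reference
    memory: inline::faiss
    datasetio: meta-reference
    scoring: meta-reference
    eval: meta-reference
image_type: conda
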
@@ -203,8 +203,8 @@ distribution_spec:
   description: Like local, but use ollama for running LLM inference
   providers:
     inference: remote::ollama
-    memory: meta-reference
-    safety: meta-reference
+    memory: inline::faiss
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference
 image_type: conda

@@ -11,6 +11,7 @@ from llama_stack.apis.shields import ShieldType

 from llama_stack.distribution.datatypes import Api, Provider
 from llama_stack.providers.inline.safety.llama_guard import LlamaGuardConfig
+from llama_stack.providers.inline.safety.prompt_guard import PromptGuardConfig
 from llama_stack.providers.remote.safety.bedrock import BedrockSafetyConfig

 from llama_stack.providers.tests.resolver import resolve_impls_for_test_v2

@@ -44,6 +45,22 @@ def safety_llama_guard(safety_model) -> ProviderFixture:
     )


+# TODO: this is not tested yet; we would need to configure the run_shield() test
+# and parametrize it with the "prompt" for testing depending on the safety fixture
+# we are using.
+@pytest.fixture(scope="session")
+def safety_prompt_guard() -> ProviderFixture:
+    return ProviderFixture(
+        providers=[
+            Provider(
+                provider_id="inline::prompt-guard",
+                provider_type="inline::prompt-guard",
+                config=PromptGuardConfig().model_dump(),
+            )
+        ],
+    )
+
+
 @pytest.fixture(scope="session")
 def safety_bedrock() -> ProviderFixture:
     return ProviderFixture(

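A hypothetical sketch (not part of this commit) of how the new session-scoped fixture could be consumed once the run_shield() test is wired up; the test name and assertions are assumptions:

# Hypothetical usage of the safety_prompt_guard fixture above (illustrative).
# pytest injects the session-scoped fixture by parameter name.
def test_prompt_guard_fixture_shape(safety_prompt_guard):
    provider = safety_prompt_guard.providers[0]
    # provider_type must match the inline::prompt-guard registry entry.
    assert provider.provider_type == "inline::prompt-guard"
    # Assumption: PromptGuardConfig.model_dump() carries a "model" field
    # defaulting to Prompt-Guard-86M.
    assert provider.config.get("model") == "Prompt-Guard-86M"
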
@@ -3,7 +3,7 @@ distribution_spec:
   description: Use Amazon Bedrock APIs.
   providers:
     inference: remote::bedrock
-    memory: meta-reference
-    safety: meta-reference
+    memory: inline::faiss
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference

@@ -3,7 +3,7 @@ distribution_spec:
   description: Use Databricks for running LLM inference
   providers:
     inference: remote::databricks
-    memory: meta-reference
-    safety: meta-reference
+    memory: inline::faiss
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference

@@ -6,6 +6,6 @@ distribution_spec:
     memory:
     - meta-reference
     - remote::weaviate
-    safety: meta-reference
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference

@@ -3,7 +3,7 @@ distribution_spec:
   description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
   providers:
     inference: remote::hf::endpoint
-    memory: meta-reference
-    safety: meta-reference
+    memory: inline::faiss
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference

@@ -3,7 +3,7 @@ distribution_spec:
   description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
   providers:
     inference: remote::hf::serverless
-    memory: meta-reference
-    safety: meta-reference
+    memory: inline::faiss
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference

@@ -8,6 +8,6 @@ distribution_spec:
     - meta-reference
     - remote::chromadb
     - remote::pgvector
-    safety: meta-reference
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference

@@ -8,6 +8,6 @@ distribution_spec:
     - meta-reference
     - remote::chromadb
     - remote::pgvector
-    safety: meta-reference
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference

@@ -8,6 +8,6 @@ distribution_spec:
     - meta-reference
     - remote::chromadb
     - remote::pgvector
-    safety: meta-reference
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference

@@ -7,6 +7,6 @@ distribution_spec:
     - meta-reference
     - remote::chromadb
     - remote::pgvector
-    safety: meta-reference
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference

@@ -7,6 +7,6 @@ distribution_spec:
     - meta-reference
     - remote::chromadb
     - remote::pgvector
-    safety: meta-reference
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference

@@ -7,6 +7,6 @@ distribution_spec:
     - meta-reference
     - remote::chromadb
     - remote::pgvector
-    safety: meta-reference
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference

@@ -6,6 +6,6 @@ distribution_spec:
     memory:
     - meta-reference
    - remote::weaviate
-    safety: meta-reference
+    safety: inline::llama-guard
     agents: meta-reference
     telemetry: meta-reference