Split safety into (llama-guard, prompt-guard, code-scanner) (#400)

Splits the meta-reference safety implementation into three distinct providers:

- inline::llama-guard
- inline::prompt-guard
- inline::code-scanner

Note that this PR is a backward-incompatible change to the llama stack server. I have added a `deprecation_error` field to `ProviderSpec` -- the server reads it and immediately barfs, printing a specific message directing the user to the action they need to take. An automagical "config upgrade" is a bit too much work to implement right now :/
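For illustration, here is a minimal sketch of that fail-fast check, assuming a Pydantic-style `ProviderSpec`; the `validate_provider` helper and the exact messages are hypothetical, not the actual llama-stack code:

```python
# Hypothetical sketch of the deprecation_error fail-fast behavior;
# not the actual llama-stack implementation.
from typing import Optional

from pydantic import BaseModel


class ProviderSpec(BaseModel):
    api: str
    provider_type: str
    # Set on retired providers; carries the migration message shown to users.
    deprecation_error: Optional[str] = None


def validate_provider(spec: ProviderSpec) -> None:
    # Refuse to start the server if the configured provider was retired.
    if spec.deprecation_error is not None:
        raise ValueError(
            f"Provider `{spec.provider_type}` (api: {spec.api}): "
            f"{spec.deprecation_error}"
        )


# The retired monolithic safety provider points users at the split providers.
spec = ProviderSpec(
    api="safety",
    provider_type="meta-reference",
    deprecation_error=(
        "`meta-reference` safety has been split; configure "
        "`inline::llama-guard`, `inline::prompt-guard`, or "
        "`inline::code-scanner` instead."
    ),
)
validate_provider(spec)  # raises ValueError at startup with the message above
```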

(Note that we will be gradually prefixing all inline providers with `inline::` -- I am doing it only for this set of new providers for now, because renaming the rest would break existing configuration files even more badly.)
commit c1f7ba3aed (parent 6d38b1690b)
Ashwin Bharambe authored 2024-11-11 09:29:18 -08:00; committed by GitHub
47 changed files with 464 additions and 500 deletions

@@ -19,15 +19,14 @@ providers:
       url: http://127.0.0.1:80
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -19,16 +19,16 @@ providers:
       url: https://api.fireworks.ai/inference
       # api_key: <ENTER_YOUR_API_KEY>
-  safety:
+  safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -21,7 +21,7 @@ providers:
       gpu_memory_utilization: 0.4
       enforce_eager: true
       max_tokens: 4096
-  - provider_id: vllm-safety
+  - provider_id: vllm-inference-safety
     provider_type: inline::vllm
     config:
       model: Llama-Guard-3-1B
@@ -31,14 +31,15 @@ providers:
       max_tokens: 4096
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-      # Uncomment to use prompt guard
-      # prompt_guard_shield:
-      #   model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  # Uncomment to use prompt guard
+  # - provider_id: meta1
+  #   provider_type: inline::prompt-guard
+  #   config:
+  #     model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -13,7 +13,7 @@ apis:
 - safety
 providers:
   inference:
-  - provider_id: meta-reference-inference
+  - provider_id: inference0
     provider_type: meta-reference
     config:
       model: Llama3.2-3B-Instruct
@@ -21,7 +21,7 @@ providers:
       torch_seed: null
       max_seq_len: 4096
       max_batch_size: 1
-  - provider_id: meta-reference-safety
+  - provider_id: inference1
     provider_type: meta-reference
     config:
       model: Llama-Guard-3-1B
@@ -31,11 +31,14 @@ providers:
       max_batch_size: 1
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   # Uncomment to use prompt guard
   # prompt_guard_shield:
   #   model: Prompt-Guard-86M

@@ -22,17 +22,25 @@ providers:
       torch_seed: null
       max_seq_len: 2048
       max_batch_size: 1
+  - provider_id: meta1
+    provider_type: meta-reference-quantized
+    config:
+      # not a quantized model !
+      model: Llama-Guard-3-1B
+      quantization: null
+      torch_seed: null
+      max_seq_len: 2048
+      max_batch_size: 1
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -19,15 +19,14 @@ providers:
       url: http://127.0.0.1:14343
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -19,15 +19,14 @@ providers:
       url: http://127.0.0.1:14343
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -19,15 +19,14 @@ providers:
       url: http://127.0.0.1:8000
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -19,15 +19,14 @@ providers:
       url: http://127.0.0.1:5009
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: meta-reference

@@ -20,15 +20,14 @@ providers:
       # api_key: <ENTER_YOUR_API_KEY>
   safety:
   - provider_id: meta0
-    provider_type: meta-reference
+    provider_type: inline::llama-guard
     config:
-      llama_guard_shield:
-        model: Llama-Guard-3-1B
-        excluded_categories: []
-        disable_input_check: false
-        disable_output_check: false
-      prompt_guard_shield:
-        model: Prompt-Guard-86M
+      model: Llama-Guard-3-1B
+      excluded_categories: []
+  - provider_id: meta1
+    provider_type: inline::prompt-guard
+    config:
+      model: Prompt-Guard-86M
   memory:
   - provider_id: meta0
     provider_type: remote::weaviate