mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-21 01:15:10 +00:00
feat: Code scanner Provider impl for moderations api (#3100)
# What does this PR do? Add CodeScanner implementations ## Test Plan `SAFETY_MODEL=CodeScanner LLAMA_STACK_CONFIG=starter uv run pytest -v tests/integration/safety/test_safety.py --text-model=llama3.2:3b-instruct-fp16 --embedding-model=all-MiniLM-L6-v2 --safety-shield=ollama` This PR need to land after this https://github.com/meta-llama/llama-stack/pull/3098
This commit is contained in:
parent
27d6becfd0
commit
7519ab4024
9 changed files with 144 additions and 24 deletions
|
@ -27,10 +27,10 @@ def data_url_from_image(file_path):
|
|||
|
||||
@pytest.fixture(scope="session")
|
||||
def code_scanner_shield_id(available_shields):
|
||||
if "CodeScanner" in available_shields:
|
||||
return "CodeScanner"
|
||||
if "code-scanner" in available_shields:
|
||||
return "code-scanner"
|
||||
|
||||
pytest.skip("CodeScanner shield is not available. Skipping.")
|
||||
pytest.skip("code-scanner shield is not available. Skipping.")
|
||||
|
||||
|
||||
def test_unsafe_examples(client_with_models, shield_id):
|
||||
|
@ -163,6 +163,60 @@ def test_safety_with_code_scanner(client_with_models, code_scanner_shield_id, mo
|
|||
assert response.violation.user_message == "Sorry, I found security concerns in the code."
|
||||
|
||||
|
||||
def test_unsafe_input_with_code_scanner_run_moderation(client_with_models, code_scanner_shield_id, model_providers):
|
||||
code_safety_enabled = len(CODE_SCANNER_ENABLED_PROVIDERS & model_providers) > 0
|
||||
if not code_safety_enabled:
|
||||
pytest.skip(f"Testing code scanner shields is not supported for model_providers {model_providers}")
|
||||
|
||||
insecure_code = [
|
||||
"""
|
||||
Here is an example of code that hashes a password using MD5:
|
||||
|
||||
```
|
||||
import hashlib
|
||||
password = 'securepassword123'.encode()
|
||||
hashed_password = hashlib.md5(password).hexdigest()
|
||||
print("Insecure hashed password:", hashed_password)
|
||||
```
|
||||
"""
|
||||
]
|
||||
moderation_object = client_with_models.moderations.create(
|
||||
input=insecure_code,
|
||||
model=code_scanner_shield_id,
|
||||
)
|
||||
assert moderation_object.results[0].flagged is True, f"Code scanner should have flagged {insecure_code} as insecure"
|
||||
assert all(value is True for value in moderation_object.results[0].categories.values()), (
|
||||
"Code scanner shield should have detected code insecure category"
|
||||
)
|
||||
|
||||
|
||||
def test_safe_input_with_code_scanner_run_moderation(client_with_models, code_scanner_shield_id, model_providers):
|
||||
code_safety_enabled = len(CODE_SCANNER_ENABLED_PROVIDERS & model_providers) > 0
|
||||
if not code_safety_enabled:
|
||||
pytest.skip(f"Testing code scanner shields is not supported for model_providers {model_providers}")
|
||||
|
||||
secure_code = [
|
||||
"""
|
||||
Extract the first 5 characters from a string:
|
||||
```
|
||||
text = "Hello World"
|
||||
first_five = text[:5]
|
||||
print(first_five) # Output: "Hello"
|
||||
|
||||
# Safe handling for strings shorter than 5 characters
|
||||
def get_first_five(text):
|
||||
return text[:5] if text else ""
|
||||
```
|
||||
"""
|
||||
]
|
||||
moderation_object = client_with_models.moderations.create(
|
||||
input=secure_code,
|
||||
model=code_scanner_shield_id,
|
||||
)
|
||||
|
||||
assert moderation_object.results[0].flagged is False, "Code scanner should not have flagged the code as insecure"
|
||||
|
||||
|
||||
# We can use an instance of the LlamaGuard shield to detect attempts to misuse
|
||||
# the interpreter as this is one of the existing categories it checks for
|
||||
def test_safety_with_code_interpreter_abuse(client_with_models, shield_id):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue