feat: Code scanner Provider impl for moderations api (#3100)

# What does this PR do?
Add a CodeScanner provider implementation for the moderations API.

## Test Plan
`SAFETY_MODEL=CodeScanner LLAMA_STACK_CONFIG=starter uv run pytest -v
tests/integration/safety/test_safety.py
--text-model=llama3.2:3b-instruct-fp16
--embedding-model=all-MiniLM-L6-v2 --safety-shield=ollama`

This PR needs to land after the following PR:
https://github.com/meta-llama/llama-stack/pull/3098
This commit is contained in:
slekkala1 2025-08-18 14:15:40 -07:00 committed by GitHub
parent 27d6becfd0
commit 7519ab4024
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 144 additions and 24 deletions

View file

@ -27,10 +27,10 @@ def data_url_from_image(file_path):
@pytest.fixture(scope="session")
def code_scanner_shield_id(available_shields):
if "CodeScanner" in available_shields:
return "CodeScanner"
if "code-scanner" in available_shields:
return "code-scanner"
pytest.skip("CodeScanner shield is not available. Skipping.")
pytest.skip("code-scanner shield is not available. Skipping.")
def test_unsafe_examples(client_with_models, shield_id):
@ -163,6 +163,60 @@ def test_safety_with_code_scanner(client_with_models, code_scanner_shield_id, mo
assert response.violation.user_message == "Sorry, I found security concerns in the code."
def test_unsafe_input_with_code_scanner_run_moderation(client_with_models, code_scanner_shield_id, model_providers):
    """Check that the moderations endpoint flags an MD5 password-hashing snippet.

    The snippet is sent through ``client_with_models.moderations.create`` with the
    code scanner shield id passed as ``model``. The test is skipped when none of
    ``model_providers`` is listed in ``CODE_SCANNER_ENABLED_PROVIDERS``.
    """
    code_safety_enabled = len(CODE_SCANNER_ENABLED_PROVIDERS & model_providers) > 0
    if not code_safety_enabled:
        pytest.skip(f"Testing code scanner shields is not supported for model_providers {model_providers}")

    insecure_code = [
        """
Here is an example of code that hashes a password using MD5:
```
import hashlib
password = 'securepassword123'.encode()
hashed_password = hashlib.md5(password).hexdigest()
print("Insecure hashed password:", hashed_password)
```
"""
    ]
    moderation_object = client_with_models.moderations.create(
        input=insecure_code,
        model=code_scanner_shield_id,
    )

    # Only the first result is inspected: a single input string was submitted.
    result = moderation_object.results[0]
    assert result.flagged is True, f"Code scanner should have flagged {insecure_code} as insecure"
    # all() over an empty mapping is vacuously True, so require at least one
    # reported category before checking that every reported category is set.
    assert result.categories, "Code scanner shield should have reported at least one category"
    assert all(value is True for value in result.categories.values()), (
        "Code scanner shield should have detected code insecure category"
    )
def test_safe_input_with_code_scanner_run_moderation(client_with_models, code_scanner_shield_id, model_providers):
    """Check that the moderations endpoint does not flag a harmless string-slicing snippet.

    The snippet is sent through ``client_with_models.moderations.create`` with the
    code scanner shield id passed as ``model``. The test is skipped when none of
    ``model_providers`` is listed in ``CODE_SCANNER_ENABLED_PROVIDERS``.
    """
    # Guard clause: nothing to test unless a supporting provider is configured.
    if not len(CODE_SCANNER_ENABLED_PROVIDERS & model_providers) > 0:
        pytest.skip(f"Testing code scanner shields is not supported for model_providers {model_providers}")

    benign_prompt = """
Extract the first 5 characters from a string:
```
text = "Hello World"
first_five = text[:5]
print(first_five) # Output: "Hello"
# Safe handling for strings shorter than 5 characters
def get_first_five(text):
return text[:5] if text else ""
```
"""
    moderation = client_with_models.moderations.create(
        input=[benign_prompt],
        model=code_scanner_shield_id,
    )

    first_result = moderation.results[0]
    assert first_result.flagged is False, "Code scanner should not have flagged the code as insecure"
# We can use an instance of the LlamaGuard shield to detect attempts to misuse
# the interpreter as this is one of the existing categories it checks for
def test_safety_with_code_interpreter_abuse(client_with_models, shield_id):