This commit is contained in:
Mustafa Elbehery 2025-07-25 11:36:37 +08:00 committed by GitHub
commit 4c867ce690
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 29 additions and 6 deletions

View file

@ -15,6 +15,7 @@ from llama_stack.apis.safety import (
RunShieldResponse,
Safety,
SafetyViolation,
ShieldStore,
ViolationLevel,
)
from llama_stack.apis.shields import Shield
@ -32,6 +33,8 @@ PROMPT_GUARD_MODEL = "Prompt-Guard-86M"
class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
shield_store: ShieldStore
def __init__(self, config: PromptGuardConfig, _deps) -> None:
self.config = config
@ -50,7 +53,7 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
self,
shield_id: str,
messages: list[Message],
params: dict[str, Any] = None,
params: dict[str, Any],
) -> RunShieldResponse:
shield = await self.shield_store.get_shield(shield_id)
if not shield:
@ -114,8 +117,10 @@ class PromptGuardShield:
elif self.config.guard_type == PromptGuardType.jailbreak.value and score_malicious > self.threshold:
violation = SafetyViolation(
violation_level=ViolationLevel.ERROR,
violation_type=f"prompt_injection:malicious={score_malicious}",
violation_return_message="Sorry, I cannot do this.",
user_message="Sorry, I cannot do this.",
metadata={
"violation_type": f"prompt_injection:malicious={score_malicious}",
},
)
return RunShieldResponse(violation=violation)