move codeshield into an independent safety provider

This commit is contained in:
Ashwin Bharambe 2024-10-08 15:45:50 -07:00 committed by Ashwin Bharambe
parent 380b9dab90
commit 4540d8bd87
10 changed files with 98 additions and 84 deletions

View file

@ -44,7 +44,6 @@ def message_content_as_str(message: Message) -> str:
return interleaved_text_media_as_str(message.content)
# For shields that operate on simple strings
class TextShield(ShieldBase):
def convert_messages_to_text(self, messages: List[Message]) -> str:
return "\n".join([message_content_as_str(m) for m in messages])
@ -56,9 +55,3 @@ class TextShield(ShieldBase):
@abstractmethod
async def run_impl(self, text: str) -> ShieldResponse:
raise NotImplementedError()
class DummyShield(TextShield):
    """Placeholder shield for end-to-end testing; it never reports a violation."""

    async def run_impl(self, text: str) -> ShieldResponse:
        """Ignore ``text`` and return a non-violating response."""
        no_violation = ShieldResponse(is_violation=False)
        return no_violation

View file

@ -12,19 +12,11 @@ from llama_stack.apis.safety import * # noqa: F403
from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.distribution.datatypes import Api
from llama_stack.providers.impls.meta_reference.safety.shields.base import (
OnViolationAction,
)
from .base import OnViolationAction, ShieldBase
from .config import SafetyConfig
from .llama_guard import LlamaGuardShield
from .prompt_guard import InjectionShield, JailbreakShield, PromptGuardShield
from .shields import (
CodeScannerShield,
InjectionShield,
JailbreakShield,
LlamaGuardShield,
ShieldBase,
)
PROMPT_GUARD_MODEL = "Prompt-Guard-86M"
@ -34,7 +26,7 @@ class MetaReferenceSafetyImpl(Safety):
self.config = config
self.inference_api = deps[Api.inference]
self.available_shields = [ShieldType.code_scanner.value]
self.available_shields = []
if config.llama_guard_shield:
self.available_shields.append(ShieldType.llama_guard.value)
if config.enable_prompt_guard:
@ -42,8 +34,6 @@ class MetaReferenceSafetyImpl(Safety):
async def initialize(self) -> None:
if self.config.enable_prompt_guard:
from .shields import PromptGuardShield
model_dir = model_local_dir(PROMPT_GUARD_MODEL)
_ = PromptGuardShield.instance(model_dir)
@ -107,7 +97,5 @@ class MetaReferenceSafetyImpl(Safety):
return JailbreakShield.instance(model_dir)
else:
raise ValueError(f"Unknown prompt guard type: {subtype}")
elif shield.type == ShieldType.code_scanner.value:
return CodeScannerShield.instance()
else:
raise ValueError(f"Unknown shield type: {shield.type}")

View file

@ -1,33 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
# suppress warnings and the spew of logs from Hugging Face
import transformers
from .base import ( # noqa: F401
DummyShield,
OnViolationAction,
ShieldBase,
ShieldResponse,
TextShield,
)
from .code_scanner import CodeScannerShield # noqa: F401
from .llama_guard import LlamaGuardShield # noqa: F401
from .prompt_guard import ( # noqa: F401
InjectionShield,
JailbreakShield,
PromptGuardShield,
)
# Import-time side effects: these statements run once, when this package is first imported.
# Restrict the transformers library's logging to the error level.
transformers.logging.set_verbosity_error()
import os
# Set before any tokenizer runs; presumably silences the tokenizers
# fork/parallelism warning -- TODO confirm against the tokenizers docs.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import warnings
# NOTE(review): this ignores every Python warning for the whole process,
# not only those raised by Hugging Face libraries.
warnings.filterwarnings("ignore")

View file

@ -1,27 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from termcolor import cprint
from .base import ShieldResponse, TextShield
class CodeScannerShield(TextShield):
    """Text shield that flags insecure code by running it through CodeShield."""

    async def run_impl(self, text: str) -> ShieldResponse:
        """Scan ``text`` with CodeShield and report whether it is insecure.

        Args:
            text: The text to scan.

        Returns:
            A ``ShieldResponse`` with ``is_violation=True`` when the scan
            result is marked insecure; its ``violation_type`` is the
            comma-joined ``pattern_id`` of every issue found. Otherwise a
            ``ShieldResponse`` with ``is_violation=False``.
        """
        # Function-scope import: codeshield is only loaded when a scan runs.
        from codeshield.cs import CodeShield

        # Log a short preview only: the first 50 characters of the input.
        # (The previous slice, text[50:], skipped the first 50 characters and
        # printed everything after them instead.)
        cprint(f"Running CodeScannerShield on {text[:50]}", color="magenta")

        result = await CodeShield.scan_code(text)
        if not result.is_insecure:
            return ShieldResponse(is_violation=False)

        return ShieldResponse(
            is_violation=True,
            violation_type=",".join(
                issue.pattern_id for issue in result.issues_found
            ),
            violation_return_message="Sorry, I found security concerns in the code.",
        )