move codeshield into an independent safety provider

2025-12-10 03:30:58 +00:00 · 2024-10-08 15:45:50 -07:00 · 2024-10-08 15:45:50 -07:00 · 4540d8bd87
commit 4540d8bd87
parent 380b9dab90
10 changed files with 98 additions and 84 deletions
--- a/llama_stack/providers/impls/meta_reference/safety/shields/base.py
+++ b/llama_stack/providers/impls/meta_reference/safety/shields/base.py
@ -44,7 +44,6 @@ def message_content_as_str(message: Message) -> str:
    return interleaved_text_media_as_str(message.content)


-# For shields that operate on simple strings
 class TextShield(ShieldBase):
    def convert_messages_to_text(self, messages: List[Message]) -> str:
        return "\n".join([message_content_as_str(m) for m in messages])
@ -56,9 +55,3 @@ class TextShield(ShieldBase):
    @abstractmethod
    async def run_impl(self, text: str) -> ShieldResponse:
        raise NotImplementedError()
-
-
-class DummyShield(TextShield):
-    async def run_impl(self, text: str) -> ShieldResponse:
-        # Dummy return LOW to test e2e
-        return ShieldResponse(is_violation=False)
--- a/llama_stack/providers/impls/meta_reference/safety/shields/llama_guard.py
+++ b/llama_stack/providers/impls/meta_reference/safety/shields/llama_guard.py
--- a/llama_stack/providers/impls/meta_reference/safety/shields/prompt_guard.py
+++ b/llama_stack/providers/impls/meta_reference/safety/shields/prompt_guard.py
--- a/llama_stack/providers/impls/meta_reference/safety/safety.py
+++ b/llama_stack/providers/impls/meta_reference/safety/safety.py
@ -12,19 +12,11 @@ from llama_stack.apis.safety import *  # noqa: F403
 from llama_models.llama3.api.datatypes import *  # noqa: F403
 from llama_stack.distribution.datatypes import Api

-from llama_stack.providers.impls.meta_reference.safety.shields.base import (
-    OnViolationAction,
-)
-
+from .base import OnViolationAction, ShieldBase
 from .config import SafetyConfig
+from .llama_guard import LlamaGuardShield
+from .prompt_guard import InjectionShield, JailbreakShield, PromptGuardShield

-from .shields import (
-    CodeScannerShield,
-    InjectionShield,
-    JailbreakShield,
-    LlamaGuardShield,
-    ShieldBase,
-)

 PROMPT_GUARD_MODEL = "Prompt-Guard-86M"

@ -34,7 +26,7 @@ class MetaReferenceSafetyImpl(Safety):
        self.config = config
        self.inference_api = deps[Api.inference]

-        self.available_shields = [ShieldType.code_scanner.value]
+        self.available_shields = []
        if config.llama_guard_shield:
            self.available_shields.append(ShieldType.llama_guard.value)
        if config.enable_prompt_guard:
@ -42,8 +34,6 @@ class MetaReferenceSafetyImpl(Safety):

    async def initialize(self) -> None:
        if self.config.enable_prompt_guard:
-            from .shields import PromptGuardShield
-
            model_dir = model_local_dir(PROMPT_GUARD_MODEL)
            _ = PromptGuardShield.instance(model_dir)

@ -107,7 +97,5 @@ class MetaReferenceSafetyImpl(Safety):
                return JailbreakShield.instance(model_dir)
            else:
                raise ValueError(f"Unknown prompt guard type: {subtype}")
-        elif shield.type == ShieldType.code_scanner.value:
-            return CodeScannerShield.instance()
        else:
            raise ValueError(f"Unknown shield type: {shield.type}")
--- a/llama_stack/providers/impls/meta_reference/safety/shields/init.py
+++ b/llama_stack/providers/impls/meta_reference/safety/shields/init.py
@ -1,33 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# supress warnings and spew of logs from hugging face
-import transformers
-
-from .base import (  # noqa: F401
-    DummyShield,
-    OnViolationAction,
-    ShieldBase,
-    ShieldResponse,
-    TextShield,
-)
-from .code_scanner import CodeScannerShield  # noqa: F401
-from .llama_guard import LlamaGuardShield  # noqa: F401
-from .prompt_guard import (  # noqa: F401
-    InjectionShield,
-    JailbreakShield,
-    PromptGuardShield,
-)
-
-transformers.logging.set_verbosity_error()
-
-import os
-
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-import warnings
-
-warnings.filterwarnings("ignore")
--- a/llama_stack/providers/impls/meta_reference/safety/shields/code_scanner.py
+++ b/llama_stack/providers/impls/meta_reference/safety/shields/code_scanner.py
@ -1,27 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from termcolor import cprint
-
-from .base import ShieldResponse, TextShield
-
-
-class CodeScannerShield(TextShield):
-    async def run_impl(self, text: str) -> ShieldResponse:
-        from codeshield.cs import CodeShield
-
-        cprint(f"Running CodeScannerShield on {text[50:]}", color="magenta")
-        result = await CodeShield.scan_code(text)
-        if result.is_insecure:
-            return ShieldResponse(
-                is_violation=True,
-                violation_type=",".join(
-                    [issue.pattern_id for issue in result.issues_found]
-                ),
-                violation_return_message="Sorry, I found security concerns in the code.",
-            )
-        else:
-            return ShieldResponse(is_violation=False)