fix prompt guard (#177)

Several other fixes to the configure flow. Add support for the Llama 3.2 1B/3B models in the Ollama adapter.
Authored by Ashwin Bharambe on 2024-10-03 11:07:53 -07:00; committed by GitHub
parent b9b1e8b08b
commit 210b71b0ba
11 changed files with 50 additions and 45 deletions

View file

@@ -56,7 +56,7 @@ async def run_main(host: str, port: int, stream: bool):
     response = await client.list_models()
     cprint(f"list_models response={response}", "green")
-    response = await client.get_model("Meta-Llama3.1-8B-Instruct")
+    response = await client.get_model("Llama3.1-8B-Instruct")
     cprint(f"get_model response={response}", "blue")
     response = await client.get_model("Llama-Guard-3-1B")

View file

@@ -23,7 +23,7 @@ if [ "$#" -lt 3 ]; then
   exit 1
 fi
-special_pip_deps="$3"
+special_pip_deps="$4"
 set -euo pipefail

View file

@@ -6,8 +6,15 @@
 from typing import Any
-from pydantic import BaseModel
+from llama_models.sku_list import (
+    llama3_1_family,
+    llama3_2_family,
+    llama3_family,
+    resolve_model,
+    safety_models,
+)
+from pydantic import BaseModel
 from llama_stack.distribution.datatypes import *  # noqa: F403
 from prompt_toolkit import prompt
 from prompt_toolkit.validation import Validator
@@ -27,6 +34,11 @@ from llama_stack.providers.impls.meta_reference.safety.config import (
 )
+ALLOWED_MODELS = (
+    llama3_family() + llama3_1_family() + llama3_2_family() + safety_models()
+)
 def make_routing_entry_type(config_class: Any):
     class BaseModelWithConfig(BaseModel):
         routing_key: str
@@ -104,7 +116,13 @@ def configure_api_providers(
 else:
     routing_key = prompt(
         "> Please enter the supported model your provider has for inference: ",
-        default="Meta-Llama3.1-8B-Instruct",
+        default="Llama3.1-8B-Instruct",
+        validator=Validator.from_callable(
+            lambda x: resolve_model(x) is not None,
+            error_message="Model must be: {}".format(
+                [x.descriptor() for x in ALLOWED_MODELS]
+            ),
+        ),
     )
     routing_entries.append(
         RoutableProviderConfig(
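For context, the validation added to this prompt can be sketched standalone. This is a minimal illustration of prompt_toolkit's Validator.from_callable pattern, with a hard-coded allow-list standing in for resolve_model/ALLOWED_MODELS; it is not the exact code from the configure flow.

from prompt_toolkit import prompt
from prompt_toolkit.validation import Validator

# Illustrative stand-in for ALLOWED_MODELS; the real code checks
# resolve_model(x) is not None against the llama_models SKU list.
allowed = {"Llama3.1-8B-Instruct", "Llama3.2-1B-Instruct", "Llama3.2-3B-Instruct"}

routing_key = prompt(
    "> Please enter the supported model your provider has for inference: ",
    default="Llama3.1-8B-Instruct",
    validator=Validator.from_callable(
        lambda x: x in allowed,
        error_message="Model must be one of: {}".format(sorted(allowed)),
    ),
)
# The prompt re-asks until the callable returns True, so routing_key is
# guaranteed to be a recognized model descriptor.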

View file

@@ -117,10 +117,10 @@ Provider configurations for each of the APIs provided by this package.
     description="""
 E.g. The following is a ProviderRoutingEntry for models:
-- routing_key: Meta-Llama3.1-8B-Instruct
+- routing_key: Llama3.1-8B-Instruct
   provider_type: meta-reference
   config:
-    model: Meta-Llama3.1-8B-Instruct
+    model: Llama3.1-8B-Instruct
     quantization: null
     torch_seed: null
     max_seq_len: 4096

View file

@@ -36,7 +36,7 @@ routing_table:
     config:
       host: localhost
       port: 6000
-    routing_key: Meta-Llama3.1-8B-Instruct
+    routing_key: Llama3.1-8B-Instruct
   safety:
   - provider_type: meta-reference
     config:

View file

@@ -7,6 +7,10 @@
 from llama_stack.distribution.datatypes import RemoteProviderConfig
+class OllamaImplConfig(RemoteProviderConfig):
+    port: int = 11434
 async def get_adapter_impl(config: RemoteProviderConfig, _deps):
     from .ollama import OllamaInferenceAdapter
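A rough usage sketch of the new adapter config, assuming the host field inherited from RemoteProviderConfig works as in the repo's other remote adapters; only the port default shown in the diff is certain.

from llama_stack.providers.adapters.inference.ollama import OllamaImplConfig

# The port now defaults to Ollama's standard 11434 unless the run config overrides it.
cfg = OllamaImplConfig(host="localhost")
assert cfg.port == 11434
custom = OllamaImplConfig(host="localhost", port=8080)  # point at a non-default Ollama server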

View file

@@ -23,9 +23,10 @@ from llama_stack.providers.utils.inference.routable import RoutableProviderForMo
 # TODO: Eventually this will move to the llama cli model list command
 # mapping of Model SKUs to ollama models
 OLLAMA_SUPPORTED_SKUS = {
-    # "Llama3.1-8B-Instruct": "llama3.1",
     "Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16",
     "Llama3.1-70B-Instruct": "llama3.1:70b-instruct-fp16",
+    "Llama3.2-1B-Instruct": "llama3.2:1b-instruct-fp16",
+    "Llama3.2-3B-Instruct": "llama3.2:3b-instruct-fp16",
 }
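This table is what lets the stack route the new 1B/3B SKUs to Ollama. A simplified sketch of the lookup it implies follows; the helper name is illustrative, not the adapter's actual method.

OLLAMA_SUPPORTED_SKUS = {
    "Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16",
    "Llama3.1-70B-Instruct": "llama3.1:70b-instruct-fp16",
    "Llama3.2-1B-Instruct": "llama3.2:1b-instruct-fp16",
    "Llama3.2-3B-Instruct": "llama3.2:3b-instruct-fp16",
}

def ollama_model_for(routing_key: str) -> str:
    # Map a llama-stack model descriptor to the Ollama tag to pull and run.
    if routing_key not in OLLAMA_SUPPORTED_SKUS:
        raise ValueError(f"Unsupported model descriptor: {routing_key}")
    return OLLAMA_SUPPORTED_SKUS[routing_key]

print(ollama_model_for("Llama3.2-3B-Instruct"))  # -> llama3.2:3b-instruct-fp16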

View file

@@ -47,10 +47,6 @@ class LlamaGuardShieldConfig(BaseModel):
         return model
-class PromptGuardShieldConfig(BaseModel):
-    model: str = "Prompt-Guard-86M"
 class SafetyConfig(BaseModel):
     llama_guard_shield: Optional[LlamaGuardShieldConfig] = None
-    prompt_guard_shield: Optional[PromptGuardShieldConfig] = None
+    enable_prompt_guard: Optional[bool] = False
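Net effect on safety configuration: the dedicated PromptGuardShieldConfig (and its model field) is gone, and prompt guard becomes a plain opt-in flag with the model pinned internally. A minimal sketch, assuming SafetyConfig has no other required fields:

from llama_stack.providers.impls.meta_reference.safety.config import SafetyConfig

# Old style: SafetyConfig(prompt_guard_shield=PromptGuardShieldConfig(model="Prompt-Guard-86M"))
# New style: just flip the flag; the shield always uses Prompt-Guard-86M.
cfg = SafetyConfig(enable_prompt_guard=True)
assert cfg.llama_guard_shield is None  # llama guard remains optional and off by default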

View file

@@ -6,8 +6,6 @@
 from typing import Any, Dict, List
-from llama_models.sku_list import resolve_model
 from llama_stack.distribution.utils.model_utils import model_local_dir
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.apis.safety import *  # noqa: F403
@@ -20,21 +18,9 @@ from llama_stack.providers.impls.meta_reference.safety.shields.base import (
 from .config import MetaReferenceShieldType, SafetyConfig
-from .shields import (
-    CodeScannerShield,
-    InjectionShield,
-    JailbreakShield,
-    LlamaGuardShield,
-    PromptGuardShield,
-    ShieldBase,
-)
+from .shields import CodeScannerShield, LlamaGuardShield, ShieldBase
-def resolve_and_get_path(model_name: str) -> str:
-    model = resolve_model(model_name)
-    assert model is not None, f"Could not resolve model {model_name}"
-    model_dir = model_local_dir(model.descriptor())
-    return model_dir
+PROMPT_GUARD_MODEL = "Prompt-Guard-86M"
 class MetaReferenceSafetyImpl(Safety, RoutableProvider):
@@ -43,9 +29,10 @@ class MetaReferenceSafetyImpl(Safety, RoutableProvider):
         self.inference_api = deps[Api.inference]
     async def initialize(self) -> None:
-        shield_cfg = self.config.prompt_guard_shield
-        if shield_cfg is not None:
-            model_dir = resolve_and_get_path(shield_cfg.model)
+        if self.config.enable_prompt_guard:
+            from .shields import PromptGuardShield
+            model_dir = model_local_dir(PROMPT_GUARD_MODEL)
             _ = PromptGuardShield.instance(model_dir)
     async def shutdown(self) -> None:
@@ -108,16 +95,14 @@ class MetaReferenceSafetyImpl(Safety, RoutableProvider):
                 disable_output_check=cfg.disable_output_check,
             )
         elif typ == MetaReferenceShieldType.jailbreak_shield:
-            assert (
-                cfg.prompt_guard_shield is not None
-            ), "Cannot use Jailbreak Shield since Prompt Guard not present in config"
-            model_dir = resolve_and_get_path(cfg.prompt_guard_shield.model)
+            from .shields import JailbreakShield
+            model_dir = model_local_dir(PROMPT_GUARD_MODEL)
             return JailbreakShield.instance(model_dir)
         elif typ == MetaReferenceShieldType.injection_shield:
-            assert (
-                cfg.prompt_guard_shield is not None
-            ), "Cannot use PromptGuardShield since not present in config"
-            model_dir = resolve_and_get_path(cfg.prompt_guard_shield.model)
+            from .shields import InjectionShield
+            model_dir = model_local_dir(PROMPT_GUARD_MODEL)
             return InjectionShield.instance(model_dir)
         elif typ == MetaReferenceShieldType.code_scanner_guard:
             return CodeScannerShield.instance()
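Both branches above follow the same pattern: defer the shield import until it is actually requested, so the heavier Prompt Guard shield classes are only loaded when enabled, and resolve the local weights directory from the fixed Prompt-Guard-86M descriptor. A condensed sketch of that pattern as it would sit inside this same safety module; the helper name is illustrative, not the provider's real API.

def load_prompt_guard_shield(jailbreak: bool):
    # Lazy import: only pull in the Prompt Guard shields when they are used.
    from .shields import InjectionShield, JailbreakShield

    model_dir = model_local_dir(PROMPT_GUARD_MODEL)
    cls = JailbreakShield if jailbreak else InjectionShield
    return cls.instance(model_dir)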

View file

@@ -41,6 +41,7 @@ def available_providers() -> List[ProviderSpec]:
         adapter=AdapterSpec(
             adapter_type="ollama",
             pip_packages=["ollama"],
+            config_class="llama_stack.providers.adapters.inference.ollama.OllamaImplConfig",
             module="llama_stack.providers.adapters.inference.ollama",
         ),
     ),