Push registration methods onto the backing providers

This commit is contained in:
Ashwin Bharambe 2024-10-05 22:17:06 -07:00 committed by Ashwin Bharambe
parent 5a7b01d292
commit 4215cc9331
14 changed files with 269 additions and 220 deletions

View file

@ -261,7 +261,7 @@ class Session(BaseModel):
turns: List[Turn] turns: List[Turn]
started_at: datetime started_at: datetime
memory_bank: Optional[MemoryBank] = None memory_bank: Optional[MemoryBankDef] = None
class AgentConfigCommon(BaseModel): class AgentConfigCommon(BaseModel):

View file

@ -14,6 +14,7 @@ from pydantic import BaseModel, Field
from typing_extensions import Annotated from typing_extensions import Annotated
from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.models import * # noqa: F403
class LogProbConfig(BaseModel): class LogProbConfig(BaseModel):
@ -203,3 +204,12 @@ class Inference(Protocol):
model: str, model: str,
contents: List[InterleavedTextMedia], contents: List[InterleavedTextMedia],
) -> EmbeddingsResponse: ... ) -> EmbeddingsResponse: ...
# Protocol stub (body is `...`): takes a ModelDef and returns None.
# Exposed through the "/inference/register_model" webmethod route.
@webmethod(route="/inference/register_model")
async def register_model(self, model: ModelDef) -> None: ...
# Protocol stub (body is `...`): takes no arguments and returns a list of ModelDef.
# Exposed through the "/inference/list_models" webmethod route.
@webmethod(route="/inference/list_models")
async def list_models(self) -> List[ModelDef]: ...
# Protocol stub (body is `...`): takes a string identifier and returns a
# ModelDef or None. Exposed through the "/inference/get_model" webmethod route.
@webmethod(route="/inference/get_model")
async def get_model(self, identifier: str) -> Optional[ModelDef]: ...

View file

@ -15,6 +15,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.memory_banks import * # noqa: F403
@json_schema_type @json_schema_type
@ -76,3 +77,12 @@ class Memory(Protocol):
bank_id: str, bank_id: str,
document_ids: List[str], document_ids: List[str],
) -> None: ... ) -> None: ...
# Protocol stub (body is `...`): takes a MemoryBankDef and returns None.
# Exposed through the "/memory/register_memory_bank" webmethod route.
@webmethod(route="/memory/register_memory_bank")
async def register_memory_bank(self, memory_bank: MemoryBankDef) -> None: ...
# Protocol stub (body is `...`): takes no arguments and returns a list of
# MemoryBankDef. Exposed through the "/memory/list_memory_banks" webmethod route.
@webmethod(route="/memory/list_memory_banks")
async def list_memory_banks(self) -> List[MemoryBankDef]: ...
# Protocol stub (body is `...`): takes a string identifier and returns a
# MemoryBankDef or None. Exposed through the "/memory/get_memory_bank" webmethod route.
@webmethod(route="/memory/get_memory_bank")
async def get_memory_bank(self, identifier: str) -> Optional[MemoryBankDef]: ...

View file

@ -11,6 +11,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel from pydantic import BaseModel
from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.shields import * # noqa: F403
@json_schema_type @json_schema_type
@ -42,3 +43,12 @@ class Safety(Protocol):
async def run_shield( async def run_shield(
self, shield_type: str, messages: List[Message], params: Dict[str, Any] = None self, shield_type: str, messages: List[Message], params: Dict[str, Any] = None
) -> RunShieldResponse: ... ) -> RunShieldResponse: ...
# Protocol stub (body is `...`): takes a ShieldDef and returns None.
# Exposed through the "/safety/register_shield" webmethod route.
@webmethod(route="/safety/register_shield")
async def register_shield(self, shield: ShieldDef) -> None: ...
# Protocol stub (body is `...`): takes no arguments and returns a list of ShieldDef.
# Exposed through the "/safety/list_shields" webmethod route.
@webmethod(route="/safety/list_shields")
async def list_shields(self) -> List[ShieldDef]: ...
# Protocol stub (body is `...`): takes a string identifier and returns a
# ShieldDef or None. Exposed through the "/safety/get_shield" webmethod route.
@webmethod(route="/safety/get_shield")
async def get_shield(self, identifier: str) -> Optional[ShieldDef]: ...

View file

@ -14,6 +14,9 @@ from llama_stack.providers.datatypes import * # noqa: F403
from llama_stack.apis.models import * # noqa: F403 from llama_stack.apis.models import * # noqa: F403
from llama_stack.apis.shields import * # noqa: F403 from llama_stack.apis.shields import * # noqa: F403
from llama_stack.apis.memory_banks import * # noqa: F403 from llama_stack.apis.memory_banks import * # noqa: F403
from llama_stack.apis.inference import Inference
from llama_stack.apis.memory import Memory
from llama_stack.apis.safety import Safety
LLAMA_STACK_BUILD_CONFIG_VERSION = "2" LLAMA_STACK_BUILD_CONFIG_VERSION = "2"
@ -23,6 +26,19 @@ LLAMA_STACK_RUN_CONFIG_VERSION = "2"
RoutingKey = Union[str, List[str]] RoutingKey = Union[str, List[str]]
# Type alias: any one of ModelDef, ShieldDef or MemoryBankDef.
# Used in this file as the element type of `RoutingTableProviderSpec.registry`.
RoutableObject = Union[
ModelDef,
ShieldDef,
MemoryBankDef,
]
# Type alias: any one of the Inference, Safety or Memory API protocol types
# imported just above from llama_stack.apis.*.
RoutedProtocol = Union[
Inference,
Safety,
Memory,
]
class GenericProviderConfig(BaseModel): class GenericProviderConfig(BaseModel):
provider_type: str provider_type: str
config: Dict[str, Any] config: Dict[str, Any]
@ -56,6 +72,7 @@ class RoutingTableProviderSpec(ProviderSpec):
docker_image: Optional[str] = None docker_image: Optional[str] = None
router_api: Api router_api: Api
registry: List[RoutableObject]
module: str module: str
pip_packages: List[str] = Field(default_factory=list) pip_packages: List[str] = Field(default_factory=list)

View file

@ -28,46 +28,48 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
""" """
all_api_providers = get_provider_registry() all_api_providers = get_provider_registry()
auto_routed_apis = builtin_automatically_routed_apis() routing_table_apis = set(
x.routing_table_api for x in builtin_automatically_routed_apis()
)
router_apis = set(x.router_api for x in builtin_automatically_routed_apis())
providers_with_specs = {} providers_with_specs = {}
for api_str, instances in run_config.providers.items(): for api_str, providers in run_config.providers.items():
api = Api(api_str) api = Api(api_str)
if api in [a.routing_table_api for a in auto_routed_apis]: if api in routing_table_apis:
raise ValueError( raise ValueError(
f"Provider for `{api_str}` is automatically provided and cannot be overridden" f"Provider for `{api_str}` is automatically provided and cannot be overridden"
) )
providers_with_specs[api] = {} specs = {}
for config in instances: for provider in providers:
if config.provider_type not in all_api_providers[api]: if provider.provider_type not in all_api_providers[api]:
raise ValueError( raise ValueError(
f"Provider `{config.provider_type}` is not available for API `{api}`" f"Provider `{provider.provider_type}` is not available for API `{api}`"
) )
spec = ProviderWithSpec( spec = ProviderWithSpec(
spec=all_api_providers[api][config.provider_type], spec=all_api_providers[api][provider.provider_type],
**config, **(provider.dict()),
) )
providers_with_specs[api][spec.provider_id] = spec specs[provider.provider_id] = spec
key = api_str if api not in router_apis else f"inner-{api_str}"
providers_with_specs[key] = specs
apis_to_serve = run_config.apis_to_serve or set( apis_to_serve = run_config.apis_to_serve or set(
list(providers_with_specs.keys()) list(providers_with_specs.keys()) + list(routing_table_apis)
+ [a.routing_table_api.value for a in auto_routed_apis]
) )
for info in builtin_automatically_routed_apis(): for info in builtin_automatically_routed_apis():
if info.router_api.value not in apis_to_serve: if info.router_api.value not in apis_to_serve:
continue continue
if info.routing_table_api.value not in run_config: available_providers = providers_with_specs[f"inner-{info.router_api.value}"]
raise ValueError(
f"Registry for `{info.routing_table_api.value}` is not provided?"
)
available_providers = providers_with_specs[info.router_api]
inner_deps = [] inner_deps = []
registry = run_config[info.routing_table_api.value] registry = getattr(run_config, info.routing_table_api.value)
for entry in registry: for entry in registry:
if entry.provider_id not in available_providers: if entry.provider_id not in available_providers:
raise ValueError( raise ValueError(
@ -77,74 +79,70 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
provider = available_providers[entry.provider_id] provider = available_providers[entry.provider_id]
inner_deps.extend(provider.spec.api_dependencies) inner_deps.extend(provider.spec.api_dependencies)
providers_with_specs[info.routing_table_api] = { providers_with_specs[info.routing_table_api.value] = {
"__builtin__": [ "__builtin__": ProviderWithSpec(
ProviderWithSpec( provider_id="__builtin__",
provider_id="__builtin__", provider_type="__routing_table__",
provider_type="__builtin__", config={},
config=registry, spec=RoutingTableProviderSpec(
spec=RoutingTableProviderSpec( api=info.routing_table_api,
api=info.routing_table_api, router_api=info.router_api,
router_api=info.router_api, registry=registry,
module="llama_stack.distribution.routers", module="llama_stack.distribution.routers",
api_dependencies=inner_deps, api_dependencies=inner_deps,
), ),
) )
]
} }
providers_with_specs[info.router_api] = { providers_with_specs[info.router_api.value] = {
"__builtin__": [ "__builtin__": ProviderWithSpec(
ProviderWithSpec( provider_id="__builtin__",
provider_id="__builtin__", provider_type="__autorouted__",
provider_type="__builtin__", config={},
config={}, spec=AutoRoutedProviderSpec(
spec=AutoRoutedProviderSpec( api=info.router_api,
api=info.router_api, module="llama_stack.distribution.routers",
module="llama_stack.distribution.routers", routing_table_api=info.routing_table_api,
routing_table_api=source_api, api_dependencies=[info.routing_table_api],
api_dependencies=[source_api], ),
), )
)
]
} }
sorted_providers = topological_sort(providers_with_specs) sorted_providers = topological_sort(
{k: v.values() for k, v in providers_with_specs.items()}
)
sorted_providers.append( sorted_providers.append(
ProviderWithSpec( (
provider_id="__builtin__", "inspect",
provider_type="__builtin__", ProviderWithSpec(
config={}, provider_id="__builtin__",
spec=InlineProviderSpec(
api=Api.inspect,
provider_type="__builtin__", provider_type="__builtin__",
config_class="llama_stack.distribution.inspect.DistributionInspectConfig", config={},
module="llama_stack.distribution.inspect", spec=InlineProviderSpec(
api=Api.inspect,
provider_type="__builtin__",
config_class="llama_stack.distribution.inspect.DistributionInspectConfig",
module="llama_stack.distribution.inspect",
),
), ),
) )
) )
print(f"Resolved {len(sorted_providers)} providers in topological order") print(f"Resolved {len(sorted_providers)} providers in topological order")
for provider in sorted_providers: for api_str, provider in sorted_providers:
print( print(f" {api_str}: ({provider.provider_id}) {provider.spec.provider_type}")
f" {provider.spec.api}: ({provider.provider_id}) {provider.spec.provider_type}"
)
print("") print("")
impls = {} impls = {}
inner_impls_by_provider_id = {f"inner-{x}": {} for x in router_apis}
impls_by_provider_id = {} for api_str, provider in sorted_providers:
for provider in sorted_providers: deps = {a: impls[a] for a in provider.spec.api_dependencies}
api = provider.spec.api
if api not in impls_by_provider_id:
impls_by_provider_id[api] = {}
deps = {api: impls[api] for api in provider.spec.api_dependencies}
inner_impls = {} inner_impls = {}
if isinstance(provider.spec, RoutingTableProviderSpec): if isinstance(provider.spec, RoutingTableProviderSpec):
for entry in provider.config: for entry in provider.spec.registry:
inner_impls[entry.provider_id] = impls_by_provider_id[ inner_impls[entry.provider_id] = inner_impls_by_provider_id[
provider.spec.router_api f"inner-{provider.spec.router_api.value}"
][entry.provider_id] ][entry.provider_id]
impl = await instantiate_provider( impl = await instantiate_provider(
@ -152,37 +150,46 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
deps, deps,
inner_impls, inner_impls,
) )
if "inner-" in api_str:
impls[api] = impl inner_impls_by_provider_id[api_str][provider.provider_id] = impl
impls_by_provider_id[api][provider.provider_id] = impl else:
api = Api(api_str)
impls[api] = impl
return impls return impls
def topological_sort( def topological_sort(
providers_with_specs: Dict[Api, List[ProviderWithSpec]], providers_with_specs: Dict[str, List[ProviderWithSpec]],
) -> List[ProviderWithSpec]: ) -> List[ProviderWithSpec]:
def dfs(kv, visited: Set[Api], stack: List[Api]): def dfs(kv, visited: Set[str], stack: List[str]):
api, providers = kv api_str, providers = kv
visited.add(api) visited.add(api_str)
deps = [dep for x in providers for dep in x.api_dependencies] deps = []
for api in deps: for provider in providers:
if api not in visited: for dep in provider.spec.api_dependencies:
dfs((api, providers_with_specs[api]), visited, stack) deps.append(dep.value)
if isinstance(provider, AutoRoutedProviderSpec):
deps.append(f"inner-{provider.api}")
stack.append(api) for dep in deps:
if dep not in visited:
dfs((dep, providers_with_specs[dep]), visited, stack)
stack.append(api_str)
visited = set() visited = set()
stack = [] stack = []
for api, providers in providers_with_specs.items(): for api_str, providers in providers_with_specs.items():
if api not in visited: if api_str not in visited:
dfs((api, providers), visited, stack) dfs((api_str, providers), visited, stack)
flattened = [] flattened = []
for api in stack: for api_str in stack:
flattened.extend(providers_with_specs[api]) for provider in providers_with_specs[api_str]:
flattened.append((api_str, provider))
return flattened return flattened
@ -202,9 +209,8 @@ async def instantiate_provider(
else: else:
method = "get_client_impl" method = "get_client_impl"
assert isinstance(provider_config, GenericProviderConfig)
config_type = instantiate_class_type(provider_spec.config_class) config_type = instantiate_class_type(provider_spec.config_class)
config = config_type(**provider_config.config) config = config_type(**provider.config)
args = [config, deps] args = [config, deps]
elif isinstance(provider_spec, AutoRoutedProviderSpec): elif isinstance(provider_spec, AutoRoutedProviderSpec):
method = "get_auto_router_impl" method = "get_auto_router_impl"
@ -214,17 +220,13 @@ async def instantiate_provider(
elif isinstance(provider_spec, RoutingTableProviderSpec): elif isinstance(provider_spec, RoutingTableProviderSpec):
method = "get_routing_table_impl" method = "get_routing_table_impl"
assert isinstance(provider_config, list)
registry = provider_config
config = None config = None
args = [provider_spec.api, registry, inner_impls, deps] args = [provider_spec.api, provider_spec.registry, inner_impls, deps]
else: else:
method = "get_provider_impl" method = "get_provider_impl"
assert isinstance(provider_config, GenericProviderConfig)
config_type = instantiate_class_type(provider_spec.config_class) config_type = instantiate_class_type(provider_spec.config_class)
config = config_type(**provider_config.config) config = config_type(**provider.config)
args = [config, deps] args = [config, deps]
fn = getattr(module, method) fn = getattr(module, method)

View file

@ -10,8 +10,6 @@ from llama_stack.distribution.datatypes import * # noqa: F403
from .routing_tables import ( from .routing_tables import (
MemoryBanksRoutingTable, MemoryBanksRoutingTable,
ModelsRoutingTable, ModelsRoutingTable,
RoutableObject,
RoutedProtocol,
ShieldsRoutingTable, ShieldsRoutingTable,
) )

View file

@ -4,33 +4,17 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from typing import Any, List, Optional, Union from typing import Any, List, Optional
from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.models import * # noqa: F403 from llama_stack.apis.models import * # noqa: F403
from llama_stack.apis.shields import * # noqa: F403 from llama_stack.apis.shields import * # noqa: F403
from llama_stack.apis.memory_banks import * # noqa: F403 from llama_stack.apis.memory_banks import * # noqa: F403
from llama_stack.apis.inference import Inference
from llama_stack.apis.memory import Memory
from llama_stack.apis.safety import Safety
from llama_stack.distribution.datatypes import * # noqa: F403 from llama_stack.distribution.datatypes import * # noqa: F403
RoutableObject = Union[
ModelDef,
ShieldDef,
MemoryBankDef,
]
RoutedProtocol = Union[
Inference,
Safety,
Memory,
]
class CommonRoutingTableImpl(RoutingTable): class CommonRoutingTableImpl(RoutingTable):
def __init__( def __init__(
self, self,
@ -46,19 +30,14 @@ class CommonRoutingTableImpl(RoutingTable):
self.impls_by_provider_id = impls_by_provider_id self.impls_by_provider_id = impls_by_provider_id
self.registry = registry self.registry = registry
async def initialize(self) -> None: self.routing_key_to_object = {}
keys_by_provider = {}
for obj in self.registry: for obj in self.registry:
keys = keys_by_provider.setdefault(obj.provider_id, []) self.routing_key_to_object[obj.identifier] = obj
keys.append(obj.routing_key)
for provider_id, keys in keys_by_provider.items(): async def initialize(self) -> None:
p = self.impls_by_provider_id[provider_id] for obj in self.registry:
spec = p.__provider_spec__ p = self.impls_by_provider_id[obj.provider_id]
if is_passthrough(spec): await self.register_object(obj, p)
continue
await p.validate_routing_keys(keys)
async def shutdown(self) -> None: async def shutdown(self) -> None:
pass pass
@ -75,8 +54,24 @@ class CommonRoutingTableImpl(RoutingTable):
return obj return obj
return None return None
async def register_object(self, obj: RoutableObject) -> None:
    """Register `obj` with its backing provider and record it in this table.

    This must be a coroutine function: the body awaits the provider call,
    and `await` inside a plain `def` is a SyntaxError.

    Args:
        obj: Object to register. Its `identifier` and `provider_id`
            attributes are read.

    Raises:
        ValueError: If `obj.identifier` is already a key of
            `self.routing_key_to_object`, or if `obj.provider_id` is not a
            key of `self.impls_by_provider_id`.
    """
    if obj.identifier in self.routing_key_to_object:
        raise ValueError(f"Object `{obj.identifier}` already registered")

    if obj.provider_id not in self.impls_by_provider_id:
        raise ValueError(f"Provider `{obj.provider_id}` not found")

    provider = self.impls_by_provider_id[obj.provider_id]
    # NOTE(review): the provider protocols visible in this change expose
    # register_model / register_shield / register_memory_bank rather than
    # register_object, and the routing-table subclasses define a
    # two-argument register_object(obj, p) that shadows this method.
    # Confirm which dispatch is intended.
    await provider.register_object(obj)

    # Bookkeeping happens only after the provider call returned without
    # raising, so a rejected object is never left in the table.
    self.routing_key_to_object[obj.identifier] = obj
    self.registry.append(obj)
class ModelsRoutingTable(CommonRoutingTableImpl, Models): class ModelsRoutingTable(CommonRoutingTableImpl, Models):
# Forwards `obj` to the given provider `p` by awaiting `p.register_model(obj)`.
# NOTE(review): CommonRoutingTableImpl in this file also defines a
# one-argument register_object(obj), which this two-argument method shadows;
# register_model in this class calls self.register_object(model) with a
# single argument. Confirm the intended signature.
async def register_object(self, obj: ModelDef, p: Inference) -> None:
await p.register_model(obj)
async def list_models(self) -> List[ModelDef]: async def list_models(self) -> List[ModelDef]:
return self.registry return self.registry
@ -84,10 +79,13 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
return self.get_object_by_identifier(identifier) return self.get_object_by_identifier(identifier)
async def register_model(self, model: ModelDef) -> None: async def register_model(self, model: ModelDef) -> None:
raise NotImplementedError() await self.register_object(model)
class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
# Forwards `obj` to the given provider `p` by awaiting `p.register_shield(obj)`.
# NOTE(review): CommonRoutingTableImpl in this file also defines a
# one-argument register_object(obj), which this two-argument method shadows;
# register_shield in this class calls self.register_object(shield) with a
# single argument. Confirm the intended signature.
async def register_object(self, obj: ShieldDef, p: Safety) -> None:
await p.register_shield(obj)
async def list_shields(self) -> List[ShieldDef]: async def list_shields(self) -> List[ShieldDef]:
return self.registry return self.registry
@ -95,10 +93,13 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
return self.get_object_by_identifier(shield_type) return self.get_object_by_identifier(shield_type)
async def register_shield(self, shield: ShieldDef) -> None: async def register_shield(self, shield: ShieldDef) -> None:
raise NotImplementedError() await self.register_object(shield)
class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks): class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks):
# Forwards `obj` to the given provider `p` by awaiting
# `p.register_memory_bank(obj)`.
# NOTE(review): CommonRoutingTableImpl in this file also defines a
# one-argument register_object(obj), which this two-argument method shadows;
# register_memory_bank in this class calls self.register_object(bank) with a
# single argument. Confirm the intended signature.
async def register_object(self, obj: MemoryBankDef, p: Memory) -> None:
await p.register_memory_bank(obj)
async def list_memory_banks(self) -> List[MemoryBankDef]: async def list_memory_banks(self) -> List[MemoryBankDef]:
return self.registry return self.registry
@ -106,4 +107,4 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks):
return self.get_object_by_identifier(identifier) return self.get_object_by_identifier(identifier)
async def register_memory_bank(self, bank: MemoryBankDef) -> None: async def register_memory_bank(self, bank: MemoryBankDef) -> None:
raise NotImplementedError() await self.register_object(bank)

View file

@ -6,28 +6,23 @@
from together import Together from together import Together
from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.safety import ( from llama_stack.apis.safety import * # noqa: F403
RunShieldResponse,
Safety,
SafetyViolation,
ViolationLevel,
)
from llama_stack.distribution.datatypes import RoutableProvider
from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.distribution.request_headers import NeedsRequestProviderData
from .config import TogetherSafetyConfig from .config import TogetherSafetyConfig
SAFETY_SHIELD_TYPES = { SAFETY_SHIELD_MODEL_MAP = {
"llama_guard": "meta-llama/Meta-Llama-Guard-3-8B", "llama_guard": "meta-llama/Meta-Llama-Guard-3-8B",
"Llama-Guard-3-8B": "meta-llama/Meta-Llama-Guard-3-8B", "Llama-Guard-3-8B": "meta-llama/Meta-Llama-Guard-3-8B",
"Llama-Guard-3-11B-Vision": "meta-llama/Llama-Guard-3-11B-Vision-Turbo", "Llama-Guard-3-11B-Vision": "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
} }
class TogetherSafetyImpl(Safety, NeedsRequestProviderData, RoutableProvider): class TogetherSafetyImpl(Safety, NeedsRequestProviderData):
def __init__(self, config: TogetherSafetyConfig) -> None:
    """Store the provider config and start with no registered shields.

    Args:
        config: Provider configuration, kept on `self.config`.
    """
    self.config = config
    # The attribute must be spelled `registered_shields`: register_shield,
    # list_shields and get_shield all read or append to that name, so any
    # other spelling raises AttributeError on first use.
    self.registered_shields = []
async def initialize(self) -> None: async def initialize(self) -> None:
pass pass
@ -35,16 +30,31 @@ class TogetherSafetyImpl(Safety, NeedsRequestProviderData, RoutableProvider):
async def shutdown(self) -> None: async def shutdown(self) -> None:
pass pass
async def validate_routing_keys(self, routing_keys: List[str]) -> None: async def register_shield(self, shield: ShieldDef) -> None:
for key in routing_keys: if shield.type != ShieldType.llama_guard.value:
if key not in SAFETY_SHIELD_TYPES: raise ValueError(f"Unsupported safety shield type: {shield.type}")
raise ValueError(f"Unknown safety shield type: {key}")
self.registered_shields.append(shield)
# Returns the `self.registered_shields` list object itself (no copy is made).
async def list_shields(self) -> List[ShieldDef]:
return self.registered_shields
# Scans `self.registered_shields` in order and returns the first shield whose
# `identifier` equals the argument; returns None when no entry matches.
async def get_shield(self, identifier: str) -> Optional[ShieldDef]:
for shield in self.registered_shields:
if shield.identifier == identifier:
return shield
return None
async def run_shield( async def run_shield(
self, shield_type: str, messages: List[Message], params: Dict[str, Any] = None self, shield_type: str, messages: List[Message], params: Dict[str, Any] = None
) -> RunShieldResponse: ) -> RunShieldResponse:
if shield_type not in SAFETY_SHIELD_TYPES: shield_def = await self.get_shield(shield_type)
raise ValueError(f"Unknown safety shield type: {shield_type}") if not shield_def:
raise ValueError(f"Unknown shield {shield_type}")
model = shield_def.params.get("model", "llama_guard")
if model not in SAFETY_SHIELD_MODEL_MAP:
raise ValueError(f"Unsupported safety model: {model}")
together_api_key = None together_api_key = None
if self.config.api_key is not None: if self.config.api_key is not None:
@ -57,17 +67,13 @@ class TogetherSafetyImpl(Safety, NeedsRequestProviderData, RoutableProvider):
) )
together_api_key = provider_data.together_api_key together_api_key = provider_data.together_api_key
model_name = SAFETY_SHIELD_TYPES[shield_type]
# messages can have role assistant or user # messages can have role assistant or user
api_messages = [] api_messages = []
for message in messages: for message in messages:
if message.role in (Role.user.value, Role.assistant.value): if message.role in (Role.user.value, Role.assistant.value):
api_messages.append({"role": message.role, "content": message.content}) api_messages.append({"role": message.role, "content": message.content})
violation = await get_safety_response( violation = await get_safety_response(together_api_key, model, api_messages)
together_api_key, model_name, api_messages
)
return RunShieldResponse(violation=violation) return RunShieldResponse(violation=violation)

View file

@ -48,16 +48,6 @@ class RoutingTable(Protocol):
def get_provider_impl(self, routing_key: str) -> Any: ... def get_provider_impl(self, routing_key: str) -> Any: ...
class RoutableProvider(Protocol):
"""
A provider which sits behind the RoutingTable and can get routed to.
All Inference / Safety / Memory providers fall into this bucket.
"""
async def validate_routing_keys(self, keys: List[str]) -> None: ...
@json_schema_type @json_schema_type
class AdapterSpec(BaseModel): class AdapterSpec(BaseModel):
adapter_type: str = Field( adapter_type: str = Field(

View file

@ -5,7 +5,6 @@
# the root directory of this source tree. # the root directory of this source tree.
import logging import logging
import uuid
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
@ -72,38 +71,29 @@ class FaissMemoryImpl(Memory, RoutableProvider):
async def shutdown(self) -> None: ... async def shutdown(self) -> None: ...
async def validate_routing_keys(self, routing_keys: List[str]) -> None: async def register_memory_bank(
print(f"[faiss] Registering memory bank routing keys: {routing_keys}")
pass
async def create_memory_bank(
self, self,
name: str, memory_bank: MemoryBankDef,
config: MemoryBankConfig, ) -> None:
url: Optional[URL] = None,
) -> MemoryBank:
assert url is None, "URL is not supported for this implementation"
assert ( assert (
config.type == MemoryBankType.vector.value memory_bank.type == MemoryBankType.vector.value
), f"Only vector banks are supported {config.type}" ), f"Only vector banks are supported {memory_bank.type}"
bank_id = str(uuid.uuid4()) index = BankWithIndex(
bank = MemoryBank( bank=memory_bank, index=FaissIndex(ALL_MINILM_L6_V2_DIMENSION)
bank_id=bank_id,
name=name,
config=config,
url=url,
) )
index = BankWithIndex(bank=bank, index=FaissIndex(ALL_MINILM_L6_V2_DIMENSION)) self.cache[memory_bank.identifier] = index
self.cache[bank_id] = index
return bank return bank
async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: async def get_memory_bank(self, identifier: str) -> Optional[MemoryBankDef]:
index = self.cache.get(bank_id) index = self.cache.get(identifier)
if index is None: if index is None:
return None return None
return index.bank return index.bank
# Builds a new list holding the `bank` attribute of every value currently
# stored in `self.cache`.
async def list_memory_banks(self) -> List[MemoryBankDef]:
return [x.bank for x in self.cache.values()]
async def insert_documents( async def insert_documents(
self, self,
bank_id: str, bank_id: str,

View file

@ -12,11 +12,9 @@ from llama_models.sku_list import CoreModelId, safety_models
from pydantic import BaseModel, field_validator from pydantic import BaseModel, field_validator
class MetaReferenceShieldType(Enum): class PromptGuardType(Enum):
llama_guard = "llama_guard" injection = "injection"
code_scanner_guard = "code_scanner_guard" jailbreak = "jailbreak"
injection_shield = "injection_shield"
jailbreak_shield = "jailbreak_shield"
class LlamaGuardShieldConfig(BaseModel): class LlamaGuardShieldConfig(BaseModel):

View file

@ -10,23 +10,36 @@ from llama_stack.distribution.utils.model_utils import model_local_dir
from llama_stack.apis.inference import * # noqa: F403 from llama_stack.apis.inference import * # noqa: F403
from llama_stack.apis.safety import * # noqa: F403 from llama_stack.apis.safety import * # noqa: F403
from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.distribution.datatypes import Api, RoutableProvider from llama_stack.distribution.datatypes import Api
from llama_stack.providers.impls.meta_reference.safety.shields.base import ( from llama_stack.providers.impls.meta_reference.safety.shields.base import (
OnViolationAction, OnViolationAction,
) )
from .config import MetaReferenceShieldType, SafetyConfig from .config import SafetyConfig
from .shields import CodeScannerShield, LlamaGuardShield, ShieldBase from .shields import (
CodeScannerShield,
InjectionShield,
JailbreakShield,
LlamaGuardShield,
ShieldBase,
)
PROMPT_GUARD_MODEL = "Prompt-Guard-86M" PROMPT_GUARD_MODEL = "Prompt-Guard-86M"
class MetaReferenceSafetyImpl(Safety, RoutableProvider): class MetaReferenceSafetyImpl(Safety):
def __init__(self, config: SafetyConfig, deps) -> None: def __init__(self, config: SafetyConfig, deps) -> None:
self.config = config self.config = config
self.inference_api = deps[Api.inference] self.inference_api = deps[Api.inference]
self.registered_shields = []
self.available_shields = [ShieldType.code_scanner.value]
if config.llama_guard_shield:
self.available_shields.append(ShieldType.llama_guard.value)
if config.enable_prompt_guard:
self.available_shields.append(ShieldType.prompt_guard.value)
async def initialize(self) -> None: async def initialize(self) -> None:
if self.config.enable_prompt_guard: if self.config.enable_prompt_guard:
@ -38,11 +51,20 @@ class MetaReferenceSafetyImpl(Safety, RoutableProvider):
async def shutdown(self) -> None: async def shutdown(self) -> None:
pass pass
async def validate_routing_keys(self, routing_keys: List[str]) -> None: async def register_shield(self, shield: ShieldDef) -> None:
available_shields = [v.value for v in MetaReferenceShieldType] if shield.type not in self.available_shields:
for key in routing_keys: raise ValueError(f"Unsupported safety shield type: {shield.type}")
if key not in available_shields:
raise ValueError(f"Unknown safety shield type: {key}") self.registered_shields.append(shield)
# Returns the `self.registered_shields` list object itself (no copy is made).
async def list_shields(self) -> List[ShieldDef]:
return self.registered_shields
# Scans `self.registered_shields` in order and returns the first shield whose
# `identifier` equals the argument; returns None when no entry matches.
async def get_shield(self, identifier: str) -> Optional[ShieldDef]:
for shield in self.registered_shields:
if shield.identifier == identifier:
return shield
return None
async def run_shield( async def run_shield(
self, self,
@ -50,10 +72,11 @@ class MetaReferenceSafetyImpl(Safety, RoutableProvider):
messages: List[Message], messages: List[Message],
params: Dict[str, Any] = None, params: Dict[str, Any] = None,
) -> RunShieldResponse: ) -> RunShieldResponse:
available_shields = [v.value for v in MetaReferenceShieldType] shield_def = await self.get_shield(shield_type)
assert shield_type in available_shields, f"Unknown shield {shield_type}" if not shield_def:
raise ValueError(f"Unknown shield {shield_type}")
shield = self.get_shield_impl(MetaReferenceShieldType(shield_type)) shield = self.get_shield_impl(shield_def)
messages = messages.copy() messages = messages.copy()
# some shields like llama-guard require the first message to be a user message # some shields like llama-guard require the first message to be a user message
@ -79,30 +102,24 @@ class MetaReferenceSafetyImpl(Safety, RoutableProvider):
return RunShieldResponse(violation=violation) return RunShieldResponse(violation=violation)
def get_shield_impl(self, typ: MetaReferenceShieldType) -> ShieldBase: def get_shield_impl(self, shield: ShieldDef) -> ShieldBase:
cfg = self.config if shield.type == ShieldType.llama_guard.value:
if typ == MetaReferenceShieldType.llama_guard: cfg = self.config.llama_guard_shield
cfg = cfg.llama_guard_shield
assert (
cfg is not None
), "Cannot use LlamaGuardShield since not present in config"
return LlamaGuardShield( return LlamaGuardShield(
model=cfg.model, model=cfg.model,
inference_api=self.inference_api, inference_api=self.inference_api,
excluded_categories=cfg.excluded_categories, excluded_categories=cfg.excluded_categories,
) )
elif typ == MetaReferenceShieldType.jailbreak_shield: elif shield.type == ShieldType.prompt_guard.value:
from .shields import JailbreakShield
model_dir = model_local_dir(PROMPT_GUARD_MODEL) model_dir = model_local_dir(PROMPT_GUARD_MODEL)
return JailbreakShield.instance(model_dir) subtype = shield.params.get("prompt_guard_type", "injection")
elif typ == MetaReferenceShieldType.injection_shield: if subtype == "injection":
from .shields import InjectionShield return InjectionShield.instance(model_dir)
elif subtype == "jailbreak":
model_dir = model_local_dir(PROMPT_GUARD_MODEL) return JailbreakShield.instance(model_dir)
return InjectionShield.instance(model_dir) else:
elif typ == MetaReferenceShieldType.code_scanner_guard: raise ValueError(f"Unknown prompt guard type: {subtype}")
elif shield.type == ShieldType.code_scanner.value:
return CodeScannerShield.instance() return CodeScannerShield.instance()
else: else:
raise ValueError(f"Unknown shield type: {typ}") raise ValueError(f"Unknown shield type: {shield.type}")

View file

@ -146,7 +146,7 @@ class EmbeddingIndex(ABC):
@dataclass @dataclass
class BankWithIndex: class BankWithIndex:
bank: MemoryBank bank: MemoryBankDef
index: EmbeddingIndex index: EmbeddingIndex
async def insert_documents( async def insert_documents(