Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-16 23:03:49 +00:00)
Merge branch 'main' into mmlu_benchmark
commit e690eb7ad3
85 changed files with 4761 additions and 358 deletions

llama_stack/providers/inline/agents/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

@@ -4,9 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from pydantic import BaseModel, Field

from llama_stack.providers.utils.kvstore import KVStoreConfig
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from pydantic import BaseModel, Field


class MetaReferenceAgentsImplConfig(BaseModel):

@@ -11,9 +11,10 @@ from datetime import datetime

from typing import List, Optional
from llama_stack.apis.agents import *  # noqa: F403
from llama_stack.providers.utils.kvstore import KVStore
from pydantic import BaseModel

from llama_stack.providers.utils.kvstore import KVStore


class AgentSessionInfo(BaseModel):
    session_id: str

@@ -10,13 +10,14 @@ from jinja2 import Template
from llama_models.llama3.api import *  # noqa: F403


from termcolor import cprint  # noqa: F401

from llama_stack.apis.agents import (
    DefaultMemoryQueryGeneratorConfig,
    LLMMemoryQueryGeneratorConfig,
    MemoryQueryGenerator,
    MemoryQueryGeneratorConfig,
)
from termcolor import cprint  # noqa: F401
from llama_stack.apis.inference import *  # noqa: F403

@@ -37,7 +37,7 @@ class ShieldRunnerMixin:
         responses = await asyncio.gather(
             *[
                 self.safety_api.run_shield(
-                    identifier=identifier,
+                    shield_id=identifier,
                     messages=messages,
                 )
                 for identifier in identifiers

@@ -80,7 +80,7 @@ class MockInferenceAPI:

 class MockSafetyAPI:
     async def run_shield(
-        self, shield_type: str, messages: List[Message]
+        self, shield_id: str, messages: List[Message]
     ) -> RunShieldResponse:
         return RunShieldResponse(violation=None)

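To show how the shield_type to shield_id rename propagates to callers, here is a minimal, self-contained sketch (not part of the diff) of a caller fanning out over several shield identifiers, in the spirit of the ShieldRunnerMixin and MockSafetyAPI hunks above. MockSafety and RunShieldResponse are simplified stand-ins for the real llama_stack types.

import asyncio
from typing import List, Optional


class RunShieldResponse:
    # Simplified stand-in for the llama_stack response type.
    def __init__(self, violation: Optional[str] = None) -> None:
        self.violation = violation


class MockSafety:
    # Stand-in safety API: run_shield now takes shield_id rather than shield_type.
    async def run_shield(self, shield_id: str, messages: List[str]) -> RunShieldResponse:
        return RunShieldResponse(violation=None)


async def run_all(safety: MockSafety, identifiers: List[str], messages: List[str]):
    # Same fan-out pattern as ShieldRunnerMixin: one run_shield call per shield id.
    return await asyncio.gather(
        *[
            safety.run_shield(shield_id=identifier, messages=messages)
            for identifier in identifiers
        ]
    )


if __name__ == "__main__":
    responses = asyncio.run(run_all(MockSafety(), ["llama_guard", "prompt_guard"], ["hello"]))
    print([r.violation for r in responses])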

llama_stack/providers/inline/inference/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

@@ -10,9 +10,10 @@ from llama_models.datatypes import *  # noqa: F403
from llama_models.sku_list import resolve_model

from llama_stack.apis.inference import *  # noqa: F401, F403
from llama_stack.providers.utils.inference import supported_inference_models
from pydantic import BaseModel, Field, field_validator

from llama_stack.providers.utils.inference import supported_inference_models


class MetaReferenceInferenceConfig(BaseModel):
    model: str = Field(

@@ -35,12 +35,13 @@ from termcolor import cprint

from llama_stack.apis.inference import *  # noqa: F403

from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData

from llama_stack.distribution.utils.model_utils import model_local_dir
from llama_stack.providers.utils.inference.prompt_adapter import (
    augment_content_with_response_format_prompt,
    chat_completion_request_to_messages,
)
from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData

from .config import (
    Fp8QuantizationConfig,

@@ -12,7 +12,7 @@ from llama_models.sku_list import resolve_model

 from llama_models.llama3.api.datatypes import *  # noqa: F403
 from llama_stack.apis.inference import *  # noqa: F403
-from llama_stack.providers.datatypes import ModelDef, ModelsProtocolPrivate
+from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate

 from llama_stack.providers.utils.inference.prompt_adapter import (
     convert_image_media_to_url,

@@ -45,16 +45,11 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate):
         else:
             self.generator = Llama.build(self.config)

-    async def register_model(self, model: ModelDef) -> None:
-        raise ValueError("Dynamic model registration is not supported")
-
-    async def list_models(self) -> List[ModelDef]:
-        return [
-            ModelDef(
-                identifier=self.model.descriptor(),
-                llama_model=self.model.descriptor(),
+    async def register_model(self, model: Model) -> None:
+        if model.identifier != self.model.descriptor():
+            raise ValueError(
+                f"Model mismatch: {model.identifier} != {self.model.descriptor()}"
             )
-        ]

     async def shutdown(self) -> None:
         if self.config.create_distributed_process_group:

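A rough, self-contained sketch (not from the diff) of the registration contract introduced above: instead of advertising models through list_models, the provider accepts register_model and only verifies that the registered identifier matches the one model it serves. The Model dataclass below is a simplified stand-in for llama_stack.providers.datatypes.Model.

import asyncio
from dataclasses import dataclass


@dataclass
class Model:
    # Simplified stand-in for llama_stack.providers.datatypes.Model.
    identifier: str


class SingleModelProvider:
    def __init__(self, served_identifier: str) -> None:
        self.served_identifier = served_identifier

    async def register_model(self, model: Model) -> None:
        # Mirrors the hunk: reject anything other than the preconfigured model.
        if model.identifier != self.served_identifier:
            raise ValueError(
                f"Model mismatch: {model.identifier} != {self.served_identifier}"
            )


if __name__ == "__main__":
    provider = SingleModelProvider("Llama3.1-8B-Instruct")
    asyncio.run(provider.register_model(Model("Llama3.1-8B-Instruct")))  # accepted
    try:
        asyncio.run(provider.register_model(Model("Llama3.1-70B-Instruct")))
    except ValueError as exc:
        print(exc)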
@@ -28,13 +28,13 @@ from fairscale.nn.model_parallel.initialize import (
    get_model_parallel_src_rank,
)

from llama_stack.apis.inference import ChatCompletionRequest, CompletionRequest

from pydantic import BaseModel, Field

from torch.distributed.launcher.api import elastic_launch, LaunchConfig
from typing_extensions import Annotated

from llama_stack.apis.inference import ChatCompletionRequest, CompletionRequest

from .generation import TokenResult

@@ -21,13 +21,13 @@ from llama_models.llama3.api.args import ModelArgs
from llama_models.llama3.reference_impl.model import Transformer, TransformerBlock
from llama_models.sku_list import resolve_model

from llama_stack.apis.inference import QuantizationType

from termcolor import cprint
from torch import nn, Tensor

from torchao.quantization.GPTQ import Int8DynActInt4WeightLinear

from llama_stack.apis.inference import QuantizationType

from ..config import MetaReferenceQuantizedInferenceConfig

@@ -5,9 +5,9 @@
# the root directory of this source tree.

from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, Field, field_validator

from llama_stack.providers.utils.inference import supported_inference_models
from pydantic import BaseModel, Field, field_validator


@json_schema_type

@@ -20,7 +20,7 @@ from vllm.sampling_params import SamplingParams as VLLMSamplingParams

 from llama_stack.apis.inference import *  # noqa: F403

-from llama_stack.providers.datatypes import ModelDef, ModelsProtocolPrivate
+from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
     OpenAICompatCompletionResponse,

@@ -83,19 +83,11 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
         if self.engine:
             self.engine.shutdown_background_loop()

-    async def register_model(self, model: ModelDef) -> None:
+    async def register_model(self, model: Model) -> None:
         raise ValueError(
             "You cannot dynamically add a model to a running vllm instance"
         )

-    async def list_models(self) -> List[ModelDef]:
-        return [
-            ModelDef(
-                identifier=self.config.model,
-                llama_model=self.config.model,
-            )
-        ]
-
     def _sampling_params(self, sampling_params: SamplingParams) -> VLLMSamplingParams:
         if sampling_params is None:
             return VLLMSamplingParams(max_tokens=self.config.max_tokens)

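For the surrounding context in this hunk: when a request carries no sampling parameters, _sampling_params builds a default capped at the configured max_tokens. A dependency-free sketch of that fallback, assuming only max_tokens matters; the SamplingParams dataclass below is a stand-in, not the real vllm class.

from dataclasses import dataclass
from typing import Optional


@dataclass
class SamplingParams:
    # Stand-in for vllm.sampling_params.SamplingParams.
    max_tokens: int = 16
    temperature: float = 1.0


def resolve_sampling_params(requested: Optional[SamplingParams], config_max_tokens: int) -> SamplingParams:
    # Mirrors the fallback in the hunk above: no params means defaults capped at config.max_tokens.
    if requested is None:
        return SamplingParams(max_tokens=config_max_tokens)
    return requested


print(resolve_sampling_params(None, config_max_tokens=4096))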

llama_stack/providers/inline/memory/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

@@ -5,13 +5,13 @@
# the root directory of this source tree.

from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel

from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
from llama_stack.providers.utils.kvstore.config import (
    KVStoreConfig,
    SqliteKVStoreConfig,
)
from pydantic import BaseModel


@json_schema_type

@@ -8,11 +8,11 @@ import logging

from typing import Any, Dict, List, Optional

import faiss

import numpy as np
from numpy.typing import NDArray

import faiss

from llama_models.llama3.api.datatypes import *  # noqa: F403

from llama_stack.apis.memory import *  # noqa: F403

@@ -24,19 +24,19 @@ class MetaReferenceCodeScannerSafetyImpl(Safety):
     async def shutdown(self) -> None:
         pass

-    async def register_shield(self, shield: ShieldDef) -> None:
+    async def register_shield(self, shield: Shield) -> None:
         if shield.shield_type != ShieldType.code_scanner.value:
             raise ValueError(f"Unsupported safety shield type: {shield.shield_type}")

     async def run_shield(
         self,
-        shield_type: str,
+        shield_id: str,
         messages: List[Message],
         params: Dict[str, Any] = None,
     ) -> RunShieldResponse:
-        shield_def = await self.shield_store.get_shield(shield_type)
-        if not shield_def:
-            raise ValueError(f"Unknown shield {shield_type}")
+        shield = await self.shield_store.get_shield(shield_id)
+        if not shield:
+            raise ValueError(f"Shield {shield_id} not found")

         from codeshield.cs import CodeShield

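To make the lookup-by-id flow concrete, a small self-contained sketch of a shield store keyed by shield_id with the same "not found" error shape as the hunk above; ShieldStore and Shield here are illustrative stand-ins rather than llama_stack types.

import asyncio
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class Shield:
    # Illustrative stand-in for the new Shield resource.
    identifier: str
    shield_type: str


class ShieldStore:
    def __init__(self, shields: Dict[str, Shield]) -> None:
        self._shields = shields

    async def get_shield(self, shield_id: str) -> Optional[Shield]:
        return self._shields.get(shield_id)


async def resolve_shield(store: ShieldStore, shield_id: str) -> Shield:
    # Mirrors the hunk: resolve by id and fail loudly when the id is unknown.
    shield = await store.get_shield(shield_id)
    if not shield:
        raise ValueError(f"Shield {shield_id} not found")
    return shield


if __name__ == "__main__":
    store = ShieldStore({"code_scanner": Shield("code_scanner", "code_scanner")})
    print(asyncio.run(resolve_shield(store, "code_scanner")))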

llama_stack/providers/inline/safety/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.

@@ -21,6 +21,7 @@ from .prompt_guard import InjectionShield, JailbreakShield, PromptGuardShield


 PROMPT_GUARD_MODEL = "Prompt-Guard-86M"
+SUPPORTED_SHIELDS = [ShieldType.llama_guard, ShieldType.prompt_guard]


 class MetaReferenceSafetyImpl(Safety, ShieldsProtocolPrivate):

@@ -30,9 +31,9 @@ class MetaReferenceSafetyImpl(Safety, ShieldsProtocolPrivate):

         self.available_shields = []
         if config.llama_guard_shield:
-            self.available_shields.append(ShieldType.llama_guard.value)
+            self.available_shields.append(ShieldType.llama_guard)
         if config.enable_prompt_guard:
-            self.available_shields.append(ShieldType.prompt_guard.value)
+            self.available_shields.append(ShieldType.prompt_guard)

     async def initialize(self) -> None:
         if self.config.enable_prompt_guard:

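The change above keeps enum members (rather than their .value strings) in available_shields, so later membership checks compare enums instead of strings. A short sketch of the difference, using a hypothetical ShieldType enum:

from enum import Enum


class ShieldType(Enum):
    # Hypothetical stand-in for the llama_stack ShieldType enum.
    llama_guard = "llama_guard"
    prompt_guard = "prompt_guard"
    code_scanner = "code_scanner"


available_shields = [ShieldType.llama_guard, ShieldType.prompt_guard]

# Membership now compares enum members directly...
print(ShieldType.llama_guard in available_shields)  # True
# ...while a raw string value no longer matches.
print("llama_guard" in available_shields)  # False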
@@ -42,30 +43,21 @@ class MetaReferenceSafetyImpl(Safety, ShieldsProtocolPrivate):
     async def shutdown(self) -> None:
         pass

-    async def register_shield(self, shield: ShieldDef) -> None:
-        raise ValueError("Registering dynamic shields is not supported")
-
-    async def list_shields(self) -> List[ShieldDef]:
-        return [
-            ShieldDef(
-                identifier=shield_type,
-                shield_type=shield_type,
-                params={},
-            )
-            for shield_type in self.available_shields
-        ]
+    async def register_shield(self, shield: Shield) -> None:
+        if shield.shield_type not in self.available_shields:
+            raise ValueError(f"Shield type {shield.shield_type} not supported")

     async def run_shield(
         self,
-        identifier: str,
+        shield_id: str,
         messages: List[Message],
         params: Dict[str, Any] = None,
     ) -> RunShieldResponse:
-        shield_def = await self.shield_store.get_shield(identifier)
-        if not shield_def:
-            raise ValueError(f"Unknown shield {identifier}")
+        shield = await self.shield_store.get_shield(shield_id)
+        if not shield:
+            raise ValueError(f"Shield {shield_id} not found")

-        shield = self.get_shield_impl(shield_def)
+        shield_impl = self.get_shield_impl(shield)

         messages = messages.copy()
         # some shields like llama-guard require the first message to be a user message

@@ -74,13 +66,16 @@ class MetaReferenceSafetyImpl(Safety, ShieldsProtocolPrivate):
             messages[0] = UserMessage(content=messages[0].content)

         # TODO: we can refactor ShieldBase, etc. to be inline with the API types
-        res = await shield.run(messages)
+        res = await shield_impl.run(messages)
         violation = None
-        if res.is_violation and shield.on_violation_action != OnViolationAction.IGNORE:
+        if (
+            res.is_violation
+            and shield_impl.on_violation_action != OnViolationAction.IGNORE
+        ):
             violation = SafetyViolation(
                 violation_level=(
                     ViolationLevel.ERROR
-                    if shield.on_violation_action == OnViolationAction.RAISE
+                    if shield_impl.on_violation_action == OnViolationAction.RAISE
                     else ViolationLevel.WARN
                 ),
                 user_message=res.violation_return_message,

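A compact sketch of the violation-mapping logic in this hunk: the shield implementation's on_violation_action decides whether a violation surfaces as ERROR or WARN, and IGNORE suppresses it entirely. OnViolationAction, ViolationLevel, and ShieldResult below are simplified stand-ins for the llama_stack types.

from dataclasses import dataclass
from enum import Enum
from typing import Optional


class OnViolationAction(Enum):
    IGNORE = 0
    WARN = 1
    RAISE = 2


class ViolationLevel(Enum):
    WARN = "warn"
    ERROR = "error"


@dataclass
class ShieldResult:
    is_violation: bool
    violation_return_message: str = ""


def map_violation(res: ShieldResult, action: OnViolationAction) -> Optional[ViolationLevel]:
    # Mirrors the hunk: skip non-violations and IGNORE-configured shields,
    # escalate RAISE to ERROR, everything else to WARN.
    if not (res.is_violation and action != OnViolationAction.IGNORE):
        return None
    return ViolationLevel.ERROR if action == OnViolationAction.RAISE else ViolationLevel.WARN


print(map_violation(ShieldResult(is_violation=True), OnViolationAction.RAISE))  # ViolationLevel.ERROR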
@@ -91,15 +86,15 @@ class MetaReferenceSafetyImpl(Safety, ShieldsProtocolPrivate):

         return RunShieldResponse(violation=violation)

-    def get_shield_impl(self, shield: ShieldDef) -> ShieldBase:
-        if shield.shield_type == ShieldType.llama_guard.value:
+    def get_shield_impl(self, shield: Shield) -> ShieldBase:
+        if shield.shield_type == ShieldType.llama_guard:
             cfg = self.config.llama_guard_shield
             return LlamaGuardShield(
                 model=cfg.model,
                 inference_api=self.inference_api,
                 excluded_categories=cfg.excluded_categories,
             )
-        elif shield.shield_type == ShieldType.prompt_guard.value:
+        elif shield.shield_type == ShieldType.prompt_guard:
             model_dir = model_local_dir(PROMPT_GUARD_MODEL)
             subtype = shield.params.get("prompt_guard_type", "injection")
             if subtype == "injection":