Merge branch 'main' into mmlu_benchmark

This commit is contained in:
Xi Yan 2024-11-11 10:22:32 -05:00
commit e690eb7ad3
85 changed files with 4761 additions and 358 deletions

View file

@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

View file

@ -4,9 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from pydantic import BaseModel, Field
from llama_stack.providers.utils.kvstore import KVStoreConfig
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from pydantic import BaseModel, Field
class MetaReferenceAgentsImplConfig(BaseModel):

View file

@ -11,9 +11,10 @@ from datetime import datetime
from typing import List, Optional
from llama_stack.apis.agents import * # noqa: F403
from llama_stack.providers.utils.kvstore import KVStore
from pydantic import BaseModel
from llama_stack.providers.utils.kvstore import KVStore
class AgentSessionInfo(BaseModel):
session_id: str

View file

@ -10,13 +10,14 @@ from jinja2 import Template
from llama_models.llama3.api import * # noqa: F403
from termcolor import cprint # noqa: F401
from llama_stack.apis.agents import (
DefaultMemoryQueryGeneratorConfig,
LLMMemoryQueryGeneratorConfig,
MemoryQueryGenerator,
MemoryQueryGeneratorConfig,
)
from termcolor import cprint # noqa: F401
from llama_stack.apis.inference import * # noqa: F403

View file

@ -37,7 +37,7 @@ class ShieldRunnerMixin:
responses = await asyncio.gather(
*[
self.safety_api.run_shield(
identifier=identifier,
shield_id=identifier,
messages=messages,
)
for identifier in identifiers

View file

@ -80,7 +80,7 @@ class MockInferenceAPI:
class MockSafetyAPI:
async def run_shield(
self, shield_type: str, messages: List[Message]
self, shield_id: str, messages: List[Message]
) -> RunShieldResponse:
return RunShieldResponse(violation=None)

View file

@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

View file

@ -10,9 +10,10 @@ from llama_models.datatypes import * # noqa: F403
from llama_models.sku_list import resolve_model
from llama_stack.apis.inference import * # noqa: F401, F403
from llama_stack.providers.utils.inference import supported_inference_models
from pydantic import BaseModel, Field, field_validator
from llama_stack.providers.utils.inference import supported_inference_models
class MetaReferenceInferenceConfig(BaseModel):
model: str = Field(

View file

@ -35,12 +35,13 @@ from termcolor import cprint
from llama_stack.apis.inference import * # noqa: F403
from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
from llama_stack.distribution.utils.model_utils import model_local_dir
from llama_stack.providers.utils.inference.prompt_adapter import (
augment_content_with_response_format_prompt,
chat_completion_request_to_messages,
)
from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
from .config import (
Fp8QuantizationConfig,

View file

@ -12,7 +12,7 @@ from llama_models.sku_list import resolve_model
from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.inference import * # noqa: F403
from llama_stack.providers.datatypes import ModelDef, ModelsProtocolPrivate
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import (
convert_image_media_to_url,
@ -45,16 +45,11 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate):
else:
self.generator = Llama.build(self.config)
async def register_model(self, model: ModelDef) -> None:
raise ValueError("Dynamic model registration is not supported")
async def list_models(self) -> List[ModelDef]:
return [
ModelDef(
identifier=self.model.descriptor(),
llama_model=self.model.descriptor(),
async def register_model(self, model: Model) -> None:
if model.identifier != self.model.descriptor():
raise ValueError(
f"Model mismatch: {model.identifier} != {self.model.descriptor()}"
)
]
async def shutdown(self) -> None:
if self.config.create_distributed_process_group:

View file

@ -28,13 +28,13 @@ from fairscale.nn.model_parallel.initialize import (
get_model_parallel_src_rank,
)
from llama_stack.apis.inference import ChatCompletionRequest, CompletionRequest
from pydantic import BaseModel, Field
from torch.distributed.launcher.api import elastic_launch, LaunchConfig
from typing_extensions import Annotated
from llama_stack.apis.inference import ChatCompletionRequest, CompletionRequest
from .generation import TokenResult

View file

@ -21,13 +21,13 @@ from llama_models.llama3.api.args import ModelArgs
from llama_models.llama3.reference_impl.model import Transformer, TransformerBlock
from llama_models.sku_list import resolve_model
from llama_stack.apis.inference import QuantizationType
from termcolor import cprint
from torch import nn, Tensor
from torchao.quantization.GPTQ import Int8DynActInt4WeightLinear
from llama_stack.apis.inference import QuantizationType
from ..config import MetaReferenceQuantizedInferenceConfig

View file

@ -5,9 +5,9 @@
# the root directory of this source tree.
from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, Field, field_validator
from llama_stack.providers.utils.inference import supported_inference_models
from pydantic import BaseModel, Field, field_validator
@json_schema_type

View file

@ -20,7 +20,7 @@ from vllm.sampling_params import SamplingParams as VLLMSamplingParams
from llama_stack.apis.inference import * # noqa: F403
from llama_stack.providers.datatypes import ModelDef, ModelsProtocolPrivate
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionChoice,
OpenAICompatCompletionResponse,
@ -83,19 +83,11 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
if self.engine:
self.engine.shutdown_background_loop()
async def register_model(self, model: ModelDef) -> None:
async def register_model(self, model: Model) -> None:
raise ValueError(
"You cannot dynamically add a model to a running vllm instance"
)
async def list_models(self) -> List[ModelDef]:
return [
ModelDef(
identifier=self.config.model,
llama_model=self.config.model,
)
]
def _sampling_params(self, sampling_params: SamplingParams) -> VLLMSamplingParams:
if sampling_params is None:
return VLLMSamplingParams(max_tokens=self.config.max_tokens)

View file

@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

View file

@ -5,13 +5,13 @@
# the root directory of this source tree.
from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel
from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
from pydantic import BaseModel
@json_schema_type

View file

@ -8,11 +8,11 @@ import logging
from typing import Any, Dict, List, Optional
import faiss
import numpy as np
from numpy.typing import NDArray
import faiss
from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.memory import * # noqa: F403

View file

@ -24,19 +24,19 @@ class MetaReferenceCodeScannerSafetyImpl(Safety):
async def shutdown(self) -> None:
pass
async def register_shield(self, shield: ShieldDef) -> None:
async def register_shield(self, shield: Shield) -> None:
if shield.shield_type != ShieldType.code_scanner.value:
raise ValueError(f"Unsupported safety shield type: {shield.shield_type}")
async def run_shield(
self,
shield_type: str,
shield_id: str,
messages: List[Message],
params: Dict[str, Any] = None,
) -> RunShieldResponse:
shield_def = await self.shield_store.get_shield(shield_type)
if not shield_def:
raise ValueError(f"Unknown shield {shield_type}")
shield = await self.shield_store.get_shield(shield_id)
if not shield:
raise ValueError(f"Shield {shield_id} not found")
from codeshield.cs import CodeShield

View file

@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

View file

@ -21,6 +21,7 @@ from .prompt_guard import InjectionShield, JailbreakShield, PromptGuardShield
PROMPT_GUARD_MODEL = "Prompt-Guard-86M"
SUPPORTED_SHIELDS = [ShieldType.llama_guard, ShieldType.prompt_guard]
class MetaReferenceSafetyImpl(Safety, ShieldsProtocolPrivate):
@ -30,9 +31,9 @@ class MetaReferenceSafetyImpl(Safety, ShieldsProtocolPrivate):
self.available_shields = []
if config.llama_guard_shield:
self.available_shields.append(ShieldType.llama_guard.value)
self.available_shields.append(ShieldType.llama_guard)
if config.enable_prompt_guard:
self.available_shields.append(ShieldType.prompt_guard.value)
self.available_shields.append(ShieldType.prompt_guard)
async def initialize(self) -> None:
if self.config.enable_prompt_guard:
@ -42,30 +43,21 @@ class MetaReferenceSafetyImpl(Safety, ShieldsProtocolPrivate):
async def shutdown(self) -> None:
pass
async def register_shield(self, shield: ShieldDef) -> None:
raise ValueError("Registering dynamic shields is not supported")
async def list_shields(self) -> List[ShieldDef]:
return [
ShieldDef(
identifier=shield_type,
shield_type=shield_type,
params={},
)
for shield_type in self.available_shields
]
async def register_shield(self, shield: Shield) -> None:
if shield.shield_type not in self.available_shields:
raise ValueError(f"Shield type {shield.shield_type} not supported")
async def run_shield(
self,
identifier: str,
shield_id: str,
messages: List[Message],
params: Dict[str, Any] = None,
) -> RunShieldResponse:
shield_def = await self.shield_store.get_shield(identifier)
if not shield_def:
raise ValueError(f"Unknown shield {identifier}")
shield = await self.shield_store.get_shield(shield_id)
if not shield:
raise ValueError(f"Shield {shield_id} not found")
shield = self.get_shield_impl(shield_def)
shield_impl = self.get_shield_impl(shield)
messages = messages.copy()
# some shields like llama-guard require the first message to be a user message
@ -74,13 +66,16 @@ class MetaReferenceSafetyImpl(Safety, ShieldsProtocolPrivate):
messages[0] = UserMessage(content=messages[0].content)
# TODO: we can refactor ShieldBase, etc. to be inline with the API types
res = await shield.run(messages)
res = await shield_impl.run(messages)
violation = None
if res.is_violation and shield.on_violation_action != OnViolationAction.IGNORE:
if (
res.is_violation
and shield_impl.on_violation_action != OnViolationAction.IGNORE
):
violation = SafetyViolation(
violation_level=(
ViolationLevel.ERROR
if shield.on_violation_action == OnViolationAction.RAISE
if shield_impl.on_violation_action == OnViolationAction.RAISE
else ViolationLevel.WARN
),
user_message=res.violation_return_message,
@ -91,15 +86,15 @@ class MetaReferenceSafetyImpl(Safety, ShieldsProtocolPrivate):
return RunShieldResponse(violation=violation)
def get_shield_impl(self, shield: ShieldDef) -> ShieldBase:
if shield.shield_type == ShieldType.llama_guard.value:
def get_shield_impl(self, shield: Shield) -> ShieldBase:
if shield.shield_type == ShieldType.llama_guard:
cfg = self.config.llama_guard_shield
return LlamaGuardShield(
model=cfg.model,
inference_api=self.inference_api,
excluded_categories=cfg.excluded_categories,
)
elif shield.shield_type == ShieldType.prompt_guard.value:
elif shield.shield_type == ShieldType.prompt_guard:
model_dir = model_local_dir(PROMPT_GUARD_MODEL)
subtype = shield.params.get("prompt_guard_type", "injection")
if subtype == "injection":