forked from phoenix-oss/llama-stack-mirror
Use inference APIs for executing Llama Guard (#121)
We should use the Inference APIs to execute Llama Guard instead of depending directly on HuggingFace modeling-related code. The actual inference considerations are then handled by the Inference API.
This commit is contained in:
parent
6236634d84
commit
0a3999a9a4
9 changed files with 167 additions and 204 deletions
|
@ -3,3 +3,31 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import List
|
||||
|
||||
from llama_models.datatypes import * # noqa: F403
|
||||
from llama_models.sku_list import all_registered_models
|
||||
|
||||
|
||||
def is_supported_safety_model(model: Model) -> bool:
    """Return whether ``model`` qualifies as a supported safety model.

    A model qualifies only when its checkpoint quantization format is bf16
    and its core model id is one of the Llama Guard 3 ids listed below.
    """
    # Any non-bf16 checkpoint is rejected before the id is even looked at.
    if model.quantization_format != CheckpointQuantizationFormat.bf16:
        return False

    llama_guard_ids = [
        CoreModelId.llama_guard_3_8b,
        CoreModelId.llama_guard_3_1b,
        CoreModelId.llama_guard_3_11b_vision,
    ]
    return model.core_model_id in llama_guard_ids
|
||||
|
||||
|
||||
def supported_inference_models() -> List[str]:
    """Return the descriptors of every registered model usable for inference.

    A registered model is included when it belongs to the llama3_1 or
    llama3_2 model family, or when ``is_supported_safety_model`` accepts it.
    The order follows that of ``all_registered_models()``.
    """
    inference_families = {ModelFamily.llama3_1, ModelFamily.llama3_2}

    descriptors = []
    for candidate in all_registered_models():
        # The family check comes first, so the safety-model check only runs
        # for models outside the two general-purpose families.
        if candidate.model_family in inference_families or is_supported_safety_model(
            candidate
        ):
            descriptors.append(candidate.descriptor())
    return descriptors
|
||||
|
|
|
@ -16,6 +16,8 @@ from llama_models.llama3.prompt_templates import (
|
|||
)
|
||||
from llama_models.sku_list import resolve_model
|
||||
|
||||
from llama_stack.providers.utils.inference import supported_inference_models
|
||||
|
||||
|
||||
def augment_messages_for_tools(request: ChatCompletionRequest) -> List[Message]:
|
||||
"""Reads chat completion request and augments the messages to handle tools.
|
||||
|
@ -27,8 +29,8 @@ def augment_messages_for_tools(request: ChatCompletionRequest) -> List[Message]:
|
|||
cprint(f"Could not resolve model {request.model}", color="red")
|
||||
return request.messages
|
||||
|
||||
if model.model_family not in [ModelFamily.llama3_1, ModelFamily.llama3_2]:
|
||||
cprint(f"Model family {model.model_family} not llama 3_1 or 3_2", color="red")
|
||||
if model.descriptor() not in supported_inference_models():
|
||||
cprint(f"Unsupported inference model? {model.descriptor()}", color="red")
|
||||
return request.messages
|
||||
|
||||
if model.model_family == ModelFamily.llama3_1 or (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue