forked from phoenix-oss/llama-stack-mirror
Use inference APIs for executing Llama Guard (#121)
We should use the Inference APIs to execute Llama Guard instead of depending directly on HuggingFace modeling-related code. The actual inference considerations are handled by the Inference API.
This commit is contained in:
parent
6236634d84
commit
0a3999a9a4
9 changed files with 167 additions and 204 deletions
|
@@ -16,6 +16,8 @@ from llama_models.llama3.prompt_templates import (
|
|||
)
|
||||
from llama_models.sku_list import resolve_model
|
||||
|
||||
from llama_stack.providers.utils.inference import supported_inference_models
|
||||
|
||||
|
||||
def augment_messages_for_tools(request: ChatCompletionRequest) -> List[Message]:
|
||||
"""Reads chat completion request and augments the messages to handle tools.
|
||||
|
@@ -27,8 +29,8 @@ def augment_messages_for_tools(request: ChatCompletionRequest) -> List[Message]:
|
|||
cprint(f"Could not resolve model {request.model}", color="red")
|
||||
return request.messages
|
||||
|
||||
if model.model_family not in [ModelFamily.llama3_1, ModelFamily.llama3_2]:
|
||||
cprint(f"Model family {model.model_family} not llama 3_1 or 3_2", color="red")
|
||||
if model.descriptor() not in supported_inference_models():
|
||||
cprint(f"Unsupported inference model? {model.descriptor()}", color="red")
|
||||
return request.messages
|
||||
|
||||
if model.model_family == ModelFamily.llama3_1 or (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue