add quantized model ollama support

commit 2edfda97e9
parent f1b9578f8d
Author: Kai Wu
Date:   2024-11-18 10:22:50 -08:00


@@ -12,12 +12,12 @@ from llama_models.datatypes import CoreModelId
 from llama_models.llama3.api.chat_format import ChatFormat
 from llama_models.llama3.api.datatypes import Message
 from llama_models.llama3.api.tokenizer import Tokenizer
-from ollama import AsyncClient
 from llama_stack.providers.utils.inference.model_registry import (
     build_model_alias,
     ModelRegistryHelper,
 )
+from ollama import AsyncClient
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
@@ -44,10 +44,18 @@ model_aliases = [
         "llama3.1:8b-instruct-fp16",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
+    build_model_alias(
+        "llama3.1:8b",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
     build_model_alias(
         "llama3.1:70b-instruct-fp16",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
+    build_model_alias(
+        "llama3.1:70b",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
     build_model_alias(
         "llama3.2:1b-instruct-fp16",
         CoreModelId.llama3_2_1b_instruct.value,
@@ -56,6 +64,14 @@ model_aliases = [
         "llama3.2:3b-instruct-fp16",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
+    build_model_alias(
+        "llama3.2:1b",
+        CoreModelId.llama3_2_1b_instruct.value,
+    ),
+    build_model_alias(
+        "llama3.2:3b",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
     build_model_alias(
         "llama-guard3:8b",
         CoreModelId.llama_guard_3_8b.value,
@@ -68,6 +84,10 @@ model_aliases = [
         "x/llama3.2-vision:11b-instruct-fp16",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
+    build_model_alias(
+        "llama3.2-vision",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
 ]
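
For context, the aliases added here map bare Ollama tags (which typically point at Ollama's default 4-bit quantized builds) to the same CoreModelId descriptors as the existing fp16 tags. Below is a minimal sketch of that mapping, not part of the commit: it uses a hypothetical dict-based resolver rather than the provider's actual ModelRegistryHelper plumbing, and only the CoreModelId identifiers are taken from the diff above.

    from llama_models.datatypes import CoreModelId

    # (core model descriptor -> Ollama tag) pairs mirroring the aliases
    # added in this commit; the bare tags select quantized builds.
    QUANTIZED_ALIASES = {
        CoreModelId.llama3_1_8b_instruct.value: "llama3.1:8b",
        CoreModelId.llama3_1_70b_instruct.value: "llama3.1:70b",
        CoreModelId.llama3_2_1b_instruct.value: "llama3.2:1b",
        CoreModelId.llama3_2_3b_instruct.value: "llama3.2:3b",
        CoreModelId.llama3_2_11b_vision_instruct.value: "llama3.2-vision",
    }

    def resolve_ollama_tag(descriptor: str) -> str:
        """Map a core model descriptor to its quantized Ollama tag, if registered."""
        try:
            return QUANTIZED_ALIASES[descriptor]
        except KeyError:
            raise ValueError(f"no quantized Ollama alias registered for {descriptor}")

With this table, resolve_ollama_tag(CoreModelId.llama3_2_3b_instruct.value) returns "llama3.2:3b", the tag a user would fetch with `ollama pull llama3.2:3b`, whereas before this commit only the fp16 tags (e.g. "llama3.2:3b-instruct-fp16") were registered.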