From 2edfda97e9659155074269fc3b7e66d9bb2c57d4 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Mon, 18 Nov 2024 10:22:50 -0800 Subject: [PATCH] add quantized model ollama support --- .../remote/inference/ollama/ollama.py | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 3b3f3868b..a18c40b20 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -12,12 +12,12 @@ from llama_models.datatypes import CoreModelId from llama_models.llama3.api.chat_format import ChatFormat from llama_models.llama3.api.datatypes import Message from llama_models.llama3.api.tokenizer import Tokenizer -from ollama import AsyncClient from llama_stack.providers.utils.inference.model_registry import ( build_model_alias, ModelRegistryHelper, ) +from ollama import AsyncClient from llama_stack.apis.inference import * # noqa: F403 from llama_stack.providers.datatypes import ModelsProtocolPrivate @@ -44,10 +44,18 @@ model_aliases = [ "llama3.1:8b-instruct-fp16", CoreModelId.llama3_1_8b_instruct.value, ), + build_model_alias( + "llama3.1:8b", + CoreModelId.llama3_1_8b_instruct.value, + ), build_model_alias( "llama3.1:70b-instruct-fp16", CoreModelId.llama3_1_70b_instruct.value, ), + build_model_alias( + "llama3.1:70b", + CoreModelId.llama3_1_70b_instruct.value, + ), build_model_alias( "llama3.2:1b-instruct-fp16", CoreModelId.llama3_2_1b_instruct.value, @@ -56,6 +64,14 @@ model_aliases = [ "llama3.2:3b-instruct-fp16", CoreModelId.llama3_2_3b_instruct.value, ), + build_model_alias( + "llama3.2:1b", + CoreModelId.llama3_2_1b_instruct.value, + ), + build_model_alias( + "llama3.2:3b", + CoreModelId.llama3_2_3b_instruct.value, + ), build_model_alias( "llama-guard3:8b", CoreModelId.llama_guard_3_8b.value, @@ -68,6 +84,10 @@ model_aliases = [ "x/llama3.2-vision:11b-instruct-fp16", CoreModelId.llama3_2_11b_vision_instruct.value, ), + build_model_alias( + "llama3.2-vision", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), ]