Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 12:07:34 +00:00)
Commit ab8193c88c: use templates for generating system prompts
Parent: 68855ed218
8 changed files with 410 additions and 218 deletions
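In short: this commit replaces the tool-specific prepare_messages_for_tools helper with a single template-driven prepare_messages, and the Ollama provider now passes the prepared message list (rather than the raw request.messages) to both its streaming and non-streaming chat calls. Of the eight changed files, only the Ollama provider's hunks appear below; a hedged sketch of the new flow follows the diff.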
@@ -32,7 +32,7 @@ from llama_toolchain.inference.api import (
     ToolCallDelta,
     ToolCallParseStatus,
 )
-from llama_toolchain.inference.prepare_messages import prepare_messages_for_tools
+from llama_toolchain.inference.prepare_messages import prepare_messages
 from .config import OllamaImplConfig
 
 # TODO: Eventually this will move to the llama cli model list command
@@ -111,7 +111,7 @@ class OllamaInference(Inference):
         return options
 
     async def chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator:
-        request = prepare_messages_for_tools(request)
+        messages = prepare_messages(request)
         # accumulate sampling params and other options to pass to ollama
         options = self.get_ollama_chat_options(request)
         ollama_model = self.resolve_ollama_model(request.model)
@@ -133,7 +133,7 @@ class OllamaInference(Inference):
         if not request.stream:
             r = await self.client.chat(
                 model=ollama_model,
-                messages=self._messages_to_ollama_messages(request.messages),
+                messages=self._messages_to_ollama_messages(messages),
                 stream=False,
                 options=options,
             )
@@ -161,7 +161,7 @@ class OllamaInference(Inference):
             )
             stream = await self.client.chat(
                 model=ollama_model,
-                messages=self._messages_to_ollama_messages(request.messages),
+                messages=self._messages_to_ollama_messages(messages),
                 stream=True,
                 options=options,
             )
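To make the renamed helper concrete, here is a minimal, self-contained sketch of a template-driven prepare_messages and the Ollama-style message conversion. The Message and ChatCompletionRequest shapes, the tools field, and the SYSTEM_TEMPLATE string are assumptions for illustration; only the prepare_messages and _messages_to_ollama_messages names, and the request-in, message-list-out contract, come from the diff itself.

# A self-contained sketch of the flow this commit introduces. Types and the
# template text are hypothetical stand-ins, not llama_toolchain's real ones.
from dataclasses import dataclass, field
from string import Template
from typing import List

@dataclass
class Message:
    role: str      # "system" | "user" | "assistant"
    content: str

@dataclass
class ChatCompletionRequest:
    model: str
    messages: List[Message] = field(default_factory=list)
    tools: List[str] = field(default_factory=list)  # simplified: tool names only

# Hypothetical template; the real one in llama_toolchain is richer.
SYSTEM_TEMPLATE = Template(
    "You are a helpful assistant. Available tools: $tool_list"
)

def prepare_messages(request: ChatCompletionRequest) -> List[Message]:
    """Render a system prompt from a template and prepend it to the
    conversation, instead of mutating the request per tool type."""
    system = Message(
        role="system",
        content=SYSTEM_TEMPLATE.substitute(
            tool_list=", ".join(request.tools) or "none"
        ),
    )
    return [system] + request.messages

def _messages_to_ollama_messages(messages: List[Message]) -> List[dict]:
    # Ollama's chat API expects [{"role": ..., "content": ...}, ...]
    return [{"role": m.role, "content": m.content} for m in messages]

if __name__ == "__main__":
    req = ChatCompletionRequest(
        model="llama3",
        messages=[Message("user", "What is 2 + 2?")],
        tools=["calculator"],
    )
    # Mirrors the new call order in chat_completion: prepare the message
    # list first, then convert it for the provider client.
    msgs = prepare_messages(req)
    print(_messages_to_ollama_messages(msgs))

Running the sketch prints Ollama-style message dicts with the templated system prompt first, which is the ordering the updated chat_completion relies on when it hands the prepared list to both the streaming and non-streaming client.chat calls.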