forked from phoenix-oss/llama-stack-mirror
Added Ollama as an inference impl (#20)
* fix non-streaming api in inference server
* unit test for inline inference
* Added non-streaming ollama inference impl
* add streaming support for ollama inference with tests
* addressing comments

---------

Co-authored-by: Hardik Shah <hjshah@fb.com>
parent c253c1c9ad
commit 156bfa0e15
9 changed files with 921 additions and 33 deletions
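The Ollama impl mentioned in the commit message has to cover both a non-streaming and a streaming chat path. A minimal sketch of those two call shapes, using the standalone `ollama` Python client (the client choice, the model tag, and all wiring below are assumptions for illustration, not code from this commit):

# Sketch only: the two Ollama call shapes a non-streaming and a streaming
# inference impl would wrap. `ollama.AsyncClient` is the standalone client;
# the model tag is an assumed placeholder.
import asyncio

from ollama import AsyncClient

MODEL = "llama3:8b-instruct-fp16"  # assumed model tag


async def non_streaming_chat(prompt: str) -> str:
    # One response object; the whole completion arrives at once.
    response = await AsyncClient().chat(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}],
    )
    return response["message"]["content"]


async def streaming_chat(prompt: str) -> None:
    # stream=True yields incremental chunks, each carrying a content delta.
    async for part in await AsyncClient().chat(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    ):
        print(part["message"]["content"], end="", flush=True)


if __name__ == "__main__":
    asyncio.run(streaming_chat("Hello"))

The event logger change in the diff below mirrors this split: streamed chunks are printed delta by delta, while a non-streamed response is printed as one complete message.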
@@ -6,7 +6,10 @@
 
 from termcolor import cprint
 
-from llama_toolchain.inference.api import ChatCompletionResponseEventType
+from llama_toolchain.inference.api import (
+    ChatCompletionResponseEventType,
+    ChatCompletionResponseStreamChunk
+)
 
 
 class LogEvent:
@@ -25,12 +28,16 @@ class LogEvent:
 
 
 class EventLogger:
-    async def log(self, event_generator, stream=True):
+    async def log(self, event_generator):
         async for chunk in event_generator:
-            event = chunk.event
-            if event.event_type == ChatCompletionResponseEventType.start:
-                yield LogEvent("Assistant> ", color="cyan", end="")
-            elif event.event_type == ChatCompletionResponseEventType.progress:
-                yield LogEvent(event.delta, color="yellow", end="")
-            elif event.event_type == ChatCompletionResponseEventType.complete:
-                yield LogEvent("")
+            if isinstance(chunk, ChatCompletionResponseStreamChunk):
+                event = chunk.event
+                if event.event_type == ChatCompletionResponseEventType.start:
+                    yield LogEvent("Assistant> ", color="cyan", end="")
+                elif event.event_type == ChatCompletionResponseEventType.progress:
+                    yield LogEvent(event.delta, color="yellow", end="")
+                elif event.event_type == ChatCompletionResponseEventType.complete:
+                    yield LogEvent("")
+            else:
+                yield LogEvent("Assistant> ", color="cyan", end="")
+                yield LogEvent(chunk.completion_message.content, color="yellow")
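Downstream, the reworked EventLogger.log no longer takes a `stream` flag; it dispatches on whether each yielded item is a ChatCompletionResponseStreamChunk. A minimal usage sketch, assuming a hypothetical async inference client, an assumed module path, and that LogEvent exposes a print() helper (none of which are shown in this hunk):

# Sketch only: driving the reworked EventLogger.log. The module path, the
# `client` object, and LogEvent.print() are assumptions for illustration.
from llama_toolchain.inference.event_logger import EventLogger  # assumed path


async def print_chat(client, request) -> None:
    # `client.chat_completion(request)` stands in for any async generator of
    # inference results. A streaming run yields ChatCompletionResponseStreamChunk
    # objects (start/progress/complete events, printed incrementally); a
    # non-streaming run yields a response whose completion_message.content is
    # printed in one shot by the new `else` branch.
    async for log_event in EventLogger().log(client.chat_completion(request)):
        log_event.print()  # assumed LogEvent helper wrapping cprint

Because the dispatch is isinstance-based, the same logging loop works unchanged whether or not the caller requested streaming.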