add streaming support for ollama inference with tests

Hardik Shah 2024-07-31 19:33:36 -07:00
parent 0e75e73fa7
commit 0e985648f5
4 changed files with 491 additions and 61 deletions


@@ -103,13 +103,15 @@ class InferenceImpl(Inference):
                 )
             else:
                 delta = text
-            yield ChatCompletionResponseStreamChunk(
-                event=ChatCompletionResponseEvent(
-                    event_type=ChatCompletionResponseEventType.progress,
-                    delta=delta,
-                    stop_reason=stop_reason,
-                )
-            )
+            if stop_reason is None:
+                yield ChatCompletionResponseStreamChunk(
+                    event=ChatCompletionResponseEvent(
+                        event_type=ChatCompletionResponseEventType.progress,
+                        delta=delta,
+                        stop_reason=stop_reason,
+                    )
+                )
 
         if stop_reason is None:
             stop_reason = StopReason.out_of_tokens
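
The change guards the per-token progress chunk so that nothing is yielded once a stop reason has been observed, and defaults the stop reason to out_of_tokens if the generator exhausts its tokens without seeing one. Below is a minimal, self-contained sketch of that streaming pattern. The dataclasses, the stream_chat helper, and the "<eot>" marker are illustrative stand-ins assumed for this example, not llama-stack's actual definitions; only the event/chunk type names come from the diff above.

import asyncio
from dataclasses import dataclass
from enum import Enum
from typing import AsyncGenerator, Iterable, Optional


class StopReason(Enum):
    end_of_turn = "end_of_turn"
    out_of_tokens = "out_of_tokens"


class ChatCompletionResponseEventType(Enum):
    progress = "progress"
    complete = "complete"


@dataclass
class ChatCompletionResponseEvent:
    event_type: ChatCompletionResponseEventType
    delta: str
    stop_reason: Optional[StopReason] = None


@dataclass
class ChatCompletionResponseStreamChunk:
    event: ChatCompletionResponseEvent


async def stream_chat(
    tokens: Iterable[str],
) -> AsyncGenerator[ChatCompletionResponseStreamChunk, None]:
    stop_reason: Optional[StopReason] = None
    for text in tokens:
        if text == "<eot>":  # hypothetical end-of-turn marker
            stop_reason = StopReason.end_of_turn
            text = ""
        # The guard added by this commit: stop emitting progress
        # chunks once a stop reason has been recorded.
        if stop_reason is None:
            yield ChatCompletionResponseStreamChunk(
                event=ChatCompletionResponseEvent(
                    event_type=ChatCompletionResponseEventType.progress,
                    delta=text,
                    stop_reason=stop_reason,
                )
            )
    # Mirrors the trailing lines of the hunk: a stream that ends
    # without an explicit stop reason is treated as out_of_tokens.
    if stop_reason is None:
        stop_reason = StopReason.out_of_tokens
    yield ChatCompletionResponseStreamChunk(
        event=ChatCompletionResponseEvent(
            event_type=ChatCompletionResponseEventType.complete,
            delta="",
            stop_reason=stop_reason,
        )
    )


async def main() -> None:
    async for chunk in stream_chat(["Hello", ", ", "world", "<eot>"]):
        print(chunk.event.event_type.value, repr(chunk.event.delta), chunk.event.stop_reason)


asyncio.run(main())

Running the sketch prints three progress chunks followed by a single complete chunk carrying end_of_turn; without the added guard, the "<eot>" iteration would also emit a progress chunk with an empty delta.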