From 2d0163b47b5d27918ff2018951b764b517e3299d Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Wed, 11 Sep 2024 14:51:06 -0700
Subject: [PATCH] fix inference

---
 llama_toolchain/inference/meta_reference/inference.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama_toolchain/inference/meta_reference/inference.py b/llama_toolchain/inference/meta_reference/inference.py
index b54e2f3f4..9dca627ce 100644
--- a/llama_toolchain/inference/meta_reference/inference.py
+++ b/llama_toolchain/inference/meta_reference/inference.py
@@ -77,7 +77,8 @@ class MetaReferenceInferenceImpl(Inference):
             logprobs=logprobs,
         )
 
-        return self._chat_completion(request)
+        async for chunk in self.chat_completion_impl(request):
+            yield chunk
 
     async def chat_completion_impl(
         self, request: ChatCompletionRequest
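
Below is a minimal, self-contained sketch (not from the patch) of why the fix delegates with "async for ... yield" rather than "return": chat_completion must itself be an async generator so callers can stream chunks, whereas returning the impl's generator from a plain async def would hand back an un-iterated generator object. The class and method bodies here are simplified stand-ins; only the delegation pattern mirrors the diff, and names like MetaReferenceInferenceImplSketch are hypothetical.

import asyncio
from typing import AsyncIterator


class MetaReferenceInferenceImplSketch:
    async def chat_completion_impl(self, request: str) -> AsyncIterator[str]:
        # Stand-in for the real implementation, which yields response chunks.
        for token in ["Hello", ", ", "world"]:
            yield token

    async def chat_completion(self, request: str) -> AsyncIterator[str]:
        # Mirrors the fix: forward every chunk from the impl's async generator.
        async for chunk in self.chat_completion_impl(request):
            yield chunk


async def main() -> None:
    impl = MetaReferenceInferenceImplSketch()
    # Consumers iterate the stream with "async for", just as they would
    # against the patched chat_completion.
    async for chunk in impl.chat_completion("hi"):
        print(chunk, end="")
    print()


if __name__ == "__main__":
    asyncio.run(main())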