diff --git a/llama_toolchain/inference/meta_reference/inference.py b/llama_toolchain/inference/meta_reference/inference.py
index b54e2f3f4..9dca627ce 100644
--- a/llama_toolchain/inference/meta_reference/inference.py
+++ b/llama_toolchain/inference/meta_reference/inference.py
@@ -77,7 +77,8 @@ class MetaReferenceInferenceImpl(Inference):
             logprobs=logprobs,
         )
 
-        return self._chat_completion(request)
+        async for chunk in self.chat_completion_impl(request):
+            yield chunk
 
     async def chat_completion_impl(
         self, request: ChatCompletionRequest