Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-05 04:17:32 +00:00
fix agentic calling inference
parent 2501b3d7de
commit f55ffa8b53
4 changed files with 8 additions and 22 deletions
@@ -77,14 +77,6 @@ class MetaReferenceInferenceImpl(Inference):
            logprobs=logprobs,
        )

        async for chunk in self.chat_completion_impl(request):
            yield chunk

    async def chat_completion_impl(
        self, request: ChatCompletionRequest
    ) -> AsyncIterator[
        Union[ChatCompletionResponseStreamChunk, ChatCompletionResponse]
    ]:
        messages = prepare_messages(request)
        model = resolve_model(request.model)
        if model is None:
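The hunk touches a streaming delegation pattern: a public async-generator entry point forwards the ChatCompletionRequest to an inner chat_completion_impl and re-yields its stream chunks. Below is a minimal, self-contained sketch of that shape; the dataclasses and the prepare_messages/resolve_model bodies are simplified stand-ins for illustration, not the real llama-stack definitions.

# Sketch of the delegation pattern visible in the diff above. All type and
# helper bodies here are hypothetical stand-ins, kept only so the example
# runs end to end.
import asyncio
from dataclasses import dataclass
from typing import AsyncIterator, List, Optional, Union


@dataclass
class ChatCompletionRequest:  # stand-in for the real request type
    model: str
    messages: List[str]


@dataclass
class ChatCompletionResponseStreamChunk:  # stand-in stream-chunk type
    delta: str


@dataclass
class ChatCompletionResponse:  # stand-in non-streaming response type
    completion: str


def prepare_messages(request: ChatCompletionRequest) -> List[str]:
    # Placeholder for the real prompt-assembly helper.
    return request.messages


def resolve_model(name: str) -> Optional[str]:
    # Placeholder for the real model-registry lookup.
    return name if name else None


class MetaReferenceInferenceImpl:
    async def chat_completion(
        self, request: ChatCompletionRequest
    ) -> AsyncIterator[
        Union[ChatCompletionResponseStreamChunk, ChatCompletionResponse]
    ]:
        # Public entry point: delegate to the impl and re-yield its chunks,
        # mirroring the `async for ... yield chunk` shape in the hunk.
        async for chunk in self.chat_completion_impl(request):
            yield chunk

    async def chat_completion_impl(
        self, request: ChatCompletionRequest
    ) -> AsyncIterator[
        Union[ChatCompletionResponseStreamChunk, ChatCompletionResponse]
    ]:
        messages = prepare_messages(request)
        model = resolve_model(request.model)
        if model is None:
            raise ValueError(f"Unknown model: {request.model}")
        # Fake a token stream so the sketch is runnable.
        for token in messages:
            yield ChatCompletionResponseStreamChunk(delta=token)


async def main() -> None:
    impl = MetaReferenceInferenceImpl()
    request = ChatCompletionRequest(model="demo", messages=["hello", "world"])
    async for chunk in impl.chat_completion(request):
        print(chunk)


if __name__ == "__main__":
    asyncio.run(main())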