mirror of https://github.com/meta-llama/llama-stack.git
Fix
This commit is contained in:
parent 072d1b7205
commit fedc11b726

2 changed files with 5 additions and 2 deletions
@@ -95,7 +95,7 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate):
         def impl():
             stop_reason = None
 
-            for token_result in self.generator.chat_completion(request):
+            for token_result in self.generator.completion(request):
                 if token_result.text == "<|eot_id|>":
                     stop_reason = StopReason.end_of_turn
                     text = ""
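The one-line change above is the whole fix: the completion path was iterating self.generator.chat_completion(request), so plain completions were being routed through the chat-completion generator; it now calls self.generator.completion(request). Below is a minimal, self-contained sketch of the streaming pattern this loop implements. Everything except StopReason and the "<|eot_id|>" check is a stand-in invented for illustration, not llama-stack's actual API.

# Minimal sketch of the streaming stop-token pattern from the hunk above.
# TokenResult and fake_completion are hypothetical stand-ins; only
# StopReason and the "<|eot_id|>" sentinel come from the diff itself.
from dataclasses import dataclass
from enum import Enum
from typing import Iterator


class StopReason(Enum):
    end_of_turn = "end_of_turn"
    out_of_tokens = "out_of_tokens"


@dataclass
class TokenResult:
    text: str


def fake_completion(request: str) -> Iterator[TokenResult]:
    # Stand-in for self.generator.completion(request): yields tokens,
    # then the end-of-turn sentinel the loop below watches for.
    for tok in ["Hello", ",", " world", "<|eot_id|>"]:
        yield TokenResult(text=tok)


def impl(request: str):
    stop_reason = None
    for token_result in fake_completion(request):
        text = token_result.text
        if token_result.text == "<|eot_id|>":
            # The sentinel marks a clean end of turn; emit an empty chunk
            # instead of leaking the special token to the client.
            stop_reason = StopReason.end_of_turn
            text = ""
        yield text, stop_reason


if __name__ == "__main__":
    for text, stop in impl("Roses are red,"):
        print(repr(text), stop)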
@@ -159,7 +159,10 @@ async def test_completion(inference_settings):
         )
     ]
 
     print(chunks)
     assert all(isinstance(chunk, CompletionResponseStreamChunk) for chunk in chunks)
+    assert len(chunks) == 51
+    last = chunks[-1]
+    assert last.stop_reason == StopReason.out_of_tokens
 
 
 @pytest.mark.asyncio
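The three new assertions pin down the shape of a streamed completion: every chunk is a CompletionResponseStreamChunk, and the final chunk carries stop_reason == StopReason.out_of_tokens. The expected count of 51 presumably corresponds to a token cap of 50 plus one terminal chunk, though the cap itself is not visible in this hunk. The sketch below reproduces that contract against a fake stream; fake_stream and the chunk dataclass are illustrative stand-ins, not the real llama-stack types.

# Hedged sketch of the streaming contract the new assertions test:
# N content chunks for an assumed cap of N tokens, plus one terminal
# chunk whose stop_reason is out_of_tokens.
import asyncio
from dataclasses import dataclass
from enum import Enum
from typing import AsyncIterator, Optional


class StopReason(Enum):
    end_of_turn = "end_of_turn"
    out_of_tokens = "out_of_tokens"


@dataclass
class CompletionResponseStreamChunk:
    delta: str
    stop_reason: Optional[StopReason] = None


async def fake_stream(max_tokens: int) -> AsyncIterator[CompletionResponseStreamChunk]:
    # Stand-in for the real streaming completion endpoint.
    for i in range(max_tokens):
        yield CompletionResponseStreamChunk(delta=f"tok{i} ")
    # Terminal chunk: no content, just the reason the stream stopped.
    yield CompletionResponseStreamChunk(delta="", stop_reason=StopReason.out_of_tokens)


async def main():
    chunks = [chunk async for chunk in fake_stream(max_tokens=50)]
    assert all(isinstance(c, CompletionResponseStreamChunk) for c in chunks)
    assert len(chunks) == 51
    last = chunks[-1]
    assert last.stop_reason == StopReason.out_of_tokens
    print("all assertions hold")


asyncio.run(main())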