mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-11 11:50:41 +00:00
Fix
This commit is contained in:
parent
072d1b7205
commit
fedc11b726
2 changed files with 5 additions and 2 deletions
|
|
@ -95,7 +95,7 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate):
|
||||||
def impl():
|
def impl():
|
||||||
stop_reason = None
|
stop_reason = None
|
||||||
|
|
||||||
for token_result in self.generator.chat_completion(request):
|
for token_result in self.generator.completion(request):
|
||||||
if token_result.text == "<|eot_id|>":
|
if token_result.text == "<|eot_id|>":
|
||||||
stop_reason = StopReason.end_of_turn
|
stop_reason = StopReason.end_of_turn
|
||||||
text = ""
|
text = ""
|
||||||
|
|
|
||||||
|
|
@ -159,7 +159,10 @@ async def test_completion(inference_settings):
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
print(chunks)
|
assert all(isinstance(chunk, CompletionResponseStreamChunk) for chunk in chunks)
|
||||||
|
assert len(chunks) == 51
|
||||||
|
last = chunks[-1]
|
||||||
|
assert last.stop_reason == StopReason.out_of_tokens
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue