Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-08-12 04:50:39 +00:00
do not swallow first token

commit f55d812d8e
parent a9f5c5bfca

1 changed file with 10 additions and 11 deletions
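
The failure mode, as far as it can be read from this patch: convert_openai_chat_completion_stream previously drew its event types from a generator that yields ChatCompletionResponseEventType.start once and progress forever after, so the first content chunk of every stream was tagged start. A consumer that treats start as a content-free marker and only accumulates text from progress events silently drops the first token. After this commit, every chunk is tagged progress. A minimal sketch of that consumer-side effect follows; the client loop is hypothetical, not llama-stack code:

    # Hypothetical client: accumulate text only from `progress` events.
    def collect(chunks):
        return "".join(text for event_type, text in chunks if event_type == "progress")

    old = [("start", "Hello"), ("progress", ", "), ("progress", "world")]
    new = [("progress", "Hello"), ("progress", ", "), ("progress", "world")]

    print(collect(old))  # ', world'       -- first token swallowed
    print(collect(new))  # 'Hello, world'  -- intact after this commit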
@@ -6,7 +6,7 @@
 import json
 import logging
 import warnings
-from typing import AsyncGenerator, Dict, Generator, Iterable, List, Optional, Union
+from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union
 
 from openai import AsyncStream
 from openai.types.chat import (
@@ -843,12 +843,11 @@ async def convert_openai_chat_completion_stream(
     """
 
-    # generate a stream of ChatCompletionResponseEventType: start -> progress -> progress -> ...
-    def _event_type_generator() -> Generator[ChatCompletionResponseEventType, None, None]:
-        yield ChatCompletionResponseEventType.start
-        while True:
-            yield ChatCompletionResponseEventType.progress
-    event_type = _event_type_generator()
+    # def _event_type_generator() -> Generator[ChatCompletionResponseEventType, None, None]:
+    #     yield ChatCompletionResponseEventType.start
+    #     while True:
+    #         yield ChatCompletionResponseEventType.progress
+    event_type = ChatCompletionResponseEventType.progress
 
     stop_reason = None
     toolcall_buffer = {}
 
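A self-contained sketch of what this hunk changes, using a stand-in Enum rather than importing llama_stack:

    from enum import Enum
    from typing import Generator

    class EventType(Enum):  # stand-in for ChatCompletionResponseEventType
        start = "start"
        progress = "progress"

    # Old scheme: a generator yields `start` once, then `progress` forever.
    def _event_type_generator() -> Generator[EventType, None, None]:
        yield EventType.start
        while True:
            yield EventType.progress

    gen = _event_type_generator()
    print([next(gen).value for _ in range(3)])   # ['start', 'progress', 'progress']

    # New scheme: a plain constant; every chunk is tagged `progress`.
    event_type = EventType.progress
    print([event_type.value for _ in range(3)])  # ['progress', 'progress', 'progress']

With the generator gone, the Generator name also leaves the typing import, which is the change at line 6 above.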
@@ -868,7 +867,7 @@ async def convert_openai_chat_completion_stream(
         if choice.delta.content:
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
-                    event_type=next(event_type),
+                    event_type=event_type,
                     delta=TextDelta(text=choice.delta.content),
                     logprobs=_convert_openai_logprobs(logprobs),
                 )
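The three hunks below make the same one-line substitution at the remaining yield sites. Note that under the old code all of these sites pulled from the single shared generator, so whichever branch emitted the first chunk consumed the lone start value; a tiny sketch of that shared-iterator behaviour:

    # One iterator shared by every call site: `start` is consumed exactly once,
    # by whichever branch happens to emit the first chunk.
    event_type = iter(["start", "progress", "progress"])
    print(next(event_type))  # 'start'
    print(next(event_type))  # 'progress'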
@@ -909,7 +908,7 @@ async def convert_openai_chat_completion_stream(
                 toolcall_buffer["content"] += delta
                 yield ChatCompletionResponseStreamChunk(
                     event=ChatCompletionResponseEvent(
-                        event_type=next(event_type),
+                        event_type=event_type,
                         delta=ToolCallDelta(
                             tool_call=delta,
                             parse_status=ToolCallParseStatus.in_progress,
@@ -920,7 +919,7 @@ async def convert_openai_chat_completion_stream(
             else:
                 yield ChatCompletionResponseStreamChunk(
                     event=ChatCompletionResponseEvent(
-                        event_type=next(event_type),
+                        event_type=event_type,
                         delta=TextDelta(text=choice.delta.content or ""),
                         logprobs=_convert_openai_logprobs(logprobs),
                     )
@@ -931,7 +930,7 @@ async def convert_openai_chat_completion_stream(
         toolcall_buffer["content"] += delta
         yield ChatCompletionResponseStreamChunk(
             event=ChatCompletionResponseEvent(
-                event_type=next(event_type),
+                event_type=event_type,
                 delta=ToolCallDelta(
                     tool_call=delta,
                     parse_status=ToolCallParseStatus.in_progress,