forked from phoenix-oss/llama-stack-mirror
fix: [Litellm] Do not swallow first token (#1316)
`ChatCompletionResponseEventType: start` is ignored and not yielded by `agent_instance`, since a `start` event is expected to carry no content. However, litellm sends its first event as `ChatCompletionResponseEventType: start` *with* content, and that content was exactly the first token we were skipping. The fix (diff below) yields a synthetic empty `start` chunk up front and labels every content-bearing chunk `progress`, so no token ever rides on a `start` event.

To verify:

```
LLAMA_STACK_CONFIG=dev pytest -s -v tests/client-sdk/agents/test_agents.py --inference-model "openai/gpt-4o-mini" -k test_agent_simple
```

This test was failing before the fix, since the word "hello" was not in the final response.
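For intuition, here is a minimal self-contained sketch of the failure mode and the fix. The types below are simplified stand-ins, not the actual llama-stack classes:

```python
from dataclasses import dataclass
from enum import Enum
from itertools import chain, repeat
from typing import Iterable, Iterator


class EventType(Enum):
    start = "start"
    progress = "progress"


@dataclass
class Event:
    event_type: EventType
    text: str


def old_convert(tokens: Iterable[str]) -> Iterator[Event]:
    # Old behavior: start -> progress -> progress -> ..., so the first
    # emitted chunk (which already carries the first token) is labeled `start`.
    event_types = chain([EventType.start], repeat(EventType.progress))
    for token in tokens:
        yield Event(next(event_types), token)


def new_convert(tokens: Iterable[str]) -> Iterator[Event]:
    # New behavior: emit a synthetic, empty `start` chunk first, then label
    # every content-bearing chunk `progress`.
    yield Event(EventType.start, "")
    for token in tokens:
        yield Event(EventType.progress, token)


def consume(events: Iterable[Event]) -> str:
    # agent_instance-style consumer: `start` is assumed empty and skipped.
    return "".join(e.text for e in events if e.event_type is not EventType.start)


tokens = ["hello", ",", " world"]
assert consume(old_convert(tokens)) == ", world"       # first token swallowed
assert consume(new_convert(tokens)) == "hello, world"  # first token preserved
```

The design point: the fix trades the stateful event-type generator for an explicit empty `start` chunk, which keeps the invariant "start events carry no content" true at the source rather than relying on every consumer to handle content in `start`.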
This commit is contained in:

parent 7780fc92d5
commit 999195fe5b

1 changed file with 12 additions and 13 deletions
```diff
@@ -6,7 +6,7 @@
 import json
 import logging
 import warnings
-from typing import AsyncGenerator, Dict, Generator, Iterable, List, Optional, Union
+from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union
 
 from openai import AsyncStream
 from openai.types.chat import (
@@ -841,14 +841,13 @@ async def convert_openai_chat_completion_stream(
     Convert a stream of OpenAI chat completion chunks into a stream
     of ChatCompletionResponseStreamChunk.
     """
-
-    # generate a stream of ChatCompletionResponseEventType: start -> progress -> progress -> ...
-    def _event_type_generator() -> Generator[ChatCompletionResponseEventType, None, None]:
-        yield ChatCompletionResponseEventType.start
-        while True:
-            yield ChatCompletionResponseEventType.progress
-
-    event_type = _event_type_generator()
+    yield ChatCompletionResponseStreamChunk(
+        event=ChatCompletionResponseEvent(
+            event_type=ChatCompletionResponseEventType.start,
+            delta=TextDelta(text=""),
+        )
+    )
+    event_type = ChatCompletionResponseEventType.progress
 
     stop_reason = None
     toolcall_buffer = {}
@@ -868,7 +867,7 @@ async def convert_openai_chat_completion_stream(
         if choice.delta.content:
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
-                    event_type=next(event_type),
+                    event_type=event_type,
                     delta=TextDelta(text=choice.delta.content),
                     logprobs=_convert_openai_logprobs(logprobs),
                 )
@@ -909,7 +908,7 @@ async def convert_openai_chat_completion_stream(
             toolcall_buffer["content"] += delta
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
-                    event_type=next(event_type),
+                    event_type=event_type,
                     delta=ToolCallDelta(
                         tool_call=delta,
                         parse_status=ToolCallParseStatus.in_progress,
@@ -920,7 +919,7 @@ async def convert_openai_chat_completion_stream(
         else:
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
-                    event_type=next(event_type),
+                    event_type=event_type,
                     delta=TextDelta(text=choice.delta.content or ""),
                     logprobs=_convert_openai_logprobs(logprobs),
                 )
@@ -931,7 +930,7 @@ async def convert_openai_chat_completion_stream(
             toolcall_buffer["content"] += delta
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
-                    event_type=next(event_type),
+                    event_type=event_type,
                     delta=ToolCallDelta(
                         tool_call=delta,
                         parse_status=ToolCallParseStatus.in_progress,
```
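After this change, a downstream consumer can keep skipping `start` events without risk, because the converter guarantees they carry an empty `TextDelta`. A hedged consumer sketch follows; the import path and the `stream` argument are assumptions, while the field names follow the diff above:

```python
from llama_stack.apis.inference import ChatCompletionResponseEventType  # assumed import path


async def accumulate_text(stream) -> str:
    # `stream` is assumed to be the async iterator produced by
    # convert_openai_chat_completion_stream above.
    pieces = []
    async for chunk in stream:
        event = chunk.event
        if event.event_type == ChatCompletionResponseEventType.start:
            continue  # safe now: start always carries delta.text == ""
        if hasattr(event.delta, "text"):  # skip tool-call deltas
            pieces.append(event.delta.text)
    return "".join(pieces)
```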