fix: [Litellm] Do not swallow first token (#1316)

`ChatCompletionResponseEventType: start` is ignored and not yielded in
the agent_instance, since we expect it to carry no content.

However, litellm sends its first event as
`ChatCompletionResponseEventType: start` *with* content, and that
content is the first token we were dropping.
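
To see why that combination drops a token, here is a minimal sketch of the failure mode; the `(event_type, text)` pairs and the `collect_text` helper are hypothetical stand-ins for the real chunk objects and agent loop:

```python
# Hypothetical, simplified consumer mirroring the agent_instance behavior
# described above: "start" chunks are assumed to carry no content and are
# skipped outright.
def collect_text(stream):
    out = []
    for event_type, text in stream:
        if event_type == "start":
            continue  # assumed empty; any content here is silently lost
        out.append(text)
    return "".join(out)

# litellm tags its first content-bearing chunk as "start", so the first
# token never reaches the final response.
litellm_stream = [("start", "Hello"), ("progress", " world"), ("progress", "!")]
print(collect_text(litellm_stream))  # " world!" -- "Hello" was swallowed
```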

```
LLAMA_STACK_CONFIG=dev pytest -s -v tests/client-sdk/agents/test_agents.py --inference-model "openai/gpt-4o-mini" -k test_agent_simple
``` 
This test was failing before this change, since the word "hello" was
not in the final response.
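
The failing check is roughly of this shape (a condensed, hypothetical version of the assertion; the real test drives a full agent turn):

```python
# Before the fix the first token was dropped from the stream, so the
# greeting check in test_agent_simple (sketched here) failed.
before_fix = "".join([" world", "!"])         # "Hello" was swallowed
after_fix = "".join(["Hello", " world", "!"])

assert "hello" not in before_fix.lower()      # the old, buggy behavior
assert "hello" in after_fix.lower()           # passes with this change
```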
Author: Hardik Shah (committed by GitHub)
Date:   2025-02-27 20:53:47 -08:00
Parent: 7780fc92d5
Commit: 999195fe5b


```diff
@@ -6,7 +6,7 @@
 import json
 import logging
 import warnings
-from typing import AsyncGenerator, Dict, Generator, Iterable, List, Optional, Union
+from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union
 
 from openai import AsyncStream
 from openai.types.chat import (
@@ -841,14 +841,13 @@ async def convert_openai_chat_completion_stream(
     Convert a stream of OpenAI chat completion chunks into a stream
     of ChatCompletionResponseStreamChunk.
     """
-
-    # generate a stream of ChatCompletionResponseEventType: start -> progress -> progress -> ...
-    def _event_type_generator() -> Generator[ChatCompletionResponseEventType, None, None]:
-        yield ChatCompletionResponseEventType.start
-        while True:
-            yield ChatCompletionResponseEventType.progress
-
-    event_type = _event_type_generator()
+    yield ChatCompletionResponseStreamChunk(
+        event=ChatCompletionResponseEvent(
+            event_type=ChatCompletionResponseEventType.start,
+            delta=TextDelta(text=""),
+        )
+    )
+    event_type = ChatCompletionResponseEventType.progress
 
     stop_reason = None
     toolcall_buffer = {}
@@ -868,7 +867,7 @@ async def convert_openai_chat_completion_stream(
         if choice.delta.content:
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
-                    event_type=next(event_type),
+                    event_type=event_type,
                     delta=TextDelta(text=choice.delta.content),
                     logprobs=_convert_openai_logprobs(logprobs),
                 )
@@ -909,7 +908,7 @@ async def convert_openai_chat_completion_stream(
                 toolcall_buffer["content"] += delta
                 yield ChatCompletionResponseStreamChunk(
                     event=ChatCompletionResponseEvent(
-                        event_type=next(event_type),
+                        event_type=event_type,
                         delta=ToolCallDelta(
                             tool_call=delta,
                             parse_status=ToolCallParseStatus.in_progress,
@@ -920,7 +919,7 @@ async def convert_openai_chat_completion_stream(
         else:
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
-                    event_type=next(event_type),
+                    event_type=event_type,
                     delta=TextDelta(text=choice.delta.content or ""),
                     logprobs=_convert_openai_logprobs(logprobs),
                 )
@@ -931,7 +930,7 @@ async def convert_openai_chat_completion_stream(
             toolcall_buffer["content"] += delta
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
-                    event_type=next(event_type),
+                    event_type=event_type,
                     delta=ToolCallDelta(
                         tool_call=delta,
                         parse_status=ToolCallParseStatus.in_progress,
```
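
The net effect, sketched with the same kind of simplified pairs as above: the converter now emits a synthetic, empty `start` chunk itself and tags every real delta as `progress`, so a consumer that skips `start` events loses nothing. The pair-based `convert` below is a hypothetical model, not the real converter:

```python
# Simplified model of the converter after this change.
def convert(openai_texts):
    # Emit a synthetic, empty "start" event up front; it is safe to skip.
    yield ("start", "")
    # Tag every real delta as "progress", so content is never attached to
    # an event type that downstream consumers ignore.
    for text in openai_texts:
        yield ("progress", text)

chunks = list(convert(["Hello", " world", "!"]))
assert chunks[0] == ("start", "")
assert "".join(t for e, t in chunks if e != "start") == "Hello world!"
```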