Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
commit e6ae53b5a9 (parent ffd6d35ba2)

    fixes to streaming iterator

2 changed files with 32 additions and 5 deletions
@@ -54,6 +54,7 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
                 # Get the next chunk from the stream
                 try:
                     chunk = await self.litellm_custom_stream_wrapper.__anext__()
+                    self.collected_chat_completion_chunks.append(chunk)
                     response_api_chunk = (
                         self._transform_chat_completion_chunk_to_response_api_chunk(
                             chunk
@@ -61,7 +62,6 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
                     )
                     if response_api_chunk:
                         return response_api_chunk
-                    self.collected_chat_completion_chunks.append(chunk)
                 except StopAsyncIteration:
                     self.finished = True
                     response_completed_event = self._emit_response_completed_event()
@@ -84,10 +84,11 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
         try:
             while True:
                 if self.finished is True:
-                    raise StopAsyncIteration
+                    raise StopIteration
                 # Get the next chunk from the stream
                 try:
                     chunk = self.litellm_custom_stream_wrapper.__next__()
+                    self.collected_chat_completion_chunks.append(chunk)
                     response_api_chunk = (
                         self._transform_chat_completion_chunk_to_response_api_chunk(
                             chunk
@@ -95,14 +96,13 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
                     )
                     if response_api_chunk:
                         return response_api_chunk
-                    self.collected_chat_completion_chunks.append(chunk)
-                except StopAsyncIteration:
+                except StopIteration:
                     self.finished = True
                     response_completed_event = self._emit_response_completed_event()
                     if response_completed_event:
                         return response_completed_event
                     else:
-                        raise StopAsyncIteration
+                        raise StopIteration

         except Exception as e:
             # Handle HTTP errors
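In short, both iterator paths now append each chunk to collected_chat_completion_chunks before the early return, so the final completed event is built from every chunk, and the synchronous path raises and catches StopIteration instead of StopAsyncIteration. Below is a minimal standalone sketch of the corrected synchronous pattern; the class name, ChunkStream shape, and _transform_chunk helper are illustrative stand-ins, not litellm's actual internals.

```python
from typing import Iterator, List, Optional


class SyncStreamingIteratorSketch:
    """Illustrative only: wrap a chunk iterator, surface transformed chunks,
    and emit one final 'completed' event built from every collected chunk."""

    def __init__(self, chunk_stream: Iterator[dict]):
        self.chunk_stream = chunk_stream
        self.collected_chunks: List[dict] = []
        self.finished = False

    def __iter__(self) -> "SyncStreamingIteratorSketch":
        return self

    def __next__(self) -> dict:
        while True:
            if self.finished:
                # A sync iterator must signal exhaustion with StopIteration,
                # not StopAsyncIteration.
                raise StopIteration
            try:
                chunk = next(self.chunk_stream)
                # Collect the chunk *before* any early return, so the final
                # completed event sees every chunk (e.g. for usage totals).
                self.collected_chunks.append(chunk)
                event = self._transform_chunk(chunk)
                if event:
                    return event
            except StopIteration:
                self.finished = True
                return {"type": "response.completed", "chunks": len(self.collected_chunks)}

    def _transform_chunk(self, chunk: dict) -> Optional[dict]:
        # Illustrative transform: only surface chunks that carry text.
        text = chunk.get("delta")
        return {"type": "response.output_text.delta", "delta": text} if text else None
```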
@@ -138,6 +138,8 @@ class BaseResponsesAPITest(ABC):
     async def test_basic_openai_responses_api_streaming(self, sync_mode):
         litellm._turn_on_debug()
         base_completion_call_args = self.get_base_completion_call_args()
+        collected_content_string = ""
+        response_completed_event = None
         if sync_mode:
             response = litellm.responses(
                 input="Basic ping",
@@ -146,6 +148,10 @@ class BaseResponsesAPITest(ABC):
             )
             for event in response:
                 print("litellm response=", json.dumps(event, indent=4, default=str))
+                if event.type == "response.output_text.delta":
+                    collected_content_string += event.delta
+                elif event.type == "response.completed":
+                    response_completed_event = event
         else:
             response = await litellm.aresponses(
                 input="Basic ping",
@@ -154,5 +160,26 @@ class BaseResponsesAPITest(ABC):
             )
             async for event in response:
                 print("litellm response=", json.dumps(event, indent=4, default=str))
+                if event.type == "response.output_text.delta":
+                    collected_content_string += event.delta
+                elif event.type == "response.completed":
+                    response_completed_event = event
+
+        # assert the delta chunks content had len(collected_content_string) > 0
+        # this content is typically rendered on chat ui's
+        assert len(collected_content_string) > 0
+
+        # assert the response completed event is not None
+        assert response_completed_event is not None
+
+        # assert the response completed event has a response
+        assert response_completed_event.response is not None
+
+        # assert the response completed event includes the usage
+        assert response_completed_event.response.usage is not None
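For context, the updated test accumulates text from response.output_text.delta events and checks that the final response.completed event carries usage. A minimal consumption sketch along the same lines is shown below; the test builds its remaining call kwargs via get_base_completion_call_args(), so the model name and stream parameter here are assumptions, not values taken from this commit.

```python
import asyncio

import litellm


async def main() -> None:
    # Assumed call shape: model and stream=True are illustrative; the test
    # supplies its provider-specific arguments separately.
    stream = await litellm.aresponses(
        model="openai/gpt-4o",
        input="Basic ping",
        stream=True,
    )

    collected_text = ""
    completed_event = None
    async for event in stream:
        if event.type == "response.output_text.delta":
            collected_text += event.delta
        elif event.type == "response.completed":
            completed_event = event

    print(collected_text)
    if completed_event is not None:
        # With the iterator fix above, the completed event includes usage.
        print(completed_event.response.usage)


if __name__ == "__main__":
    asyncio.run(main())
```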