litellm-mirror/litellm/responses/streaming_iterator.py

import asyncio
import json
from datetime import datetime
from typing import Any, AsyncIterator, Dict, Optional, Union

import httpx

from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.types.llms.openai import (
    ResponsesAPIResponse,
    ResponsesAPIStreamingResponse,
)
from litellm.utils import CustomStreamWrapper

COMPLETED_OPENAI_CHUNK_TYPE = "response.completed"
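# Each streamed line is a Server-Sent Events payload of the form
# `data: {...json...}`; the stream terminates with a literal `data: [DONE]`
# sentinel. Both cases are handled in `__anext__` below.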

class ResponsesAPIStreamingIterator:
    """
    Async iterator for processing streaming responses from the Responses API.

    This iterator handles the chunked streaming format returned by the Responses API
    and yields properly formatted ResponsesAPIStreamingResponse objects.
    """
    def __init__(
        self,
        response: httpx.Response,
        model: str,
        responses_api_provider_config: BaseResponsesAPIConfig,
        logging_obj: LiteLLMLoggingObj,
    ):
        self.response = response
        self.model = model
        self.logging_obj = logging_obj
        # Iterate over the raw response line by line (SSE framing).
        self.stream_iterator = response.aiter_lines()
        self.finished = False
        self.responses_api_provider_config = responses_api_provider_config
        # Populated once a "response.completed" chunk is seen.
        self.completed_response: Optional[ResponsesAPIStreamingResponse] = None
        # Recorded so the success handler can report total stream latency.
        self.start_time = datetime.now()
    def __aiter__(self):
        return self
    async def __anext__(self) -> ResponsesAPIStreamingResponse:
        try:
            # Get the next chunk from the stream
            try:
                chunk = await self.stream_iterator.__anext__()
            except StopAsyncIteration:
                self.finished = True
                raise StopAsyncIteration

            # Skip empty keep-alive lines
            if not chunk:
                return await self.__anext__()

            # Handle SSE format (data: {...})
            chunk = CustomStreamWrapper._strip_sse_data_from_chunk(chunk)
            if chunk is None:
                return await self.__anext__()

            # Handle "[DONE]" marker
            if chunk == "[DONE]":
                self.finished = True
                raise StopAsyncIteration

            try:
                # Parse the JSON chunk
                parsed_chunk = json.loads(chunk)

                # Format as ResponsesAPIStreamingResponse
                if isinstance(parsed_chunk, dict):
                    openai_responses_api_chunk: ResponsesAPIStreamingResponse = (
                        self.responses_api_provider_config.transform_streaming_response(
                            model=self.model,
                            parsed_chunk=parsed_chunk,
                            logging_obj=self.logging_obj,
                        )
                    )

                    # Store the completed response and fire success logging
                    if (
                        openai_responses_api_chunk
                        and openai_responses_api_chunk.type
                        == COMPLETED_OPENAI_CHUNK_TYPE
                    ):
                        self.completed_response = openai_responses_api_chunk
                        await self.logging_obj.async_success_handler(
                            result=self.completed_response,
                            start_time=self.start_time,
                            end_time=datetime.now(),
                            cache_hit=None,
                        )
                    return openai_responses_api_chunk

                return ResponsesAPIStreamingResponse(
                    type="response", response=parsed_chunk
                )
            except json.JSONDecodeError:
                # If we can't parse the chunk, continue to the next one
                return await self.__anext__()
        except httpx.HTTPError as e:
            # Handle HTTP errors
            self.finished = True
            raise e
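
# ---------------------------------------------------------------------------
# Illustrative usage sketch, not part of the original module: one way a
# caller might drain the iterator. `provider_config` and `logging_obj` stand
# in for objects LiteLLM wires up elsewhere, and the model name below is a
# hypothetical placeholder.
# ---------------------------------------------------------------------------
async def _example_drain_stream(
    response: httpx.Response,
    provider_config: BaseResponsesAPIConfig,
    logging_obj: LiteLLMLoggingObj,
) -> Optional[ResponsesAPIStreamingResponse]:
    iterator = ResponsesAPIStreamingIterator(
        response=response,  # assumed: an httpx.Response opened in streaming mode
        model="gpt-4o",  # hypothetical model name
        responses_api_provider_config=provider_config,
        logging_obj=logging_obj,
    )
    async for chunk in iterator:
        # Each chunk is a ResponsesAPIStreamingResponse; a real caller would
        # forward it to the client or accumulate output deltas here.
        _ = chunk
    # Set once a "response.completed" event has been seen, else None.
    return iterator.completed_response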