forked from phoenix/litellm-mirror
use PassThroughStreamingHandler
This commit is contained in:
parent
4273837add
commit
04c9284da4
1 changed files with 87 additions and 77 deletions
|
@ -27,6 +27,9 @@ from .success_handler import PassThroughEndpointLogging
|
||||||
from .types import EndpointType
|
from .types import EndpointType
|
||||||
|
|
||||||
|
|
||||||
|
class PassThroughStreamingHandler:
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
async def chunk_processor(
|
async def chunk_processor(
|
||||||
response: httpx.Response,
|
response: httpx.Response,
|
||||||
request_body: Optional[dict],
|
request_body: Optional[dict],
|
||||||
|
@ -41,44 +44,29 @@ async def chunk_processor(
|
||||||
- Collect non-empty chunks for post-processing (logging)
|
- Collect non-empty chunks for post-processing (logging)
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
if endpoint_type == EndpointType.VERTEX_AI:
|
raw_bytes: List[bytes] = []
|
||||||
async for chunk in response.aiter_bytes():
|
async for chunk in response.aiter_bytes():
|
||||||
|
raw_bytes.append(chunk)
|
||||||
yield chunk
|
yield chunk
|
||||||
else:
|
|
||||||
collected_chunks: List[str] = [] # List to store all chunks
|
|
||||||
async for chunk in response.aiter_lines():
|
|
||||||
verbose_proxy_logger.debug(f"Processing chunk: {chunk}")
|
|
||||||
if not chunk:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Handle SSE format - pass through the raw SSE format
|
|
||||||
if isinstance(chunk, bytes):
|
|
||||||
chunk = chunk.decode("utf-8")
|
|
||||||
|
|
||||||
# Store the chunk for post-processing
|
|
||||||
if chunk.strip(): # Only store non-empty chunks
|
|
||||||
collected_chunks.append(chunk)
|
|
||||||
yield f"{chunk}\n"
|
|
||||||
|
|
||||||
# After all chunks are processed, handle post-processing
|
# After all chunks are processed, handle post-processing
|
||||||
end_time = datetime.now()
|
end_time = datetime.now()
|
||||||
|
|
||||||
await _route_streaming_logging_to_handler(
|
await PassThroughStreamingHandler._route_streaming_logging_to_handler(
|
||||||
litellm_logging_obj=litellm_logging_obj,
|
litellm_logging_obj=litellm_logging_obj,
|
||||||
passthrough_success_handler_obj=passthrough_success_handler_obj,
|
passthrough_success_handler_obj=passthrough_success_handler_obj,
|
||||||
url_route=url_route,
|
url_route=url_route,
|
||||||
request_body=request_body or {},
|
request_body=request_body or {},
|
||||||
endpoint_type=endpoint_type,
|
endpoint_type=endpoint_type,
|
||||||
start_time=start_time,
|
start_time=start_time,
|
||||||
all_chunks=collected_chunks,
|
raw_bytes=raw_bytes,
|
||||||
end_time=end_time,
|
end_time=end_time,
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.error(f"Error in chunk_processor: {str(e)}")
|
verbose_proxy_logger.error(f"Error in chunk_processor: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
async def _route_streaming_logging_to_handler(
|
async def _route_streaming_logging_to_handler(
|
||||||
litellm_logging_obj: LiteLLMLoggingObj,
|
litellm_logging_obj: LiteLLMLoggingObj,
|
||||||
passthrough_success_handler_obj: PassThroughEndpointLogging,
|
passthrough_success_handler_obj: PassThroughEndpointLogging,
|
||||||
|
@ -86,7 +74,7 @@ async def _route_streaming_logging_to_handler(
|
||||||
request_body: dict,
|
request_body: dict,
|
||||||
endpoint_type: EndpointType,
|
endpoint_type: EndpointType,
|
||||||
start_time: datetime,
|
start_time: datetime,
|
||||||
all_chunks: List[str],
|
raw_bytes: List[bytes],
|
||||||
end_time: datetime,
|
end_time: datetime,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
@ -96,6 +84,9 @@ async def _route_streaming_logging_to_handler(
|
||||||
- Anthropic
|
- Anthropic
|
||||||
- Vertex AI
|
- Vertex AI
|
||||||
"""
|
"""
|
||||||
|
all_chunks = PassThroughStreamingHandler._convert_raw_bytes_to_str_lines(
|
||||||
|
raw_bytes
|
||||||
|
)
|
||||||
if endpoint_type == EndpointType.ANTHROPIC:
|
if endpoint_type == EndpointType.ANTHROPIC:
|
||||||
await AnthropicPassthroughLoggingHandler._handle_logging_anthropic_collected_chunks(
|
await AnthropicPassthroughLoggingHandler._handle_logging_anthropic_collected_chunks(
|
||||||
litellm_logging_obj=litellm_logging_obj,
|
litellm_logging_obj=litellm_logging_obj,
|
||||||
|
@ -121,3 +112,22 @@ async def _route_streaming_logging_to_handler(
|
||||||
elif endpoint_type == EndpointType.GENERIC:
|
elif endpoint_type == EndpointType.GENERIC:
|
||||||
# No logging is supported for generic streaming endpoints
|
# No logging is supported for generic streaming endpoints
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _convert_raw_bytes_to_str_lines(raw_bytes: List[bytes]) -> List[str]:
|
||||||
|
"""
|
||||||
|
Converts a list of raw bytes into a list of string lines, similar to aiter_lines()
|
||||||
|
|
||||||
|
Args:
|
||||||
|
raw_bytes: List of bytes chunks from aiter.bytes()
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of string lines, with each line being a complete data: {} chunk
|
||||||
|
"""
|
||||||
|
# Combine all bytes and decode to string
|
||||||
|
combined_str = b"".join(raw_bytes).decode("utf-8")
|
||||||
|
|
||||||
|
# Split by newlines and filter out empty lines
|
||||||
|
lines = [line.strip() for line in combined_str.split("\n") if line.strip()]
|
||||||
|
|
||||||
|
return lines
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue