Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 03:04:13 +00:00
Commit a29587e178 (parent 55115bf520): MockResponsesAPIStreamingIterator

1 changed file with 48 additions and 3 deletions
@@ -20,6 +20,7 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.responses.streaming_iterator import (
     BaseResponsesAPIStreamingIterator,
+    MockResponsesAPIStreamingIterator,
     ResponsesAPIStreamingIterator,
     SyncResponsesAPIStreamingIterator,
 )
@@ -1004,6 +1005,7 @@ class BaseLLMHTTPHandler:
                 extra_body=extra_body,
                 timeout=timeout,
                 client=client if isinstance(client, AsyncHTTPHandler) else None,
+                fake_stream=fake_stream,
             )

         if client is None or not isinstance(client, HTTPHandler):
@@ -1052,14 +1054,27 @@ class BaseLLMHTTPHandler:
         try:
             if stream:
                 # For streaming, use stream=True in the request
+                if fake_stream is True:
+                    stream, data = self._prepare_fake_stream_request(
+                        stream=stream,
+                        data=data,
+                        fake_stream=fake_stream,
+                    )
                 response = sync_httpx_client.post(
                     url=api_base,
                     headers=headers,
                     data=json.dumps(data),
                     timeout=timeout
                     or response_api_optional_request_params.get("timeout"),
-                    stream=True,
+                    stream=stream,
                 )
+                if fake_stream is True:
+                    return MockResponsesAPIStreamingIterator(
+                        response=response,
+                        model=model,
+                        logging_obj=logging_obj,
+                        responses_api_provider_config=responses_api_provider_config,
+                    )

                 return SyncResponsesAPIStreamingIterator(
                     response=response,
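The sync hunk above captures the fake-stream technique: when `fake_stream` is set, `_prepare_fake_stream_request` downgrades the outbound call to a plain request, and the complete response is then replayed through `MockResponsesAPIStreamingIterator` as though the provider had streamed it. A rough sketch of that wrapping pattern only, not litellm's actual class (which lives in `litellm.responses.streaming_iterator` and also takes `model`, `logging_obj`, and `responses_api_provider_config`, as the diff shows); the event shape here is an assumption:

# Illustrative sketch of the fake-stream wrapper, not litellm's code.
# A completed (non-streamed) HTTP response is parsed once and replayed
# as a single synthetic stream event.
from typing import Iterator

import httpx


class FakeStreamIterator:
    def __init__(self, response: httpx.Response) -> None:
        self._payload = response.json()  # full body, already complete
        self._done = False

    def __iter__(self) -> Iterator[dict]:
        return self

    def __next__(self) -> dict:
        if self._done:
            raise StopIteration
        self._done = True
        # Emit the whole response as one stream-shaped event; the
        # event type name is assumed, not taken from litellm.
        return {"type": "response.completed", "response": self._payload}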
@@ -1147,22 +1162,36 @@ class BaseLLMHTTPHandler:
                     "headers": headers,
                 },
             )

         # Check if streaming is requested
         stream = response_api_optional_request_params.get("stream", False)

         try:
             if stream:
                 # For streaming, we need to use stream=True in the request
+                if fake_stream is True:
+                    stream, data = self._prepare_fake_stream_request(
+                        stream=stream,
+                        data=data,
+                        fake_stream=fake_stream,
+                    )
+
                 response = await async_httpx_client.post(
                     url=api_base,
                     headers=headers,
                     data=json.dumps(data),
                     timeout=timeout
                     or response_api_optional_request_params.get("timeout"),
-                    stream=True,
+                    stream=stream,
                 )

+                if fake_stream is True:
+                    return MockResponsesAPIStreamingIterator(
+                        response=response,
+                        model=model,
+                        logging_obj=logging_obj,
+                        responses_api_provider_config=responses_api_provider_config,
+                    )
                 # Return the streaming iterator
                 return ResponsesAPIStreamingIterator(
                     response=response,
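The async hunk mirrors the sync path. One consequence worth noting: callers consume the returned iterator the same way whether it is the real `ResponsesAPIStreamingIterator` or the mock one. A hypothetical caller-side sketch (the wiring in the final comment is a placeholder, not litellm's public API):

# Hypothetical consumer sketch. It assumes the handler hands back an
# object supporting the async-iterator protocol, which is how the
# async streaming iterators here are meant to be consumed.
import asyncio


async def consume(stream) -> None:
    async for event in stream:  # real and fake streams look identical here
        print(event)


# Placeholder wiring, not litellm's public API:
# asyncio.run(consume(returned_iterator))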
@@ -1179,6 +1208,7 @@ class BaseLLMHTTPHandler:
                 timeout=timeout
                 or response_api_optional_request_params.get("timeout"),
             )
+
         except Exception as e:
             raise self._handle_error(
                 e=e,
@@ -1191,6 +1221,21 @@ class BaseLLMHTTPHandler:
             logging_obj=logging_obj,
         )

+    def _prepare_fake_stream_request(
+        self,
+        stream: bool,
+        data: dict,
+        fake_stream: bool,
+    ) -> Tuple[bool, dict]:
+        """
+        Handles preparing a request when `fake_stream` is True.
+        """
+        if fake_stream is True:
+            stream = False
+            data.pop("stream", None)
+            return stream, data
+        return stream, data
+
     def _handle_error(
         self,
         e: Exception,
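The contract of the new `_prepare_fake_stream_request` helper is fully visible in the hunk: with `fake_stream` enabled it forces `stream` to `False` and pops the `"stream"` key from the request body, so the provider receives an ordinary non-streaming request. A standalone rendering with a quick usage check (the payload is illustrative):

from typing import Tuple


def _prepare_fake_stream_request(
    stream: bool, data: dict, fake_stream: bool
) -> Tuple[bool, dict]:
    """Standalone copy of the helper added above, for illustration."""
    if fake_stream is True:
        stream = False
        data.pop("stream", None)  # the provider must not see a stream flag
        return stream, data
    return stream, data


# Illustrative payload; the model name is a placeholder.
stream, data = _prepare_fake_stream_request(
    stream=True,
    data={"model": "some-model", "stream": True},
    fake_stream=True,
)
assert stream is False and "stream" not in data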