diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index 01fe36acda..fa4cd5dcc6 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -978,6 +978,7 @@ class BaseLLMHTTPHandler: timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, _is_async: bool = False, + fake_stream: bool = False, ) -> Union[ ResponsesAPIResponse, BaseResponsesAPIStreamingIterator, @@ -1100,6 +1101,7 @@ class BaseLLMHTTPHandler: extra_body: Optional[Dict[str, Any]] = None, timeout: Optional[Union[float, httpx.Timeout]] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, + fake_stream: bool = False, ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]: """ Async version of the responses API handler. diff --git a/litellm/responses/main.py b/litellm/responses/main.py index 43f37bdbc6..aec2f8fe4a 100644 --- a/litellm/responses/main.py +++ b/litellm/responses/main.py @@ -232,6 +232,9 @@ def responses( timeout=timeout or request_timeout, _is_async=_is_async, client=kwargs.get("client"), + fake_stream=responses_api_provider_config.should_fake_stream( + model=model, stream=stream, custom_llm_provider=custom_llm_provider + ), ) return response