add aresponses

This commit is contained in:
Ishaan Jaff 2025-03-12 09:22:44 -07:00
parent ca18a5c5f0
commit eeb01bca15
3 changed files with 273 additions and 59 deletions

View file

@ -1,6 +1,6 @@
import io
import json
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Coroutine, Dict, Optional, Tuple, Union
import httpx # type: ignore
@ -18,7 +18,11 @@ from litellm.llms.custom_httpx.http_handler import (
_get_httpx_client,
get_async_httpx_client,
)
from litellm.responses.streaming_iterator import ResponsesAPIStreamingIterator
from litellm.responses.streaming_iterator import (
BaseResponsesAPIStreamingIterator,
ResponsesAPIStreamingIterator,
SyncResponsesAPIStreamingIterator,
)
from litellm.types.llms.openai import (
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
@ -961,32 +965,164 @@ class BaseLLMHTTPHandler:
return returned_response
return model_response
async def async_response_api_handler(
def response_api_handler(
self,
model: str,
custom_llm_provider: str,
input: Union[str, ResponseInputParam],
responses_api_provider_config: BaseResponsesAPIConfig,
response_api_optional_request_params: Dict,
logging_obj: LiteLLMLoggingObj,
custom_llm_provider: str,
litellm_params: GenericLiteLLMParams,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
logging_obj: LiteLLMLoggingObj,
extra_headers: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
) -> Union[ResponsesAPIResponse, ResponsesAPIStreamingIterator]:
if client is None or not isinstance(client, AsyncHTTPHandler):
async_httpx_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders(custom_llm_provider)
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
_is_async: bool = False,
) -> Union[
ResponsesAPIResponse,
BaseResponsesAPIStreamingIterator,
Coroutine[
Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
],
]:
"""
Handles responses API requests.
When _is_async=True, returns a coroutine instead of making the call directly.
"""
if _is_async:
# Return the async coroutine if called with _is_async=True
return self.async_response_api_handler(
model=model,
input=input,
responses_api_provider_config=responses_api_provider_config,
response_api_optional_request_params=response_api_optional_request_params,
custom_llm_provider=custom_llm_provider,
litellm_params=litellm_params,
logging_obj=logging_obj,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout,
client=client if isinstance(client, AsyncHTTPHandler) else None,
)
if client is None or not isinstance(client, HTTPHandler):
sync_httpx_client = _get_httpx_client(
params={"ssl_verify": litellm_params.get("ssl_verify", None)}
)
else:
async_httpx_client = client
sync_httpx_client = client
headers = responses_api_provider_config.validate_environment(
api_key=litellm_params.api_key,
headers=response_api_optional_request_params.get("extra_headers", {}) or {},
model=model,
)
if extra_headers:
headers.update(extra_headers)
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
model=model,
)
data = responses_api_provider_config.transform_responses_api_request(
model=model,
input=input,
response_api_optional_request_params=response_api_optional_request_params,
litellm_params=litellm_params,
headers=headers,
)
## LOGGING
logging_obj.pre_call(
input=input,
api_key="",
additional_args={
"complete_input_dict": data,
"api_base": api_base,
"headers": headers,
},
)
# Check if streaming is requested
stream = response_api_optional_request_params.get("stream", False)
try:
if stream:
# For streaming, use stream=True in the request
response = sync_httpx_client.post(
url=api_base,
headers=headers,
data=json.dumps(data),
timeout=timeout
or response_api_optional_request_params.get("timeout"),
stream=True,
)
return SyncResponsesAPIStreamingIterator(
response=response,
model=model,
logging_obj=logging_obj,
responses_api_provider_config=responses_api_provider_config,
)
else:
# For non-streaming requests
response = sync_httpx_client.post(
url=api_base,
headers=headers,
data=json.dumps(data),
timeout=timeout
or response_api_optional_request_params.get("timeout"),
)
except Exception as e:
raise self._handle_error(
e=e,
provider_config=responses_api_provider_config,
)
return responses_api_provider_config.transform_response_api_response(
model=model,
raw_response=response,
logging_obj=logging_obj,
)
async def async_response_api_handler(
self,
model: str,
input: Union[str, ResponseInputParam],
responses_api_provider_config: BaseResponsesAPIConfig,
response_api_optional_request_params: Dict,
custom_llm_provider: str,
litellm_params: GenericLiteLLMParams,
logging_obj: LiteLLMLoggingObj,
extra_headers: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
"""
Async version of the responses API handler.
Uses async HTTP client to make requests.
"""
if client is None or not isinstance(client, AsyncHTTPHandler):
async_httpx_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders(custom_llm_provider),
params={"ssl_verify": litellm_params.get("ssl_verify", None)},
)
else:
async_httpx_client = client
headers = responses_api_provider_config.validate_environment(
api_key=litellm_params.api_key,
headers=response_api_optional_request_params.get("extra_headers", {}) or {},
model=model,
)
if extra_headers:
headers.update(extra_headers)
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
model=model,
@ -1021,7 +1157,8 @@ class BaseLLMHTTPHandler:
url=api_base,
headers=headers,
data=json.dumps(data),
timeout=response_api_optional_request_params.get("timeout"),
timeout=timeout
or response_api_optional_request_params.get("timeout"),
stream=True,
)
@ -1038,7 +1175,8 @@ class BaseLLMHTTPHandler:
url=api_base,
headers=headers,
data=json.dumps(data),
timeout=response_api_optional_request_params.get("timeout"),
timeout=timeout
or response_api_optional_request_params.get("timeout"),
)
except Exception as e:
raise self._handle_error(