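"""
Entry points for the LiteLLM Responses API.

Exposes `aresponses()` (async) and `responses()` (sync), which resolve the
target provider from the model string, map the OpenAI-style request
parameters through the provider's Responses API config, and dispatch the
call via the shared HTTP handler.
"""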
import asyncio
import contextvars
from functools import partial
from typing import Any, Dict, Iterable, List, Literal, Optional, Union, get_type_hints

import httpx

import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
    Reasoning,
    ResponseIncludable,
    ResponseInputParam,
    ResponsesAPIOptionalRequestParams,
    ResponsesAPIResponse,
    ResponseTextConfigParam,
    ToolChoice,
    ToolParam,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import ProviderConfigManager, client

from .streaming_iterator import BaseResponsesAPIStreamingIterator

####### ENVIRONMENT VARIABLES ###################
# Initialize any necessary instances or variables here
base_llm_http_handler = BaseLLMHTTPHandler()
#################################################


@client
async def aresponses(
    input: Union[str, ResponseInputParam],
    model: str,
    include: Optional[List[ResponseIncludable]] = None,
    instructions: Optional[str] = None,
    max_output_tokens: Optional[int] = None,
    metadata: Optional[Dict[str, Any]] = None,
    parallel_tool_calls: Optional[bool] = None,
    previous_response_id: Optional[str] = None,
    reasoning: Optional[Reasoning] = None,
    store: Optional[bool] = None,
    stream: Optional[bool] = None,
    temperature: Optional[float] = None,
    text: Optional[ResponseTextConfigParam] = None,
    tool_choice: Optional[ToolChoice] = None,
    tools: Optional[Iterable[ToolParam]] = None,
    top_p: Optional[float] = None,
    truncation: Optional[Literal["auto", "disabled"]] = None,
    user: Optional[str] = None,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_query: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    **kwargs,
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
    """
    Async: Handles Responses API requests by running the synchronous
    `responses` function in a thread-pool executor.
    """
    try:
        loop = asyncio.get_event_loop()
        # Signal to `responses` that it is being driven asynchronously, so the
        # handler returns a coroutine instead of blocking on the HTTP call.
        kwargs["aresponses"] = True

        func = partial(
            responses,
            input=input,
            model=model,
            include=include,
            instructions=instructions,
            max_output_tokens=max_output_tokens,
            metadata=metadata,
            parallel_tool_calls=parallel_tool_calls,
            previous_response_id=previous_response_id,
            reasoning=reasoning,
            store=store,
            stream=stream,
            temperature=temperature,
            text=text,
            tool_choice=tool_choice,
            tools=tools,
            top_p=top_p,
            truncation=truncation,
            user=user,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            **kwargs,
        )

        # Copy the current context so contextvars (e.g. request-scoped state)
        # are visible inside the executor thread.
        ctx = contextvars.copy_context()
        func_with_context = partial(ctx.run, func)
        init_response = await loop.run_in_executor(None, func_with_context)

        # With the "aresponses" flag set, `responses` hands back a coroutine
        # for the async HTTP code path; await it to get the final result.
        if asyncio.iscoroutine(init_response):
            response = await init_response
        else:
            response = init_response
        return response
    except Exception as e:
        raise e


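# Usage sketch (comment only, not executed as part of this module). A minimal
# async call, assuming OPENAI_API_KEY is set in the environment and that
# "openai/gpt-4o" is a model available to your account -- both are assumptions
# for illustration, not requirements of this module:
#
#     import asyncio
#     import litellm
#
#     async def main():
#         response = await litellm.aresponses(
#             model="openai/gpt-4o",
#             input="Write a one-sentence bedtime story about a unicorn.",
#             max_output_tokens=100,
#         )
#         print(response)
#
#     asyncio.run(main())

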
@client
def responses(
    input: Union[str, ResponseInputParam],
    model: str,
    include: Optional[List[ResponseIncludable]] = None,
    instructions: Optional[str] = None,
    max_output_tokens: Optional[int] = None,
    metadata: Optional[Dict[str, Any]] = None,
    parallel_tool_calls: Optional[bool] = None,
    previous_response_id: Optional[str] = None,
    reasoning: Optional[Reasoning] = None,
    store: Optional[bool] = None,
    stream: Optional[bool] = None,
    temperature: Optional[float] = None,
    text: Optional[ResponseTextConfigParam] = None,
    tool_choice: Optional[ToolChoice] = None,
    tools: Optional[Iterable[ToolParam]] = None,
    top_p: Optional[float] = None,
    truncation: Optional[Literal["auto", "disabled"]] = None,
    user: Optional[str] = None,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_query: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    **kwargs,
):
    """
    Synchronous version of the Responses API.

    Uses the synchronous HTTP handler to make requests. When called from
    `aresponses`, the "aresponses" kwarg switches the handler onto its async
    code path and a coroutine is returned instead.
    """
    litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj")  # type: ignore
    litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
    _is_async = kwargs.pop("aresponses", False) is True

    # Resolve the provider (and any dynamic api_key/api_base) from the model string
    litellm_params = GenericLiteLLMParams(**kwargs)
    model, custom_llm_provider, dynamic_api_key, dynamic_api_base = (
        litellm.get_llm_provider(
            model=model,
            custom_llm_provider=kwargs.get("custom_llm_provider", None),
            api_base=litellm_params.api_base,
            api_key=litellm_params.api_key,
        )
    )

    # Look up the provider's Responses API config; not every provider supports this API
    responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
        ProviderConfigManager.get_provider_responses_api_config(
            model=model,
            provider=litellm.LlmProviders(custom_llm_provider),
        )
    )

    if responses_api_provider_config is None:
        raise litellm.BadRequestError(
            model=model,
            llm_provider=custom_llm_provider,
            message=f"Responses API not available for custom_llm_provider={custom_llm_provider}, model: {model}",
        )

    # Gather all parameters using locals() and combine with kwargs
    local_vars = locals()
    local_vars.update(kwargs)
    # Build ResponsesAPIOptionalRequestParams with only the valid parameters
    response_api_optional_params: ResponsesAPIOptionalRequestParams = (
        ResponsesAPIRequestUtils.get_requested_response_api_optional_param(local_vars)
    )

    # Map the optional parameters onto what this provider's Responses API accepts
    responses_api_request_params: Dict = (
        ResponsesAPIRequestUtils.get_optional_params_responses_api(
            model=model,
            responses_api_provider_config=responses_api_provider_config,
            response_api_optional_params=response_api_optional_params,
        )
    )

    # Pre-call logging
    litellm_logging_obj.update_environment_variables(
        model=model,
        user=user,
        optional_params=dict(responses_api_request_params),
        litellm_params={
            "litellm_call_id": litellm_call_id,
            **responses_api_request_params,
        },
        custom_llm_provider=custom_llm_provider,
    )

    # Call the handler with the _is_async flag instead of directly calling the async handler
    response = base_llm_http_handler.response_api_handler(
        model=model,
        input=input,
        responses_api_provider_config=responses_api_provider_config,
        response_api_optional_request_params=responses_api_request_params,
        custom_llm_provider=custom_llm_provider,
        litellm_params=litellm_params,
        logging_obj=litellm_logging_obj,
        extra_headers=extra_headers,
        extra_body=extra_body,
        timeout=timeout,
        _is_async=_is_async,
        client=kwargs.get("client"),
    )

    return response
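
# Usage sketch (comment only, not executed as part of this module). Per the
# return annotation on `aresponses`, non-streaming calls produce a
# ResponsesAPIResponse, while stream=True produces a
# BaseResponsesAPIStreamingIterator that yields events as they arrive. The
# model name and OPENAI_API_KEY setup are assumptions for illustration:
#
#     import litellm
#
#     # Non-streaming: returns a ResponsesAPIResponse
#     response = litellm.responses(
#         model="openai/gpt-4o",
#         input="Summarize the plot of Hamlet in two sentences.",
#     )
#
#     # Streaming: iterate over events as they are received
#     stream = litellm.responses(
#         model="openai/gpt-4o",
#         input="Count from one to five.",
#         stream=True,
#     )
#     for event in stream:
#         print(event)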