[Feat] Support for all litellm providers on Responses API (works with Codex) - Anthropic, Bedrock API, VertexAI, Ollama (#10132)

* transform request

* basic handler for LiteLLMCompletionTransformationHandler

* complete transform litellm to responses api

* fixes to test

* fix stream=True

* fix streaming iterator

* fixes for transformation

* fixes for anthropic codex support

* fix pass response_api_optional_params

* test anthropic responses api tools

* update responses types

* working codex with litellm

* add session handler

* fixes streaming iterator

* fix handler

* add litellm codex example

* fix code quality

* test fix

* docs litellm codex

* litellm codexdoc

* docs openai codex with litellm

* docs litellm openai codex

* litellm codex

* linting fixes for transforming responses API

* fix import error

* fix responses api test

* add sync iterator support for responses api
Ishaan Jaff · 2025-04-18 19:53:59 -07:00 · committed by GitHub
parent 3e87ec4f16 · commit 3d5022bd79
14 changed files with 1282 additions and 53 deletions
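After this change, litellm's `responses()` entrypoint works for any provider that litellm can already serve through `litellm.completion()`, not only providers with a native Responses API integration. A minimal usage sketch, assuming an Anthropic key is configured; the model name is illustrative:

```python
import litellm

# Assumes ANTHROPIC_API_KEY is exported; the model name is an illustrative
# Anthropic model, matching the `anthropic/*` routing added in this commit.
response = litellm.responses(
    model="anthropic/claude-3-5-sonnet-20240620",
    input="Write a one-line haiku about build systems.",
    max_output_tokens=100,
)

# The bridge returns the Responses API shape (id, output items, usage),
# even though the call was served over the chat-completion code path.
print(response.id)
print(response.output)
```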


@@ -1,6 +1,13 @@
 model_list:
-  - model_name: fake-openai-endpoint
+  - model_name: openai/*
     litellm_params:
-      model: openai/fake
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      model: openai/*
+  - model_name: anthropic/*
+    litellm_params:
+      model: anthropic/*
+  - model_name: gemini/*
+    litellm_params:
+      model: gemini/*
+
+litellm_settings:
+  drop_params: true
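This is the proxy config backing the Codex workflow: the wildcard entries route any `openai/*`, `anthropic/*`, or `gemini/*` model through the proxy's OpenAI-compatible Responses endpoint. A sketch of a client call against a locally running proxy; the base URL, port, and virtual key are assumptions, not part of this diff:

```python
from openai import OpenAI

# Assumed local proxy address and virtual key; adjust for your deployment.
client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

# Matched by the `anthropic/*` wildcard entry in the config above.
response = client.responses.create(
    model="anthropic/claude-3-5-sonnet-20240620",
    input="Summarize what a Responses API bridge does in one sentence.",
)
print(response.output)
```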


@@ -0,0 +1,115 @@
"""
Handler for transforming responses api requests to litellm.completion requests
"""
from typing import Any, Coroutine, Optional, Union
import litellm
from litellm.responses.litellm_completion_transformation.streaming_iterator import (
LiteLLMCompletionStreamingIterator,
)
from litellm.responses.litellm_completion_transformation.transformation import (
LiteLLMCompletionResponsesConfig,
)
from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
from litellm.types.llms.openai import (
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
)
from litellm.types.utils import ModelResponse
class LiteLLMCompletionTransformationHandler:
def response_api_handler(
self,
model: str,
input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
custom_llm_provider: Optional[str] = None,
_is_async: bool = False,
stream: Optional[bool] = None,
**kwargs,
) -> Union[
ResponsesAPIResponse,
BaseResponsesAPIStreamingIterator,
Coroutine[
Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
],
]:
litellm_completion_request: dict = (
LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
model=model,
input=input,
responses_api_request=responses_api_request,
custom_llm_provider=custom_llm_provider,
stream=stream,
**kwargs,
)
)
if _is_async:
return self.async_response_api_handler(
litellm_completion_request=litellm_completion_request,
request_input=input,
responses_api_request=responses_api_request,
**kwargs,
)
litellm_completion_response: Union[
ModelResponse, litellm.CustomStreamWrapper
] = litellm.completion(
**litellm_completion_request,
**kwargs,
)
if isinstance(litellm_completion_response, ModelResponse):
responses_api_response: ResponsesAPIResponse = (
LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
chat_completion_response=litellm_completion_response,
request_input=input,
responses_api_request=responses_api_request,
)
)
return responses_api_response
elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
return LiteLLMCompletionStreamingIterator(
litellm_custom_stream_wrapper=litellm_completion_response,
request_input=input,
responses_api_request=responses_api_request,
)
async def async_response_api_handler(
self,
litellm_completion_request: dict,
request_input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
**kwargs,
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
litellm_completion_response: Union[
ModelResponse, litellm.CustomStreamWrapper
] = await litellm.acompletion(
**litellm_completion_request,
**kwargs,
)
if isinstance(litellm_completion_response, ModelResponse):
responses_api_response: ResponsesAPIResponse = (
LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
chat_completion_response=litellm_completion_response,
request_input=request_input,
responses_api_request=responses_api_request,
)
)
return responses_api_response
elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
return LiteLLMCompletionStreamingIterator(
litellm_custom_stream_wrapper=litellm_completion_response,
request_input=request_input,
responses_api_request=responses_api_request,
)
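The handler is normally reached through `litellm.responses()`, but its contract is visible from the signature above: transform the Responses API request, call `litellm.completion()` (or `acompletion()` when `_is_async=True`), then transform the result back. A rough sketch of invoking it directly, assuming an Anthropic key is configured and using an illustrative model name:

```python
from litellm.responses.litellm_completion_transformation.handler import (
    LiteLLMCompletionTransformationHandler,
)
from litellm.types.llms.openai import ResponsesAPIOptionalRequestParams

handler = LiteLLMCompletionTransformationHandler()

# Synchronous, non-streaming path:
# transform request -> litellm.completion -> transform response back.
result = handler.response_api_handler(
    model="anthropic/claude-3-5-sonnet-20240620",  # illustrative model
    input="Say hello in five words.",
    responses_api_request=ResponsesAPIOptionalRequestParams(),
    custom_llm_provider="anthropic",
    _is_async=False,
    stream=None,
)

# Non-streaming calls come back as a ResponsesAPIResponse; stream=True
# returns the LiteLLMCompletionStreamingIterator instead.
print(type(result).__name__)
```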


@@ -0,0 +1,59 @@
"""
Responses API has previous_response_id, which is the id of the previous response.
LiteLLM needs to maintain a cache of the previous response input, output, previous_response_id, and model.
This class handles that cache.
"""
from typing import List, Optional, Tuple, Union
from typing_extensions import TypedDict
from litellm.caching import InMemoryCache
from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse
RESPONSES_API_PREVIOUS_RESPONSES_CACHE = InMemoryCache()
MAX_PREV_SESSION_INPUTS = 50
class ResponsesAPISessionElement(TypedDict, total=False):
input: Union[str, ResponseInputParam]
output: ResponsesAPIResponse
response_id: str
previous_response_id: Optional[str]
class SessionHandler:
def add_completed_response_to_cache(
self, response_id: str, session_element: ResponsesAPISessionElement
):
RESPONSES_API_PREVIOUS_RESPONSES_CACHE.set_cache(
key=response_id, value=session_element
)
def get_chain_of_previous_input_output_pairs(
self, previous_response_id: str
) -> List[Tuple[ResponseInputParam, ResponsesAPIResponse]]:
response_api_inputs: List[Tuple[ResponseInputParam, ResponsesAPIResponse]] = []
current_previous_response_id = previous_response_id
count_session_elements = 0
while current_previous_response_id:
if count_session_elements > MAX_PREV_SESSION_INPUTS:
break
session_element = RESPONSES_API_PREVIOUS_RESPONSES_CACHE.get_cache(
key=current_previous_response_id
)
if session_element:
response_api_inputs.append(
(session_element.get("input"), session_element.get("output"))
)
current_previous_response_id = session_element.get(
"previous_response_id"
)
else:
break
count_session_elements += 1
return response_api_inputs
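Because the cache walks `previous_response_id` links, a Codex-style multi-turn session can be replayed into a flat chat history without the client resending earlier messages. A sketch of that chaining through the public API; the model name is illustrative, and note the cache above is in-memory and per-process:

```python
import litellm

first = litellm.responses(
    model="anthropic/claude-3-5-sonnet-20240620",  # illustrative model
    input="Remember the number 42.",
)

# Passing previous_response_id makes the session handler replay the earlier
# input/output pair into the chat-completion message list for this turn.
second = litellm.responses(
    model="anthropic/claude-3-5-sonnet-20240620",
    input="What number did I ask you to remember?",
    previous_response_id=first.id,
)
print(second.output)
```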


@@ -0,0 +1,110 @@
from typing import List, Optional, Union
import litellm
from litellm.main import stream_chunk_builder
from litellm.responses.litellm_completion_transformation.transformation import (
LiteLLMCompletionResponsesConfig,
)
from litellm.responses.streaming_iterator import ResponsesAPIStreamingIterator
from litellm.types.llms.openai import (
ResponseCompletedEvent,
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIStreamEvents,
ResponsesAPIStreamingResponse,
)
from litellm.types.utils import (
ModelResponse,
ModelResponseStream,
TextCompletionResponse,
)
class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
"""
Async iterator for processing streaming responses from the Responses API.
"""
def __init__(
self,
litellm_custom_stream_wrapper: litellm.CustomStreamWrapper,
request_input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
):
self.litellm_custom_stream_wrapper: litellm.CustomStreamWrapper = (
litellm_custom_stream_wrapper
)
self.request_input: Union[str, ResponseInputParam] = request_input
self.responses_api_request: ResponsesAPIOptionalRequestParams = (
responses_api_request
)
self.collected_chunks: List[ModelResponseStream] = []
self.finished: bool = False
async def __anext__(
self,
) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
try:
while True:
if self.finished is True:
raise StopAsyncIteration
# Get the next chunk from the stream
try:
chunk = await self.litellm_custom_stream_wrapper.__anext__()
self.collected_chunks.append(chunk)
except StopAsyncIteration:
self.finished = True
response_completed_event = self._emit_response_completed_event()
if response_completed_event:
return response_completed_event
else:
raise StopAsyncIteration
except Exception as e:
# Handle HTTP errors
self.finished = True
raise e
def __iter__(self):
return self
def __next__(
self,
) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
try:
while True:
if self.finished is True:
                    raise StopIteration
# Get the next chunk from the stream
try:
chunk = self.litellm_custom_stream_wrapper.__next__()
self.collected_chunks.append(chunk)
                except StopIteration:
self.finished = True
response_completed_event = self._emit_response_completed_event()
if response_completed_event:
return response_completed_event
else:
                        raise StopIteration
except Exception as e:
# Handle HTTP errors
self.finished = True
raise e
def _emit_response_completed_event(self) -> Optional[ResponseCompletedEvent]:
litellm_model_response: Optional[
Union[ModelResponse, TextCompletionResponse]
] = stream_chunk_builder(chunks=self.collected_chunks)
if litellm_model_response and isinstance(litellm_model_response, ModelResponse):
return ResponseCompletedEvent(
type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
response=LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
request_input=self.request_input,
chat_completion_response=litellm_model_response,
responses_api_request=self.responses_api_request,
),
)
else:
return None
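Rather than translating every provider delta into Responses API stream events, this iterator buffers the chat-completion chunks and emits a single `response.completed` event assembled with `stream_chunk_builder`. A consumption sketch over the synchronous path added in this commit; the model name is illustrative:

```python
import litellm

# Assumes ANTHROPIC_API_KEY is set; the model name is illustrative.
stream = litellm.responses(
    model="anthropic/claude-3-5-sonnet-20240620",
    input="Stream a short sentence about caching.",
    stream=True,
)

# With this bridge the loop currently yields one terminal event: the
# aggregated response.completed event built from all collected chunks.
for event in stream:
    print(event.type)
    print(event.response.output)
```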


@@ -0,0 +1,631 @@
"""
Handles transforming from Responses API -> LiteLLM completion (Chat Completion API)
"""
from typing import Any, Dict, List, Optional, Union
from openai.types.responses.tool_param import FunctionToolParam
from litellm.caching import InMemoryCache
from litellm.responses.litellm_completion_transformation.session_handler import (
ResponsesAPISessionElement,
SessionHandler,
)
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionResponseMessage,
ChatCompletionSystemMessage,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolMessage,
ChatCompletionToolParam,
ChatCompletionToolParamFunctionChunk,
ChatCompletionUserMessage,
GenericChatCompletionMessage,
Reasoning,
ResponseAPIUsage,
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
ResponseTextConfig,
)
from litellm.types.responses.main import (
GenericResponseOutputItem,
GenericResponseOutputItemContentAnnotation,
OutputFunctionToolCall,
OutputText,
)
from litellm.types.utils import (
ChatCompletionAnnotation,
ChatCompletionMessageToolCall,
Choices,
Function,
Message,
ModelResponse,
Usage,
)
########### Initialize Classes used for Responses API ###########
TOOL_CALLS_CACHE = InMemoryCache()
RESPONSES_API_SESSION_HANDLER = SessionHandler()
########### End of Initialize Classes used for Responses API ###########
class LiteLLMCompletionResponsesConfig:
@staticmethod
def transform_responses_api_request_to_chat_completion_request(
model: str,
input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
custom_llm_provider: Optional[str] = None,
stream: Optional[bool] = None,
**kwargs,
) -> dict:
"""
Transform a Responses API request into a Chat Completion request
"""
litellm_completion_request: dict = {
"messages": LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
input=input,
responses_api_request=responses_api_request,
previous_response_id=responses_api_request.get("previous_response_id"),
),
"model": model,
"tool_choice": responses_api_request.get("tool_choice"),
"tools": LiteLLMCompletionResponsesConfig.transform_responses_api_tools_to_chat_completion_tools(
responses_api_request.get("tools") or [] # type: ignore
),
"top_p": responses_api_request.get("top_p"),
"user": responses_api_request.get("user"),
"temperature": responses_api_request.get("temperature"),
"parallel_tool_calls": responses_api_request.get("parallel_tool_calls"),
"max_tokens": responses_api_request.get("max_output_tokens"),
"stream": stream,
"metadata": kwargs.get("metadata"),
"service_tier": kwargs.get("service_tier"),
# litellm specific params
"custom_llm_provider": custom_llm_provider,
}
# only pass non-None values
litellm_completion_request = {
k: v for k, v in litellm_completion_request.items() if v is not None
}
return litellm_completion_request
@staticmethod
def transform_responses_api_input_to_messages(
input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
previous_response_id: Optional[str] = None,
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
]:
"""
Transform a Responses API input into a list of messages
"""
messages: List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
] = []
if responses_api_request.get("instructions"):
messages.append(
LiteLLMCompletionResponsesConfig.transform_instructions_to_system_message(
responses_api_request.get("instructions")
)
)
if previous_response_id:
previous_response_pairs = (
RESPONSES_API_SESSION_HANDLER.get_chain_of_previous_input_output_pairs(
previous_response_id=previous_response_id
)
)
if previous_response_pairs:
for previous_response_pair in previous_response_pairs:
chat_completion_input_messages = LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
input=previous_response_pair[0],
)
chat_completion_output_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_outputs_to_chat_completion_messages(
responses_api_output=previous_response_pair[1],
)
messages.extend(chat_completion_input_messages)
messages.extend(chat_completion_output_messages)
messages.extend(
LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
input=input,
)
)
return messages
@staticmethod
def _transform_response_input_param_to_chat_completion_message(
input: Union[str, ResponseInputParam],
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
]:
"""
Transform a ResponseInputParam into a Chat Completion message
"""
messages: List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
] = []
tool_call_output_messages: List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
] = []
if isinstance(input, str):
messages.append(ChatCompletionUserMessage(role="user", content=input))
elif isinstance(input, list):
for _input in input:
chat_completion_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_input_item_to_chat_completion_message(
input_item=_input
)
if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(
input_item=_input
):
tool_call_output_messages.extend(chat_completion_messages)
else:
messages.extend(chat_completion_messages)
messages.extend(tool_call_output_messages)
return messages
@staticmethod
def _ensure_tool_call_output_has_corresponding_tool_call(
messages: List[Union[AllMessageValues, GenericChatCompletionMessage]],
) -> bool:
"""
If any tool call output is present, ensure there is a corresponding tool call/tool_use block
"""
for message in messages:
if message.get("role") == "tool":
return True
return False
@staticmethod
def _transform_responses_api_input_item_to_chat_completion_message(
input_item: Any,
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionResponseMessage,
]
]:
"""
Transform a Responses API input item into a Chat Completion message
- EasyInputMessageParam
- Message
- ResponseOutputMessageParam
- ResponseFileSearchToolCallParam
- ResponseComputerToolCallParam
- ComputerCallOutput
- ResponseFunctionWebSearchParam
- ResponseFunctionToolCallParam
- FunctionCallOutput
- ResponseReasoningItemParam
- ItemReference
"""
if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(input_item):
# handle executed tool call results
return LiteLLMCompletionResponsesConfig._transform_responses_api_tool_call_output_to_chat_completion_message(
tool_call_output=input_item
)
else:
return [
GenericChatCompletionMessage(
role=input_item.get("role") or "user",
content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
input_item.get("content")
),
)
]
@staticmethod
def _is_input_item_tool_call_output(input_item: Any) -> bool:
"""
Check if the input item is a tool call output
"""
return input_item.get("type") in [
"function_call_output",
"web_search_call",
"computer_call_output",
]
@staticmethod
def _transform_responses_api_tool_call_output_to_chat_completion_message(
tool_call_output: Dict[str, Any],
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionResponseMessage,
]
]:
"""
ChatCompletionToolMessage is used to indicate the output from a tool call
"""
tool_output_message = ChatCompletionToolMessage(
role="tool",
content=tool_call_output.get("output") or "",
tool_call_id=tool_call_output.get("call_id") or "",
)
_tool_use_definition = TOOL_CALLS_CACHE.get_cache(
key=tool_call_output.get("call_id") or "",
)
if _tool_use_definition:
"""
Append the tool use definition to the list of messages
Providers like Anthropic require the tool use definition to be included with the tool output
- Input:
{'function':
arguments:'{"command": ["echo","<html>\\n<head>\\n <title>Hello</title>\\n</head>\\n<body>\\n <h1>Hi</h1>\\n</body>\\n</html>",">","index.html"]}',
name='shell',
'id': 'toolu_018KFWsEySHjdKZPdUzXpymJ',
'type': 'function'
}
- Output:
{
"id": "toolu_018KFWsEySHjdKZPdUzXpymJ",
"type": "function",
"function": {
"name": "get_weather",
"arguments": "{\"latitude\":48.8566,\"longitude\":2.3522}"
}
}
"""
function: dict = _tool_use_definition.get("function") or {}
tool_call_chunk = ChatCompletionToolCallChunk(
id=_tool_use_definition.get("id") or "",
type=_tool_use_definition.get("type") or "function",
function=ChatCompletionToolCallFunctionChunk(
name=function.get("name") or "",
arguments=function.get("arguments") or "",
),
index=0,
)
chat_completion_response_message = ChatCompletionResponseMessage(
tool_calls=[tool_call_chunk],
role="assistant",
)
return [chat_completion_response_message, tool_output_message]
return [tool_output_message]
@staticmethod
def _transform_responses_api_content_to_chat_completion_content(
content: Any,
) -> Union[str, List[Union[str, Dict[str, Any]]]]:
"""
        Transform Responses API content into Chat Completion content
"""
if isinstance(content, str):
return content
elif isinstance(content, list):
content_list: List[Union[str, Dict[str, Any]]] = []
for item in content:
if isinstance(item, str):
content_list.append(item)
elif isinstance(item, dict):
content_list.append(
{
"type": LiteLLMCompletionResponsesConfig._get_chat_completion_request_content_type(
item.get("type") or "text"
),
"text": item.get("text"),
}
)
return content_list
else:
raise ValueError(f"Invalid content type: {type(content)}")
@staticmethod
def _get_chat_completion_request_content_type(content_type: str) -> str:
"""
Get the Chat Completion request content type
"""
# Responses API content has `input_` prefix, if it exists, remove it
if content_type.startswith("input_"):
return content_type[len("input_") :]
else:
return content_type
@staticmethod
def transform_instructions_to_system_message(
instructions: Optional[str],
) -> ChatCompletionSystemMessage:
"""
        Transform the instructions field into a system message
"""
return ChatCompletionSystemMessage(role="system", content=instructions or "")
@staticmethod
def transform_responses_api_tools_to_chat_completion_tools(
tools: Optional[List[FunctionToolParam]],
) -> List[ChatCompletionToolParam]:
"""
        Transform Responses API tools into Chat Completion tools
"""
if tools is None:
return []
chat_completion_tools: List[ChatCompletionToolParam] = []
for tool in tools:
chat_completion_tools.append(
ChatCompletionToolParam(
type="function",
function=ChatCompletionToolParamFunctionChunk(
name=tool["name"],
description=tool.get("description") or "",
parameters=tool.get("parameters", {}),
strict=tool.get("strict", False),
),
)
)
return chat_completion_tools
@staticmethod
def transform_chat_completion_tools_to_responses_tools(
chat_completion_response: ModelResponse,
) -> List[OutputFunctionToolCall]:
"""
        Transform Chat Completion tool calls into Responses API tool calls
"""
all_chat_completion_tools: List[ChatCompletionMessageToolCall] = []
for choice in chat_completion_response.choices:
if isinstance(choice, Choices):
if choice.message.tool_calls:
all_chat_completion_tools.extend(choice.message.tool_calls)
for tool_call in choice.message.tool_calls:
TOOL_CALLS_CACHE.set_cache(
key=tool_call.id,
value=tool_call,
)
responses_tools: List[OutputFunctionToolCall] = []
for tool in all_chat_completion_tools:
if tool.type == "function":
function_definition = tool.function
responses_tools.append(
OutputFunctionToolCall(
name=function_definition.name or "",
arguments=function_definition.get("arguments") or "",
call_id=tool.id or "",
id=tool.id or "",
type="function_call", # critical this is "function_call" to work with tools like openai codex
status=function_definition.get("status") or "completed",
)
)
return responses_tools
@staticmethod
def transform_chat_completion_response_to_responses_api_response(
request_input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
chat_completion_response: ModelResponse,
) -> ResponsesAPIResponse:
"""
Transform a Chat Completion response into a Responses API response
"""
responses_api_response: ResponsesAPIResponse = ResponsesAPIResponse(
id=chat_completion_response.id,
created_at=chat_completion_response.created,
model=chat_completion_response.model,
object=chat_completion_response.object,
error=getattr(chat_completion_response, "error", None),
incomplete_details=getattr(
chat_completion_response, "incomplete_details", None
),
instructions=getattr(chat_completion_response, "instructions", None),
metadata=getattr(chat_completion_response, "metadata", {}),
output=LiteLLMCompletionResponsesConfig._transform_chat_completion_choices_to_responses_output(
chat_completion_response=chat_completion_response,
choices=getattr(chat_completion_response, "choices", []),
),
parallel_tool_calls=getattr(
chat_completion_response, "parallel_tool_calls", False
),
temperature=getattr(chat_completion_response, "temperature", 0),
tool_choice=getattr(chat_completion_response, "tool_choice", "auto"),
tools=getattr(chat_completion_response, "tools", []),
top_p=getattr(chat_completion_response, "top_p", None),
max_output_tokens=getattr(
chat_completion_response, "max_output_tokens", None
),
previous_response_id=getattr(
chat_completion_response, "previous_response_id", None
),
reasoning=Reasoning(),
status=getattr(chat_completion_response, "status", "completed"),
text=ResponseTextConfig(),
truncation=getattr(chat_completion_response, "truncation", None),
usage=LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
chat_completion_response=chat_completion_response
),
user=getattr(chat_completion_response, "user", None),
)
RESPONSES_API_SESSION_HANDLER.add_completed_response_to_cache(
response_id=responses_api_response.id,
session_element=ResponsesAPISessionElement(
input=request_input,
output=responses_api_response,
response_id=responses_api_response.id,
previous_response_id=responses_api_request.get("previous_response_id"),
),
)
return responses_api_response
@staticmethod
def _transform_chat_completion_choices_to_responses_output(
chat_completion_response: ModelResponse,
choices: List[Choices],
) -> List[Union[GenericResponseOutputItem, OutputFunctionToolCall]]:
responses_output: List[
Union[GenericResponseOutputItem, OutputFunctionToolCall]
] = []
for choice in choices:
responses_output.append(
GenericResponseOutputItem(
type="message",
id=chat_completion_response.id,
status=choice.finish_reason,
role=choice.message.role,
content=[
LiteLLMCompletionResponsesConfig._transform_chat_message_to_response_output_text(
choice.message
)
],
)
)
tool_calls = LiteLLMCompletionResponsesConfig.transform_chat_completion_tools_to_responses_tools(
chat_completion_response=chat_completion_response
)
responses_output.extend(tool_calls)
return responses_output
@staticmethod
def _transform_responses_api_outputs_to_chat_completion_messages(
responses_api_output: ResponsesAPIResponse,
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
]
]:
messages: List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
]
] = []
output_items = responses_api_output.output
for _output_item in output_items:
output_item: dict = dict(_output_item)
if output_item.get("type") == "function_call":
# handle function call output
messages.append(
LiteLLMCompletionResponsesConfig._transform_responses_output_tool_call_to_chat_completion_output_tool_call(
tool_call=output_item
)
)
else:
# transform as generic ResponseOutputItem
messages.append(
GenericChatCompletionMessage(
role=str(output_item.get("role")) or "user",
content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
output_item.get("content")
),
)
)
return messages
@staticmethod
def _transform_responses_output_tool_call_to_chat_completion_output_tool_call(
tool_call: dict,
) -> ChatCompletionMessageToolCall:
return ChatCompletionMessageToolCall(
id=tool_call.get("id") or "",
type="function",
function=Function(
name=tool_call.get("name") or "",
arguments=tool_call.get("arguments") or "",
),
)
@staticmethod
def _transform_chat_message_to_response_output_text(
message: Message,
) -> OutputText:
return OutputText(
type="output_text",
text=message.content,
annotations=LiteLLMCompletionResponsesConfig._transform_chat_completion_annotations_to_response_output_annotations(
annotations=getattr(message, "annotations", None)
),
)
@staticmethod
def _transform_chat_completion_annotations_to_response_output_annotations(
annotations: Optional[List[ChatCompletionAnnotation]],
) -> List[GenericResponseOutputItemContentAnnotation]:
response_output_annotations: List[
GenericResponseOutputItemContentAnnotation
] = []
if annotations is None:
return response_output_annotations
for annotation in annotations:
annotation_type = annotation.get("type")
if annotation_type == "url_citation" and "url_citation" in annotation:
url_citation = annotation["url_citation"]
response_output_annotations.append(
GenericResponseOutputItemContentAnnotation(
type=annotation_type,
start_index=url_citation.get("start_index"),
end_index=url_citation.get("end_index"),
url=url_citation.get("url"),
title=url_citation.get("title"),
)
)
# Handle other annotation types here
return response_output_annotations
@staticmethod
def _transform_chat_completion_usage_to_responses_usage(
chat_completion_response: ModelResponse,
) -> ResponseAPIUsage:
usage: Optional[Usage] = getattr(chat_completion_response, "usage", None)
if usage is None:
return ResponseAPIUsage(
input_tokens=0,
output_tokens=0,
total_tokens=0,
)
return ResponseAPIUsage(
input_tokens=usage.prompt_tokens,
output_tokens=usage.completion_tokens,
total_tokens=usage.total_tokens,
)
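The request-side mapping can be inspected without calling any provider, since it only reshapes dictionaries. A sketch using the static method above; the model name and the tool definition are illustrative:

```python
from litellm.responses.litellm_completion_transformation.transformation import (
    LiteLLMCompletionResponsesConfig,
)
from litellm.types.llms.openai import ResponsesAPIOptionalRequestParams

request = ResponsesAPIOptionalRequestParams(
    instructions="You are a terse assistant.",
    tools=[
        {
            "type": "function",
            "name": "shell",  # illustrative tool, echoing the Codex example above
            "description": "Run a shell command",
            "parameters": {"type": "object", "properties": {}},
        }
    ],
    temperature=0.2,
)

chat_request = LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
    model="anthropic/claude-3-5-sonnet-20240620",  # illustrative model
    input="List the files in the current directory.",
    responses_api_request=request,
    custom_llm_provider="anthropic",
    stream=None,
)

# Instructions become a system message, the input becomes a user message,
# and the Responses-style tool is rewritten as a chat-completion tool.
print(chat_request["messages"])
print(chat_request["tools"])
```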


@@ -10,6 +10,9 @@ from litellm.constants import request_timeout
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
from litellm.responses.litellm_completion_transformation.handler import (
LiteLLMCompletionTransformationHandler,
)
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
Reasoning,
@@ -29,6 +32,7 @@ from .streaming_iterator import BaseResponsesAPIStreamingIterator
####### ENVIRONMENT VARIABLES ###################
# Initialize any necessary instances or variables here
base_llm_http_handler = BaseLLMHTTPHandler()
litellm_completion_transformation_handler = LiteLLMCompletionTransformationHandler()
#################################################
@@ -178,19 +182,12 @@ def responses(
     )
 
     # get provider config
-    responses_api_provider_config: Optional[
-        BaseResponsesAPIConfig
-    ] = ProviderConfigManager.get_provider_responses_api_config(
-        model=model,
-        provider=litellm.LlmProviders(custom_llm_provider),
-    )
-
-    if responses_api_provider_config is None:
-        raise litellm.BadRequestError(
-            model=model,
-            llm_provider=custom_llm_provider,
-            message=f"Responses API not available for custom_llm_provider={custom_llm_provider}, model: {model}",
-        )
+    responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
+        ProviderConfigManager.get_provider_responses_api_config(
+            model=model,
+            provider=litellm.LlmProviders(custom_llm_provider),
+        )
+    )
 
     local_vars.update(kwargs)
     # Get ResponsesAPIOptionalRequestParams with only valid parameters
@@ -200,6 +197,17 @@ def responses(
)
)
if responses_api_provider_config is None:
return litellm_completion_transformation_handler.response_api_handler(
model=model,
input=input,
responses_api_request=response_api_optional_params,
custom_llm_provider=custom_llm_provider,
_is_async=_is_async,
stream=stream,
**kwargs,
)
# Get optional parameters for the responses API
responses_api_request_params: Dict = (
ResponsesAPIRequestUtils.get_optional_params_responses_api(

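The effect of this hunk is that a missing provider config is no longer an error: OpenAI keeps the native Responses API HTTP path, while every other provider falls through to the chat-completion bridge. A small sketch of that gate, assuming `ProviderConfigManager` is importable from `litellm.utils` as it is in this module:

```python
import litellm
from litellm.utils import ProviderConfigManager  # assumed import location

# The gate added above: a None config now means "use the bridge",
# not "raise BadRequestError" as before this commit.
for model, provider in [
    ("gpt-4o-mini", "openai"),                    # has a native Responses API config
    ("claude-3-5-sonnet-20240620", "anthropic"),  # served via litellm.completion
]:
    config = ProviderConfigManager.get_provider_responses_api_config(
        model=model,
        provider=litellm.LlmProviders(provider),
    )
    print(provider, "native" if config is not None else "bridged")
```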

@@ -0,0 +1,15 @@
from pydantic import BaseModel
class BaseLiteLLMOpenAIResponseObject(BaseModel):
def __getitem__(self, key):
return self.__dict__[key]
def get(self, key, default=None):
return self.__dict__.get(key, default)
def __contains__(self, key):
return key in self.__dict__
def items(self):
return self.__dict__.items()
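Moving this mixin into `litellm/types/llms/base.py` lets any response model offer both attribute and dict-style access, which is how the transformation code treats cached objects and plain dicts uniformly. A small illustration with a hypothetical subclass:

```python
from typing import Optional

from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject


class DemoResponseObject(BaseLiteLLMOpenAIResponseObject):
    """Hypothetical subclass, used only to illustrate the mixin."""

    id: str
    status: Optional[str] = None


obj = DemoResponseObject(id="resp_123", status="completed")

# Attribute access and dict-style access both work.
assert obj["id"] == obj.id == "resp_123"
assert obj.get("missing", "default") == "default"
assert "status" in obj
print(dict(obj.items()))
```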


@@ -49,9 +49,16 @@ from openai.types.responses.response_create_params import (
ToolChoice,
ToolParam,
)
from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
from pydantic import BaseModel, Discriminator, Field, PrivateAttr
from typing_extensions import Annotated, Dict, Required, TypedDict, override
from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
from litellm.types.responses.main import (
GenericResponseOutputItem,
OutputFunctionToolCall,
)
FileContent = Union[IO[bytes], bytes, PathLike]
FileTypes = Union[
@@ -678,6 +685,11 @@ class ChatCompletionDeveloperMessage(OpenAIChatCompletionDeveloperMessage, total
cache_control: ChatCompletionCachedContent
class GenericChatCompletionMessage(TypedDict, total=False):
role: Required[str]
content: Required[Union[str, List]]
ValidUserMessageContentTypes = [
"text",
"image_url",
@@ -803,12 +815,12 @@ class OpenAIChatCompletionChunk(ChatCompletionChunk):
class Hyperparameters(BaseModel):
batch_size: Optional[Union[str, int]] = None # "Number of examples in each batch."
learning_rate_multiplier: Optional[
Union[str, float]
] = None # Scaling factor for the learning rate
n_epochs: Optional[
Union[str, int]
] = None # "The number of epochs to train the model for"
learning_rate_multiplier: Optional[Union[str, float]] = (
None # Scaling factor for the learning rate
)
n_epochs: Optional[Union[str, int]] = (
None # "The number of epochs to train the model for"
)
class FineTuningJobCreate(BaseModel):
@@ -835,18 +847,18 @@ class FineTuningJobCreate(BaseModel):
model: str # "The name of the model to fine-tune."
training_file: str # "The ID of an uploaded file that contains training data."
hyperparameters: Optional[
Hyperparameters
] = None # "The hyperparameters used for the fine-tuning job."
suffix: Optional[
str
] = None # "A string of up to 18 characters that will be added to your fine-tuned model name."
validation_file: Optional[
str
] = None # "The ID of an uploaded file that contains validation data."
integrations: Optional[
List[str]
] = None # "A list of integrations to enable for your fine-tuning job."
hyperparameters: Optional[Hyperparameters] = (
None # "The hyperparameters used for the fine-tuning job."
)
suffix: Optional[str] = (
None # "A string of up to 18 characters that will be added to your fine-tuned model name."
)
validation_file: Optional[str] = (
None # "The ID of an uploaded file that contains validation data."
)
integrations: Optional[List[str]] = (
None # "A list of integrations to enable for your fine-tuning job."
)
seed: Optional[int] = None # "The seed controls the reproducibility of the job."
@@ -887,7 +899,7 @@ class ResponsesAPIOptionalRequestParams(TypedDict, total=False):
temperature: Optional[float]
text: Optional[ResponseTextConfigParam]
tool_choice: Optional[ToolChoice]
tools: Optional[Iterable[ToolParam]]
tools: Optional[List[ToolParam]]
top_p: Optional[float]
truncation: Optional[Literal["auto", "disabled"]]
user: Optional[str]
@@ -900,20 +912,6 @@ class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False):
model: str
class BaseLiteLLMOpenAIResponseObject(BaseModel):
def __getitem__(self, key):
return self.__dict__[key]
def get(self, key, default=None):
return self.__dict__.get(key, default)
def __contains__(self, key):
return key in self.__dict__
def items(self):
return self.__dict__.items()
class OutputTokensDetails(BaseLiteLLMOpenAIResponseObject):
reasoning_tokens: Optional[int] = None
@@ -958,11 +956,14 @@ class ResponsesAPIResponse(BaseLiteLLMOpenAIResponseObject):
metadata: Optional[Dict]
model: Optional[str]
object: Optional[str]
output: List[ResponseOutputItem]
output: Union[
List[ResponseOutputItem],
List[Union[GenericResponseOutputItem, OutputFunctionToolCall]],
]
parallel_tool_calls: bool
temperature: Optional[float]
tool_choice: ToolChoice
tools: List[Tool]
tools: Union[List[Tool], List[ResponseFunctionToolCall]]
top_p: Optional[float]
max_output_tokens: Optional[int]
previous_response_id: Optional[str]


@@ -0,0 +1,48 @@
from typing import Literal
from typing_extensions import Any, List, Optional, TypedDict
from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
class GenericResponseOutputItemContentAnnotation(BaseLiteLLMOpenAIResponseObject):
"""Annotation for content in a message"""
type: Optional[str]
start_index: Optional[int]
end_index: Optional[int]
url: Optional[str]
title: Optional[str]
pass
class OutputText(BaseLiteLLMOpenAIResponseObject):
"""Text output content from an assistant message"""
type: Optional[str] # "output_text"
text: Optional[str]
annotations: Optional[List[GenericResponseOutputItemContentAnnotation]]
class OutputFunctionToolCall(BaseLiteLLMOpenAIResponseObject):
"""A tool call to run a function"""
arguments: Optional[str]
call_id: Optional[str]
name: Optional[str]
type: Optional[str] # "function_call"
id: Optional[str]
status: Literal["in_progress", "completed", "incomplete"]
class GenericResponseOutputItem(BaseLiteLLMOpenAIResponseObject):
"""
Generic response API output item
"""
type: str # "message"
id: str
status: str # "completed", "in_progress", etc.
role: str # "assistant", "user", etc.
content: List[OutputText]
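These lightweight models are what the bridge places into `ResponsesAPIResponse.output` instead of the stricter OpenAI types. A small construction sketch showing the expected shapes; the values are illustrative:

```python
from litellm.types.responses.main import (
    GenericResponseOutputItem,
    OutputFunctionToolCall,
    OutputText,
)

# An assistant text message as the bridge would emit it.
message_item = GenericResponseOutputItem(
    type="message",
    id="chatcmpl-123",  # illustrative id
    status="completed",
    role="assistant",
    content=[OutputText(type="output_text", text="Hello!", annotations=[])],
)

# A function call item; type="function_call" is what Codex-style clients expect.
tool_item = OutputFunctionToolCall(
    name="shell",
    arguments='{"command": ["ls"]}',
    call_id="call_abc",
    id="call_abc",
    type="function_call",
    status="completed",
)

print(message_item)
print(tool_item)
```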