Wire through parallel_tool_calls to Responses API

Signed-off-by: Anastas Stoyanovsky <astoyano@redhat.com>
Anastas Stoyanovsky 2025-11-11 08:54:02 -05:00
parent 7093978754
commit 7a9b7ecdc2
9 changed files with 159 additions and 20 deletions
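
The user-visible effect of this change: Responses API callers can now opt out
of parallel function calling per request. Below is a minimal usage sketch with
the openai Python client pointed at a Llama Stack server; the base URL, API
key, and model name are placeholders, not values taken from this commit.

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # placeholder endpoint

    response = client.responses.create(
        model="my-model",  # placeholder model id
        input="What is the weather in Paris and in Tokyo?",
        tools=[
            {
                "type": "function",
                "name": "get_weather",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            }
        ],
        # New: ask the server to generate at most one function call per turn.
        parallel_tool_calls=False,
    )
    print(response.parallel_tool_calls)  # False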

View file

@@ -92,6 +92,7 @@ class MetaReferenceAgentsImpl(Agents):
         model: str,
         prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
+        parallel_tool_calls: bool | None = True,
         previous_response_id: str | None = None,
         conversation: str | None = None,
         store: bool | None = True,
@@ -120,6 +121,7 @@ class MetaReferenceAgentsImpl(Agents):
             include,
             max_infer_iters,
             guardrails,
+            parallel_tool_calls,
             max_tool_calls,
         )
         return result # type: ignore[no-any-return]
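
Note that the call site in the second hunk forwards its arguments positionally,
so parallel_tool_calls had to be inserted at exactly the position it occupies
in the callee's signature. A self-contained sketch (hypothetical function, not
the code above) of why keyword forwarding is more robust to mid-signature
insertions like this one:

    def create_openai_response(
        guardrails: list[str] | None = None,
        parallel_tool_calls: bool | None = None,
        max_tool_calls: int | None = None,
    ) -> dict:
        return {
            "guardrails": guardrails,
            "parallel_tool_calls": parallel_tool_calls,
            "max_tool_calls": max_tool_calls,
        }

    # Positional callers must mirror the new parameter order exactly:
    assert create_openai_response(None, True, 5)["max_tool_calls"] == 5
    # Keyword callers are unaffected by the inserted parameter:
    assert create_openai_response(guardrails=None, max_tool_calls=5)["max_tool_calls"] == 5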

View file

@@ -252,6 +252,7 @@ class OpenAIResponsesImpl:
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,
         guardrails: list[str | ResponseGuardrailSpec] | None = None,
+        parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
     ):
         stream = bool(stream)
@@ -296,6 +297,7 @@ class OpenAIResponsesImpl:
             tools=tools,
             max_infer_iters=max_infer_iters,
             guardrail_ids=guardrail_ids,
+            parallel_tool_calls=parallel_tool_calls,
             max_tool_calls=max_tool_calls,
         )
@@ -346,6 +348,7 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None = None,
         max_infer_iters: int | None = 10,
         guardrail_ids: list[str] | None = None,
+        parallel_tool_calls: bool | None = True,
         max_tool_calls: int | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         # These should never be None when called from create_openai_response (which sets defaults)
@@ -385,6 +388,7 @@ class OpenAIResponsesImpl:
             created_at=created_at,
             text=text,
             max_infer_iters=max_infer_iters,
+            parallel_tool_calls=parallel_tool_calls,
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
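
The defaults are not uniform across these layers (True on some signatures, None
on others). A small sketch (hypothetical function names) of how they interact,
and why a bare None ends up behaving like False once the orchestrator computes
`not self.parallel_tool_calls` in the next file:

    def public_api(parallel_tool_calls: bool | None = True) -> bool | None:
        # Mirrors the True default on the public create_openai_response.
        return mid_layer(parallel_tool_calls)

    def mid_layer(parallel_tool_calls: bool | None = None) -> bool | None:
        # Mirrors the None default on the internal signatures.
        return parallel_tool_calls

    def serial_tool_calling(parallel_tool_calls: bool | None) -> bool:
        # Mirrors `not self.parallel_tool_calls` in the orchestrator.
        return not parallel_tool_calls

    assert serial_tool_calling(public_api()) is False      # default: parallel allowed
    assert serial_tool_calling(public_api(False)) is True  # explicit opt-out: serial
    assert serial_tool_calling(mid_layer()) is True        # bare None acts like False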

View file

@@ -114,6 +114,7 @@ class StreamingResponseOrchestrator:
         safety_api,
         guardrail_ids: list[str] | None = None,
         prompt: OpenAIResponsePrompt | None = None,
+        parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
     ):
         self.inference_api = inference_api
@@ -128,6 +129,8 @@ class StreamingResponseOrchestrator:
         self.prompt = prompt
         # System message that is inserted into the model's context
         self.instructions = instructions
+        # Whether to allow more than one function tool call generated per turn.
+        self.parallel_tool_calls = parallel_tool_calls
         # Max number of total calls to built-in tools that can be processed in a response
         self.max_tool_calls = max_tool_calls
         self.sequence_number = 0
@@ -190,6 +193,7 @@ class StreamingResponseOrchestrator:
             usage=self.accumulated_usage,
             instructions=self.instructions,
             prompt=self.prompt,
+            parallel_tool_calls=self.parallel_tool_calls,
             max_tool_calls=self.max_tool_calls,
         )
@@ -301,6 +305,7 @@ class StreamingResponseOrchestrator:
                 completion_result_data,
                 output_messages,
                 next_turn_messages,
+                not self.parallel_tool_calls,
             ):
                 yield stream_event
@@ -897,6 +902,7 @@ class StreamingResponseOrchestrator:
         completion_result_data: ChatCompletionResult,
         output_messages: list[OpenAIResponseOutput],
         next_turn_messages: list,
+        incremental_function_calling: bool,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         """Coordinate execution of both function and non-function tool calls."""
         # Execute non-function tool calls
@@ -1020,6 +1026,10 @@
                 sequence_number=self.sequence_number,
             )
+
+            # TODO: Make sure that multi-turn incremental execution works
+            if incremental_function_calling:
+                break

     async def _process_new_tools(
         self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput]
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
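
The behavioral core of the commit is the last hunk: when parallel_tool_calls is
falsy, the orchestrator passes incremental_function_calling=True and the
function-call loop stops after the first call. A self-contained sketch of that
control flow (hypothetical names and simplified types; the real method streams
events rather than returning a list):

    import asyncio

    async def coordinate_tool_execution(
        function_calls: list[str],
        incremental_function_calling: bool,
    ) -> list[str]:
        processed: list[str] = []
        for call in function_calls:
            processed.append(f"executed {call}")
            if incremental_function_calling:
                # Serial mode: only the first generated function call runs this
                # turn; the rest are deferred (see the TODO in the diff above).
                break
        return processed

    # parallel_tool_calls=False -> incremental_function_calling=True
    print(asyncio.run(coordinate_tool_execution(["a", "b"], True)))   # ['executed a']
    # parallel_tool_calls=True -> incremental_function_calling=False
    print(asyncio.run(coordinate_tool_execution(["a", "b"], False)))  # ['executed a', 'executed b']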

View file

@@ -72,6 +72,7 @@ class Agents(Protocol):
         model: str,
         prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
+        parallel_tool_calls: bool | None = True,
         previous_response_id: str | None = None,
         conversation: str | None = None,
         store: bool | None = True,

View file

@@ -585,7 +585,7 @@ class OpenAIResponseObject(BaseModel):
     :param model: Model identifier used for generation
     :param object: Object type identifier, always "response"
     :param output: List of generated output items (messages, tool calls, etc.)
-    :param parallel_tool_calls: Whether tool calls can be executed in parallel
+    :param parallel_tool_calls: (Optional) Whether to allow more than one function tool call generated per turn.
     :param previous_response_id: (Optional) ID of the previous response in a conversation
     :param prompt: (Optional) Reference to a prompt template and its variables.
     :param status: Current status of the response generation
@@ -605,7 +605,7 @@ class OpenAIResponseObject(BaseModel):
     model: str
     object: Literal["response"] = "response"
     output: Sequence[OpenAIResponseOutput]
-    parallel_tool_calls: bool = False
+    parallel_tool_calls: bool | None = True
     previous_response_id: str | None = None
     prompt: OpenAIResponsePrompt | None = None
     status: str
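
Schema-wise, the final hunk flips the recorded default from False to True
(parallel tool calling enabled, matching OpenAI's default) and makes the field
nullable. A sketch of the before/after behavior, assuming a pydantic BaseModel
as in the source file:

    from pydantic import BaseModel

    class Before(BaseModel):
        parallel_tool_calls: bool = False

    class After(BaseModel):
        parallel_tool_calls: bool | None = True

    print(Before().parallel_tool_calls)                         # False
    print(After().parallel_tool_calls)                          # True
    print(After(parallel_tool_calls=None).parallel_tool_calls)  # None is now accepted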