feat!: Wire through parallel_tool_calls to Responses API (#4124)

# What does this PR do?
Initial PR toward #4123.
Adds the `parallel_tool_calls` field to the Responses API spec, along with
an initial implementation that generates at most one function call per
turn when the field is set to `False`.
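
For context, here is a minimal sketch of how a client might exercise the new
field through the OpenAI-compatible Responses API. The base URL, model, and
tool definition are illustrative placeholders, not values taken from this PR:

```python
from openai import OpenAI

# Placeholder endpoint/model for a Llama Stack deployment exposing the
# OpenAI-compatible Responses API; adjust for your setup.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="llama3.2:3b",
    input="What's the weather in Paris and in Tokyo?",
    tools=[
        {
            "type": "function",
            "name": "get_weather",
            "description": "Get the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        }
    ],
    # With parallel_tool_calls=False, at most one function call should be
    # generated per turn, even though the prompt suggests two lookups.
    parallel_tool_calls=False,
)
print([item for item in response.output if item.type == "function_call"])
```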

## Test Plan
* Unit tests verify that at most one function call is generated when
`parallel_tool_calls` is set to `False` (see the illustrative sketch after
this list).
* A follow-up PR will verify that `parallel_tool_calls` is passed through to
providers.
* A follow-up PR will address verification and/or implementation of
incremental function calling across multiple conversational turns.
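
As a rough illustration of the behavior the unit tests check — the fixture
and helper names here are hypothetical, not the actual test code in the
repository:

```python
import pytest

WEATHER_TOOL = {
    "type": "function",
    "name": "get_weather",
    "description": "Get the current weather for a city",
    "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
}


@pytest.mark.asyncio
async def test_at_most_one_function_call_when_parallel_disabled(responses_impl):
    # `responses_impl` is a hypothetical fixture wrapping OpenAIResponsesImpl.
    response = await responses_impl.create_openai_response(
        input="Get the weather in Paris and in Tokyo.",
        model="test-model",
        tools=[WEATHER_TOOL],
        parallel_tool_calls=False,
        stream=False,
    )
    function_calls = [o for o in response.output if o.type == "function_call"]
    assert len(function_calls) <= 1
```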

---------

Signed-off-by: Anastas Stoyanovsky <astoyano@redhat.com>


@@ -92,6 +92,7 @@ class MetaReferenceAgentsImpl(Agents):
         model: str,
         prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
+        parallel_tool_calls: bool | None = True,
         previous_response_id: str | None = None,
         conversation: str | None = None,
         store: bool | None = True,
@@ -120,6 +121,7 @@ class MetaReferenceAgentsImpl(Agents):
             include,
             max_infer_iters,
             guardrails,
+            parallel_tool_calls,
             max_tool_calls,
         )
         return result  # type: ignore[no-any-return]


@@ -252,6 +252,7 @@ class OpenAIResponsesImpl:
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,
         guardrails: list[str | ResponseGuardrailSpec] | None = None,
+        parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
     ):
         stream = bool(stream)
@@ -296,6 +297,7 @@ class OpenAIResponsesImpl:
             tools=tools,
             max_infer_iters=max_infer_iters,
             guardrail_ids=guardrail_ids,
+            parallel_tool_calls=parallel_tool_calls,
             max_tool_calls=max_tool_calls,
         )
@@ -346,6 +348,7 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None = None,
         max_infer_iters: int | None = 10,
         guardrail_ids: list[str] | None = None,
+        parallel_tool_calls: bool | None = True,
         max_tool_calls: int | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         # These should never be None when called from create_openai_response (which sets defaults)
@@ -385,6 +388,7 @@ class OpenAIResponsesImpl:
             created_at=created_at,
             text=text,
             max_infer_iters=max_infer_iters,
+            parallel_tool_calls=parallel_tool_calls,
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,


@@ -114,6 +114,7 @@ class StreamingResponseOrchestrator:
         safety_api,
         guardrail_ids: list[str] | None = None,
         prompt: OpenAIResponsePrompt | None = None,
+        parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
     ):
         self.inference_api = inference_api
@@ -128,6 +129,8 @@ class StreamingResponseOrchestrator:
         self.prompt = prompt
         # System message that is inserted into the model's context
         self.instructions = instructions
+        # Whether to allow more than one function tool call to be generated per turn.
+        self.parallel_tool_calls = parallel_tool_calls
         # Max number of total calls to built-in tools that can be processed in a response
         self.max_tool_calls = max_tool_calls
         self.sequence_number = 0
@@ -190,6 +193,7 @@ class StreamingResponseOrchestrator:
             usage=self.accumulated_usage,
             instructions=self.instructions,
             prompt=self.prompt,
+            parallel_tool_calls=self.parallel_tool_calls,
             max_tool_calls=self.max_tool_calls,
         )
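
The hunks above only thread the flag through to the orchestrator; the
enforcement itself is not shown in this diff. One plausible shape for it —
purely an assumption sketched here, not the committed code — is to drop any
function calls after the first when the flag is `False`:

```python
# Hypothetical sketch -- not the committed implementation.
def limit_function_calls(output_items: list, parallel_tool_calls: bool | None) -> list:
    """Keep at most one function call per turn when parallel calls are disabled.

    `None` is treated like the default (`True`): output passes through untouched.
    """
    if parallel_tool_calls is not False:
        return output_items
    kept: list = []
    seen_function_call = False
    for item in output_items:
        if getattr(item, "type", None) == "function_call":
            if seen_function_call:
                continue  # drop additional function calls beyond the first
            seen_function_call = True
        kept.append(item)
    return kept
```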