Wire through parallel_tool_calls to Responses API

Signed-off-by: Anastas Stoyanovsky <astoyano@redhat.com>
Anastas Stoyanovsky 2025-11-11 08:54:02 -05:00
parent 7093978754
commit 7a9b7ecdc2
9 changed files with 159 additions and 20 deletions


@@ -6723,9 +6723,12 @@ components:
         type: array
         title: Output
       parallel_tool_calls:
-        type: boolean
         title: Parallel Tool Calls
-        default: false
+        type: boolean
+        default: true
+        description: >-
+          (Optional) Whether to allow more than one function tool call generated
+          per turn.
       previous_response_id:
         anyOf:
           - type: string
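For orientation, a minimal client-side sketch of the new knob, assuming an OpenAI-compatible client pointed at a Llama Stack server; the base_url, api_key, and model name below are placeholders, not values from this commit:

# Illustrative sketch only: base_url, api_key, and model are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

tools = [
    {
        "type": "function",
        "name": "get_weather",
        "description": "Get weather information for a specified location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "The city name"},
            },
        },
    }
]

response = client.responses.create(
    model="my-model",  # placeholder
    input="Get the weather in New York and in Paris",
    tools=tools,
    parallel_tool_calls=False,  # request at most one function tool call per turn
)
# The response echoes the setting back; per this commit the default is now true.
print(response.parallel_tool_calls)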
@@ -6838,8 +6841,10 @@ components:
       - created_at
       - id
       - model
+      - object
       - output
       - status
+      - text
       - input
     title: OpenAIResponseObjectWithInput
     description: OpenAI response object extended with input context information.
@@ -7122,9 +7127,12 @@ components:
           - type: 'null'
       title: OpenAIResponsePrompt
       instructions:
+        type: string
         anyOf:
           - type: string
           - type: 'null'
+      parallel_tool_calls:
+        type: boolean
       previous_response_id:
         anyOf:
           - type: string
@ -7253,7 +7261,10 @@ components:
parallel_tool_calls: parallel_tool_calls:
type: boolean type: boolean
title: Parallel Tool Calls title: Parallel Tool Calls
default: false default: true
description: >-
(Optional) Whether to allow more than one function tool call generated
per turn.
previous_response_id: previous_response_id:
anyOf: anyOf:
- type: string - type: string


@@ -5746,7 +5746,10 @@ components:
       parallel_tool_calls:
         type: boolean
         title: Parallel Tool Calls
-        default: false
+        default: true
+        description: >-
+          (Optional) Whether to allow more than one function tool call generated
+          per turn.
       previous_response_id:
         anyOf:
           - type: string
@@ -6143,9 +6146,12 @@ components:
           - type: 'null'
       title: OpenAIResponsePrompt
       instructions:
+        type: string
         anyOf:
           - type: string
           - type: 'null'
+      parallel_tool_calls:
+        type: boolean
       previous_response_id:
         anyOf:
           - type: string
@@ -6274,7 +6280,10 @@ components:
       parallel_tool_calls:
         type: boolean
         title: Parallel Tool Calls
-        default: false
+        default: true
+        description: >-
+          (Optional) Whether to allow more than one function tool call generated
+          per turn.
       previous_response_id:
         anyOf:
           - type: string


@@ -6725,7 +6725,10 @@ components:
       parallel_tool_calls:
         type: boolean
         title: Parallel Tool Calls
-        default: false
+        default: true
+        description: >-
+          (Optional) Whether to allow more than one function tool call generated
+          per turn.
       previous_response_id:
         anyOf:
           - type: string
@@ -7125,6 +7128,9 @@ components:
         anyOf:
           - type: string
           - type: 'null'
+        type: string
+      parallel_tool_calls:
+        type: boolean
       previous_response_id:
         anyOf:
           - type: string
@@ -7253,7 +7259,10 @@ components:
       parallel_tool_calls:
         type: boolean
         title: Parallel Tool Calls
-        default: false
+        default: true
+        description: >-
+          (Optional) Whether to allow more than one function tool call generated
+          per turn.
       previous_response_id:
         anyOf:
           - type: string

View file

@@ -92,6 +92,7 @@ class MetaReferenceAgentsImpl(Agents):
         model: str,
         prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
+        parallel_tool_calls: bool | None = True,
         previous_response_id: str | None = None,
         conversation: str | None = None,
         store: bool | None = True,
@@ -120,6 +121,7 @@ class MetaReferenceAgentsImpl(Agents):
             include,
             max_infer_iters,
             guardrails,
+            parallel_tool_calls,
             max_tool_calls,
         )
         return result  # type: ignore[no-any-return]


@@ -252,6 +252,7 @@ class OpenAIResponsesImpl:
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,
         guardrails: list[str | ResponseGuardrailSpec] | None = None,
+        parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
     ):
         stream = bool(stream)
@@ -296,6 +297,7 @@ class OpenAIResponsesImpl:
             tools=tools,
             max_infer_iters=max_infer_iters,
             guardrail_ids=guardrail_ids,
+            parallel_tool_calls=parallel_tool_calls,
             max_tool_calls=max_tool_calls,
         )
@@ -346,6 +348,7 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None = None,
         max_infer_iters: int | None = 10,
         guardrail_ids: list[str] | None = None,
+        parallel_tool_calls: bool | None = True,
         max_tool_calls: int | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         # These should never be None when called from create_openai_response (which sets defaults)
@@ -385,6 +388,7 @@ class OpenAIResponsesImpl:
             created_at=created_at,
             text=text,
             max_infer_iters=max_infer_iters,
+            parallel_tool_calls=parallel_tool_calls,
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,


@@ -114,6 +114,7 @@ class StreamingResponseOrchestrator:
         safety_api,
         guardrail_ids: list[str] | None = None,
         prompt: OpenAIResponsePrompt | None = None,
+        parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
     ):
         self.inference_api = inference_api
@@ -128,6 +129,8 @@ class StreamingResponseOrchestrator:
         self.prompt = prompt
         # System message that is inserted into the model's context
         self.instructions = instructions
+        # Whether to allow more than one function tool call generated per turn.
+        self.parallel_tool_calls = parallel_tool_calls
         # Max number of total calls to built-in tools that can be processed in a response
         self.max_tool_calls = max_tool_calls
         self.sequence_number = 0
@@ -190,6 +193,7 @@ class StreamingResponseOrchestrator:
             usage=self.accumulated_usage,
             instructions=self.instructions,
             prompt=self.prompt,
+            parallel_tool_calls=self.parallel_tool_calls,
             max_tool_calls=self.max_tool_calls,
         )
@@ -301,6 +305,7 @@ class StreamingResponseOrchestrator:
             completion_result_data,
             output_messages,
             next_turn_messages,
+            not self.parallel_tool_calls,
         ):
             yield stream_event
@@ -897,6 +902,7 @@ class StreamingResponseOrchestrator:
         completion_result_data: ChatCompletionResult,
         output_messages: list[OpenAIResponseOutput],
         next_turn_messages: list,
+        incremental_function_calling: bool,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         """Coordinate execution of both function and non-function tool calls."""
         # Execute non-function tool calls
@@ -1020,6 +1026,10 @@ class StreamingResponseOrchestrator:
                     sequence_number=self.sequence_number,
                 )

+            # TODO: Make sure that multi-turn incremental execution works
+            if incremental_function_calling:
+                break
+
     async def _process_new_tools(
         self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput]
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
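The behavioral core of the change is the new incremental_function_calling argument, derived as `not self.parallel_tool_calls`: when parallel calls are disabled, the orchestrator executes only the first function call of the turn and stops, so the model sees that result before generating any further calls. A simplified standalone sketch of that control flow (run_function_tool_calls and execute are illustrative stand-ins, not the real internals):

# Simplified sketch of the gating above; not the actual orchestrator code.
from collections.abc import Awaitable, Callable
from typing import Any


async def run_function_tool_calls(
    tool_calls: list[dict[str, Any]],
    execute: Callable[[dict[str, Any]], Awaitable[Any]],
    parallel_tool_calls: bool,
) -> list[Any]:
    incremental_function_calling = not parallel_tool_calls
    results = []
    for call in tool_calls:
        results.append(await execute(call))
        if incremental_function_calling:
            # With parallel_tool_calls=False, stop after the first call so the
            # next turn is generated with this result already in context.
            break
    return results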


@@ -72,6 +72,7 @@ class Agents(Protocol):
         model: str,
         prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
+        parallel_tool_calls: bool | None = True,
         previous_response_id: str | None = None,
         conversation: str | None = None,
         store: bool | None = True,


@@ -585,7 +585,7 @@ class OpenAIResponseObject(BaseModel):
     :param model: Model identifier used for generation
     :param object: Object type identifier, always "response"
     :param output: List of generated output items (messages, tool calls, etc.)
-    :param parallel_tool_calls: Whether tool calls can be executed in parallel
+    :param parallel_tool_calls: (Optional) Whether to allow more than one function tool call generated per turn.
     :param previous_response_id: (Optional) ID of the previous response in a conversation
     :param prompt: (Optional) Reference to a prompt template and its variables.
     :param status: Current status of the response generation
@@ -605,7 +605,7 @@ class OpenAIResponseObject(BaseModel):
     model: str
     object: Literal["response"] = "response"
     output: Sequence[OpenAIResponseOutput]
-    parallel_tool_calls: bool = False
+    parallel_tool_calls: bool | None = True
     previous_response_id: str | None = None
     prompt: OpenAIResponsePrompt | None = None
     status: str
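A standalone pydantic sketch (not the real class) of what this field change means for consumers: the attribute is now optional and defaults to true instead of false:

# Illustrative model only; mirrors the parallel_tool_calls field change.
from pydantic import BaseModel


class ResponseSketch(BaseModel):
    parallel_tool_calls: bool | None = True  # previously: bool = False


assert ResponseSketch().parallel_tool_calls is True  # new default
assert ResponseSketch(parallel_tool_calls=None).parallel_tool_calls is None  # None is now accepted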


@@ -682,3 +682,96 @@ def test_max_tool_calls_with_builtin_tools(openai_client, client_with_models, text_model_id):
     # Verify we have a valid max_tool_calls field
     assert response_3.max_tool_calls == max_tool_calls[1]

+
+@pytest.mark.skip(reason="Tool calling is not reliable.")
+def test_parallel_tool_calls_true(openai_client, client_with_models, text_model_id):
+    """Test that parallel_tool_calls=True allows multiple function tool calls in one turn."""
+    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+    client = openai_client
+    parallel_tool_calls = True
+
+    tools = [
+        {
+            "type": "function",
+            "name": "get_weather",
+            "description": "Get weather information for a specified location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city name (e.g., 'New York', 'London')",
+                    },
+                },
+            },
+        }
+    ]
+
+    # Create a response that triggers function tool calls
+    response = client.responses.create(
+        model=text_model_id,
+        input="Get the weather in New York and in Paris",
+        tools=tools,
+        stream=False,
+        parallel_tool_calls=parallel_tool_calls,
+    )
+
+    # Verify we got two function calls in a single turn
+    assert len(response.output) == 2
+    assert response.output[0].type == "function_call"
+    assert response.output[0].name == "get_weather"
+    assert response.output[0].status == "completed"
+    assert response.output[1].type == "function_call"
+    assert response.output[1].name == "get_weather"
+    assert response.output[1].status == "completed"
+
+    # Verify the response echoes the requested parallel_tool_calls setting
+    assert response.parallel_tool_calls == parallel_tool_calls
+
+
+@pytest.mark.skip(reason="Tool calling is not reliable.")
+def test_parallel_tool_calls_false(openai_client, client_with_models, text_model_id):
+    """Test that parallel_tool_calls=False limits generation to one function tool call per turn."""
+    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+    client = openai_client
+    parallel_tool_calls = False
+
+    tools = [
+        {
+            "type": "function",
+            "name": "get_weather",
+            "description": "Get weather information for a specified location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city name (e.g., 'New York', 'London')",
+                    },
+                },
+            },
+        }
+    ]
+
+    # Create a response that triggers function tool calls
+    response = client.responses.create(
+        model=text_model_id,
+        input="Get the weather in New York and in Paris",
+        tools=tools,
+        stream=False,
+        parallel_tool_calls=parallel_tool_calls,
+    )
+
+    # Verify only one function call was generated in the turn
+    assert len(response.output) == 1
+    assert response.output[0].type == "function_call"
+    assert response.output[0].name == "get_weather"
+    assert response.output[0].status == "completed"
+
+    # Verify the response echoes the requested parallel_tool_calls setting
+    assert response.parallel_tool_calls == parallel_tool_calls