Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 09:53:45 +00:00)
feat!: Wire through parallel_tool_calls to Responses API (#4124)
# What does this PR do?

Initial PR against #4123. Adds the `parallel_tool_calls` spec to the Responses API, along with a basic initial implementation in which no more than one function call is generated when the flag is set to `False`.

## Test Plan

* Unit tests have been added to verify that no more than one function call is generated.
* A follow-up PR will verify passing `parallel_tool_calls` through to providers.
* A follow-up PR will address verification and/or implementation of incremental function calling across multiple conversational turns.

---------

Signed-off-by: Anastas Stoyanovsky <astoyano@redhat.com>
parent 7093978754
commit a3580e6bc0
10 changed files with 73 additions and 32 deletions
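A minimal usage sketch of the new flag, assuming an OpenAI-compatible client pointed at a locally running Llama Stack server. The `base_url`, port, and model id are placeholders, not values from this PR; per the API signature in the diffs below, leaving the flag unset defaults to `True` (parallel calls allowed).

```python
# Sketch: exercising parallel_tool_calls through an OpenAI-compatible client.
# base_url and model are placeholder values for a local Llama Stack server.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="llama3.2:3b",  # placeholder model id
    input="What is the weather in Paris and in Tokyo?",
    tools=[
        {
            "type": "function",
            "name": "get_weather",
            "description": "Get the weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        }
    ],
    # With parallel_tool_calls=False, the server should emit at most one
    # function call per turn, even though the prompt invites two.
    parallel_tool_calls=False,
)
```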
```diff
@@ -92,6 +92,7 @@ class MetaReferenceAgentsImpl(Agents):
         model: str,
         prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
+        parallel_tool_calls: bool | None = True,
         previous_response_id: str | None = None,
         conversation: str | None = None,
         store: bool | None = True,
@@ -120,6 +121,7 @@ class MetaReferenceAgentsImpl(Agents):
             include,
             max_infer_iters,
             guardrails,
+            parallel_tool_calls,
             max_tool_calls,
         )
         return result  # type: ignore[no-any-return]
```
```diff
@@ -252,6 +252,7 @@ class OpenAIResponsesImpl:
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,
         guardrails: list[str | ResponseGuardrailSpec] | None = None,
+        parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
     ):
         stream = bool(stream)
@@ -296,6 +297,7 @@ class OpenAIResponsesImpl:
             tools=tools,
             max_infer_iters=max_infer_iters,
             guardrail_ids=guardrail_ids,
+            parallel_tool_calls=parallel_tool_calls,
             max_tool_calls=max_tool_calls,
         )
@@ -346,6 +348,7 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None = None,
         max_infer_iters: int | None = 10,
         guardrail_ids: list[str] | None = None,
+        parallel_tool_calls: bool | None = True,
         max_tool_calls: int | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         # These should never be None when called from create_openai_response (which sets defaults)
@@ -385,6 +388,7 @@ class OpenAIResponsesImpl:
             created_at=created_at,
             text=text,
             max_infer_iters=max_infer_iters,
+            parallel_tool_calls=parallel_tool_calls,
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
```
```diff
@@ -114,6 +114,7 @@ class StreamingResponseOrchestrator:
         safety_api,
         guardrail_ids: list[str] | None = None,
         prompt: OpenAIResponsePrompt | None = None,
+        parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
     ):
         self.inference_api = inference_api
@@ -128,6 +129,8 @@ class StreamingResponseOrchestrator:
         self.prompt = prompt
         # System message that is inserted into the model's context
         self.instructions = instructions
+        # Whether to allow more than one function tool call generated per turn.
+        self.parallel_tool_calls = parallel_tool_calls
         # Max number of total calls to built-in tools that can be processed in a response
         self.max_tool_calls = max_tool_calls
         self.sequence_number = 0
@@ -190,6 +193,7 @@ class StreamingResponseOrchestrator:
             usage=self.accumulated_usage,
             instructions=self.instructions,
             prompt=self.prompt,
+            parallel_tool_calls=self.parallel_tool_calls,
             max_tool_calls=self.max_tool_calls,
         )
```
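The hunks above only wire the flag through to `StreamingResponseOrchestrator`; the enforcement logic itself is not shown in this excerpt. Below is a hypothetical sketch of the kind of check the orchestrator could apply when the flag is `False` — all names here are illustrative, not taken from the llama-stack codebase.

```python
# Hypothetical sketch of capping function calls when parallel_tool_calls=False.
# ToolCall and enforce_parallel_tool_calls are illustrative names, not the
# actual llama-stack implementation.
from dataclasses import dataclass


@dataclass
class ToolCall:
    name: str
    arguments: str


def enforce_parallel_tool_calls(
    tool_calls: list[ToolCall],
    parallel_tool_calls: bool | None,
) -> list[ToolCall]:
    """Keep at most one function call when parallel tool calls are disabled.

    A value of None is treated like True (the API default), so nothing is
    dropped in that case.
    """
    if parallel_tool_calls is False and len(tool_calls) > 1:
        # Only the first generated call survives; the rest are discarded.
        return tool_calls[:1]
    return tool_calls
```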