diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
index e02d0031c..be3d1730b 100644
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@@ -6723,9 +6723,10 @@ components:
           type: array
           title: Output
         parallel_tool_calls:
-          type: boolean
-          title: Parallel Tool Calls
-          default: false
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
@@ -7125,6 +7126,11 @@ components:
           anyOf:
           - type: string
           - type: 'null'
+        parallel_tool_calls:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
@@ -7251,9 +7257,10 @@ components:
           type: array
           title: Output
         parallel_tool_calls:
-          type: boolean
-          title: Parallel Tool Calls
-          default: false
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml
index d3d42f806..2290f4cf0 100644
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -3566,9 +3566,10 @@ components:
           type: array
           title: Output
         parallel_tool_calls:
-          type: boolean
-          title: Parallel Tool Calls
-          default: false
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
@@ -3968,6 +3969,11 @@ components:
           anyOf:
           - type: string
           - type: 'null'
+        parallel_tool_calls:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
@@ -4094,9 +4100,10 @@ components:
           type: array
           title: Output
         parallel_tool_calls:
-          type: boolean
-          title: Parallel Tool Calls
-          default: false
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml
index e2f0dcf08..dcb9337d7 100644
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@@ -3263,9 +3263,10 @@ components:
           type: array
           title: Output
         parallel_tool_calls:
-          type: boolean
-          title: Parallel Tool Calls
-          default: false
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
@@ -3662,9 +3663,10 @@ components:
           type: array
           title: Output
         parallel_tool_calls:
-          type: boolean
-          title: Parallel Tool Calls
-          default: false
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index 3d296b36d..b17fae790 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -5744,9 +5744,10 @@ components:
           type: array
           title: Output
         parallel_tool_calls:
-          type: boolean
-          title: Parallel Tool Calls
-          default: false
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
@@ -6146,6 +6147,11 @@ components:
           anyOf:
           - type: string
           - type: 'null'
+        parallel_tool_calls:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
@@ -6272,9 +6278,10 @@ components:
           type: array
           title: Output
         parallel_tool_calls:
-          type: boolean
-          title: Parallel Tool Calls
-          default: false
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml
index e02d0031c..be3d1730b 100644
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -6723,9 +6723,10 @@ components:
           type: array
           title: Output
         parallel_tool_calls:
-          type: boolean
-          title: Parallel Tool Calls
-          default: false
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
@@ -7125,6 +7126,11 @@ components:
           anyOf:
           - type: string
           - type: 'null'
+        parallel_tool_calls:
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
@@ -7251,9 +7257,10 @@ components:
           type: array
           title: Output
         parallel_tool_calls:
-          type: boolean
-          title: Parallel Tool Calls
-          default: false
+          anyOf:
+          - type: boolean
+          - type: 'null'
+          default: true
         previous_response_id:
           anyOf:
           - type: string
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
index 6dce2889c..2620a9d22 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -90,6 +90,7 @@ class MetaReferenceAgentsImpl(Agents):
         model: str,
         prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
+        parallel_tool_calls: bool | None = True,
         previous_response_id: str | None = None,
         conversation: str | None = None,
         store: bool | None = True,
@@ -118,6 +119,7 @@ class MetaReferenceAgentsImpl(Agents):
             include,
             max_infer_iters,
             guardrails,
+            parallel_tool_calls,
             max_tool_calls,
         )
         return result  # type: ignore[no-any-return]
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index cb0fe284e..7e080a675 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -252,6 +252,7 @@ class OpenAIResponsesImpl:
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,
         guardrails: list[str | ResponseGuardrailSpec] | None = None,
+        parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
     ):
         stream = bool(stream)
@@ -296,6 +297,7 @@ class OpenAIResponsesImpl:
             tools=tools,
             max_infer_iters=max_infer_iters,
             guardrail_ids=guardrail_ids,
+            parallel_tool_calls=parallel_tool_calls,
             max_tool_calls=max_tool_calls,
         )

@@ -346,6 +348,7 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None = None,
         max_infer_iters: int | None = 10,
         guardrail_ids: list[str] | None = None,
+        parallel_tool_calls: bool | None = True,
         max_tool_calls: int | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         # These should never be None when called from create_openai_response (which sets defaults)
@@ -385,6 +388,7 @@ class OpenAIResponsesImpl:
             created_at=created_at,
             text=text,
             max_infer_iters=max_infer_iters,
+            parallel_tool_calls=parallel_tool_calls,
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index a73b6bf68..953067140 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -116,6 +116,7 @@ class StreamingResponseOrchestrator:
         safety_api,
         guardrail_ids: list[str] | None = None,
         prompt: OpenAIResponsePrompt | None = None,
+        parallel_tool_calls: bool | None = None,
         max_tool_calls: int | None = None,
     ):
         self.inference_api = inference_api
@@ -130,6 +131,8 @@ class StreamingResponseOrchestrator:
         self.prompt = prompt
         # System message that is inserted into the model's context
         self.instructions = instructions
+        # Whether to allow more than one function tool call generated per turn.
+        self.parallel_tool_calls = parallel_tool_calls
         # Max number of total calls to built-in tools that can be processed in a response
         self.max_tool_calls = max_tool_calls
         self.sequence_number = 0
@@ -192,6 +195,7 @@ class StreamingResponseOrchestrator:
             usage=self.accumulated_usage,
             instructions=self.instructions,
             prompt=self.prompt,
+            parallel_tool_calls=self.parallel_tool_calls,
             max_tool_calls=self.max_tool_calls,
         )

diff --git a/src/llama_stack_api/agents.py b/src/llama_stack_api/agents.py
index ca0611746..9b767608a 100644
--- a/src/llama_stack_api/agents.py
+++ b/src/llama_stack_api/agents.py
@@ -72,6 +72,7 @@ class Agents(Protocol):
         model: str,
         prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
+        parallel_tool_calls: bool | None = True,
         previous_response_id: str | None = None,
         conversation: str | None = None,
         store: bool | None = True,
diff --git a/src/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py
index 952418f1c..e20004487 100644
--- a/src/llama_stack_api/openai_responses.py
+++ b/src/llama_stack_api/openai_responses.py
@@ -585,7 +585,7 @@ class OpenAIResponseObject(BaseModel):
     :param model: Model identifier used for generation
     :param object: Object type identifier, always "response"
     :param output: List of generated output items (messages, tool calls, etc.)
-    :param parallel_tool_calls: Whether tool calls can be executed in parallel
+    :param parallel_tool_calls: (Optional) Whether to allow more than one function tool call generated per turn.
     :param previous_response_id: (Optional) ID of the previous response in a conversation
     :param prompt: (Optional) Reference to a prompt template and its variables.
     :param status: Current status of the response generation
@@ -605,7 +605,7 @@ class OpenAIResponseObject(BaseModel):
     model: str
     object: Literal["response"] = "response"
     output: Sequence[OpenAIResponseOutput]
-    parallel_tool_calls: bool = False
+    parallel_tool_calls: bool | None = True
     previous_response_id: str | None = None
     prompt: OpenAIResponsePrompt | None = None
     status: str
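Net effect: the Responses API now accepts an OpenAI-style `parallel_tool_calls` flag (nullable, defaulting to `true`, rather than a non-nullable field defaulting to `false`) and threads it from the public `Agents` protocol through `OpenAIResponsesImpl` into `StreamingResponseOrchestrator`, which echoes it back on the response object. A minimal sketch of how a client might exercise the new parameter follows; the base URL, API key, and model id are illustrative assumptions, not values taken from this patch:

```python
# Sketch: exercising the new parallel_tool_calls parameter via the
# OpenAI-compatible Responses endpoint. base_url, api_key, and model
# are placeholder assumptions for a locally running Llama Stack server.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

tools = [
    {
        "type": "function",
        "name": "get_weather",
        "description": "Return current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }
]

# Per OpenAI semantics, parallel_tool_calls=False asks the model to emit
# at most one function tool call per turn; omitting it (or sending null)
# falls back to the server-side default of True introduced by this change.
response = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    input="What's the weather in Paris and in Tokyo?",
    tools=tools,
    parallel_tool_calls=False,
)

print(response.parallel_tool_calls)  # echoed back on the response object
```

Note that this diff wires the flag through to the orchestrator and into the returned `OpenAIResponseObject`; whether and how the inference layer enforces it against the model is not shown in these hunks.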