diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index f338aeea0..dbfe65960 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -7283,6 +7283,9 @@
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputTool"
}
+ },
+ "max_infer_iters": {
+ "type": "integer"
}
},
"additionalProperties": false,
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index a87c6a80b..c185488b4 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -5149,6 +5149,8 @@ components:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputTool'
+ max_infer_iters:
+ type: integer
additionalProperties: false
required:
- input
diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index b79c512b8..956f4a614 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -604,6 +604,7 @@ class Agents(Protocol):
stream: bool | None = False,
temperature: float | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
+ max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
"""Create a new OpenAI response.
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py
index bcbfcbe31..854f8b285 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -325,9 +325,10 @@ class MetaReferenceAgentsImpl(Agents):
stream: bool | None = False,
temperature: float | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
+ max_infer_iters: int | None = 10,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.create_openai_response(
- input, model, instructions, previous_response_id, store, stream, temperature, tools
+ input, model, instructions, previous_response_id, store, stream, temperature, tools, max_infer_iters
)
async def list_openai_responses(