diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 0a5d93d80..516a0174d 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -4287,6 +4287,9 @@
},
"tool_config": {
"$ref": "#/components/schemas/ToolConfig"
+ },
+ "allow_turn_resume": {
+ "type": "boolean"
}
},
"additionalProperties": false,
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index c05eef95e..c7f439982 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -2779,6 +2779,8 @@ components:
$ref: '#/components/schemas/AgentTool'
tool_config:
$ref: '#/components/schemas/ToolConfig'
+ allow_turn_resume:
+ type: boolean
additionalProperties: false
required:
- messages
diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index 8fde864e4..eb1cdde90 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -296,6 +296,9 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
stream: Optional[bool] = False
tool_config: Optional[ToolConfig] = None
+ # TODO(xiyan): opt-in flag, off by default for backward compatibility; update for 0.1.5
+ allow_turn_resume: Optional[bool] = False
+
@json_schema_type
class AgentTurnResumeRequest(BaseModel):
@@ -352,6 +355,7 @@ class Agents(Protocol):
documents: Optional[List[Document]] = None,
toolgroups: Optional[List[AgentToolGroup]] = None,
tool_config: Optional[ToolConfig] = None,
+ allow_turn_resume: Optional[bool] = False,
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...
@webmethod(
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index 1d731fd8f..e3f4b2173 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -228,7 +228,7 @@ class ChatAgent(ShieldRunnerMixin):
)
await self.storage.add_turn_to_session(request.session_id, turn)
- if output_message.tool_calls:
+ if output_message.tool_calls and request.allow_turn_resume:
chunk = AgentTurnResponseStreamChunk(
event=AgentTurnResponseEvent(
payload=AgentTurnResponseTurnAwaitingInputPayload(
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py
index 19b4c0925..8a4d91238 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -147,6 +147,7 @@ class MetaReferenceAgentsImpl(Agents):
documents: Optional[List[Document]] = None,
stream: Optional[bool] = False,
tool_config: Optional[ToolConfig] = None,
+ allow_turn_resume: Optional[bool] = False,
) -> AsyncGenerator:
request = AgentTurnCreateRequest(
agent_id=agent_id,
@@ -156,6 +157,7 @@ class MetaReferenceAgentsImpl(Agents):
toolgroups=toolgroups,
documents=documents,
tool_config=tool_config,
+ allow_turn_resume=allow_turn_resume,
)
if stream:
return self._create_agent_turn_streaming(request)