chore: deprecate allow_turn_resume (#1377)

# What does this PR do?

- Deprecate the `allow_turn_resume` flag, which existed only to stay backward
  compatible; with it gone, turns whose output message contains tool calls
  always end in the awaiting-input state (a hedged sketch of the resulting
  flow is included below).
- Closes https://github.com/meta-llama/llama-stack/issues/1363
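
For context, here is a minimal, hedged sketch (not part of this diff) of how a consumer of the turn stream can pick up the awaiting-input event that is now emitted whenever the output message carries tool calls. Only the payload class name is taken from the diff; the import path and the `payload.turn` field are assumptions.

```python
# Hedged sketch, not part of this PR. The payload class name comes from the diff;
# the import path and the `payload.turn` attribute are assumptions.
from llama_stack.apis.agents import AgentTurnResponseTurnAwaitingInputPayload  # assumed path


async def wait_for_tool_input(stream):
    """Consume an agent turn stream until the turn awaits client tool input.

    With `allow_turn_resume` gone, any turn whose output message carries tool
    calls ends by emitting this payload, so callers branch on the event rather
    than on a request flag.
    """
    async for chunk in stream:
        payload = chunk.event.payload
        if isinstance(payload, AgentTurnResponseTurnAwaitingInputPayload):
            return payload.turn  # assumed field holding the awaiting turn
    return None
```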

## Test Plan
```
LLAMA_STACK_CONFIG=fireworks pytest -v tests/api/agents/test_agents.py --inference-model "meta-llama/Llama-3.3-70B-Instruct" --record-responses
```

<img width="1054" alt="image"
src="https://github.com/user-attachments/assets/d31de2d4-0953-41e1-a71a-7e1579fa351a"
/>


Commit 158b6dc404 (parent cad5eed4b5), authored by Xi Yan on 2025-03-04 12:22:11 -08:00 and committed via GitHub.
9 changed files with 8554 additions and 50 deletions

```diff
@@ -243,8 +243,7 @@ class ChatAgent(ShieldRunnerMixin):
             steps=steps,
         )
         await self.storage.add_turn_to_session(request.session_id, turn)
-        if output_message.tool_calls and request.allow_turn_resume:
+        if output_message.tool_calls:
             chunk = AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseTurnAwaitingInputPayload(
@@ -686,10 +685,16 @@ class ChatAgent(ShieldRunnerMixin):
                     message.content = [message.content] + output_attachments
                 yield message
             else:
-                logcat.debug("agents", f"completion message with EOM (iter: {n_iter}): {str(message)}")
+                logcat.debug(
+                    "agents",
+                    f"completion message with EOM (iter: {n_iter}): {str(message)}",
+                )
                 input_messages = input_messages + [message]
         else:
-            logcat.debug("agents", f"completion message (iter: {n_iter}) from the model: {str(message)}")
+            logcat.debug(
+                "agents",
+                f"completion message (iter: {n_iter}) from the model: {str(message)}",
+            )
             # 1. Start the tool execution step and progress
             step_id = str(uuid.uuid4())
             yield AgentTurnResponseStreamChunk(
```

```diff
@@ -140,7 +140,6 @@ class MetaReferenceAgentsImpl(Agents):
         documents: Optional[List[Document]] = None,
         stream: Optional[bool] = False,
         tool_config: Optional[ToolConfig] = None,
-        allow_turn_resume: Optional[bool] = False,
     ) -> AsyncGenerator:
         request = AgentTurnCreateRequest(
             agent_id=agent_id,
@@ -150,7 +149,6 @@ class MetaReferenceAgentsImpl(Agents):
             toolgroups=toolgroups,
             documents=documents,
             tool_config=tool_config,
-            allow_turn_resume=allow_turn_resume,
         )
         if stream:
             return self._create_agent_turn_streaming(request)
```
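
For illustration, a hedged sketch of the call site after this change: `create_agent_turn` simply stops taking `allow_turn_resume`, so passing it would now raise a `TypeError`. Only the keywords visible in the diff are certain; `session_id` and `messages` are assumptions about the rest of the signature, and `impl` stands in for a configured `MetaReferenceAgentsImpl`.

```python
# Hedged sketch, not part of this PR. Keywords not visible in the diff
# (`session_id`, `messages`) are assumed to remain part of the signature.
async def start_turn(impl, agent_id, session_id, messages):
    return await impl.create_agent_turn(
        agent_id=agent_id,
        session_id=session_id,  # assumed parameter
        messages=messages,      # assumed parameter
        stream=True,
        # allow_turn_resume=True,  # no longer accepted; passing it raises TypeError
    )
```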