feat(1/n): api: unify agents for handling server & client tools (#1178)

# Problem Our current Agent framework has discrepancies in definition on how we handle server side and client side tools. 1. Server Tools: a single Turn is returned including `ToolExecutionStep` in agenst 2. Client Tools: `create_agent_turn` is called in loop with client agent lib yielding the agent chunk ad6ffc63df/src/llama_stack_client/lib/agents/agent.py (L186-L211) This makes it inconsistent to work with server & client tools. It also complicates the logs to telemetry to get information about agents turn / history for observability. #### Principle The same `turn_id` should be used to represent the steps required to complete a user message including client tools. ## Solution 1. `AgentTurnResponseEventType.turn_awaiting_input` status to indicate that the current turn is not completed, and awaiting tool input 2. `continue_agent_turn` endpoint to update agent turn with client's tool response. # What does this PR do? - Skeleton API as example ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] - Just API update, no functionality change ``` llama stack run + client-sdk test ``` <img width="842" alt="image" src="https://github.com/user-attachments/assets/7ac56b5f-f424-4632-9476-7e0f57555bc3" /> [//]: # (## Documentation)
2025-06-28 02:53:30 +00:00 · 2025-02-21 11:48:27 -08:00 · 2025-02-21 11:48:27 -08:00 · 0fe071764f
commit 0fe071764f
parent 992f865b2e
7 changed files with 454 additions and 21 deletions
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@ -194,6 +194,7 @@ class AgentTurnResponseEventType(Enum):

    turn_start = "turn_start"
    turn_complete = "turn_complete"
+    turn_awaiting_input = "turn_awaiting_input"


@json_schema_type
@ -235,6 +236,14 @@ class AgentTurnResponseTurnCompletePayload(BaseModel):
    turn: Turn


+@json_schema_type
+class AgentTurnResponseTurnAwaitingInputPayload(BaseModel):
+    event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input.value] = (
+        AgentTurnResponseEventType.turn_awaiting_input.value
+    )
+    turn: Turn
+
+
 AgentTurnResponseEventPayload = register_schema(
    Annotated[
        Union[
@ -243,6 +252,7 @@ AgentTurnResponseEventPayload = register_schema(
            AgentTurnResponseStepCompletePayload,
            AgentTurnResponseTurnStartPayload,
            AgentTurnResponseTurnCompletePayload,
+            AgentTurnResponseTurnAwaitingInputPayload,
        ],
        Field(discriminator="event_type"),
    ],
@ -286,6 +296,18 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
    stream: Optional[bool] = False
    tool_config: Optional[ToolConfig] = None

+    # TODO (xiyan): temporary flag, will remove for 0.1.5
+    allow_turn_resume: Optional[bool] = False
+
+
+@json_schema_type
+class AgentTurnResumeRequest(BaseModel):
+    agent_id: str
+    session_id: str
+    turn_id: str
+    tool_responses: List[ToolResponseMessage]
+    stream: Optional[bool] = False
+

@json_schema_type
 class AgentTurnResponseStreamChunk(BaseModel):
@ -333,8 +355,34 @@ class Agents(Protocol):
        documents: Optional[List[Document]] = None,
        toolgroups: Optional[List[AgentToolGroup]] = None,
        tool_config: Optional[ToolConfig] = None,
+        allow_turn_resume: Optional[bool] = False,
    ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...

+    @webmethod(
+        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
+        method="POST",
+    )
+    async def resume_agent_turn(
+        self,
+        agent_id: str,
+        session_id: str,
+        turn_id: str,
+        tool_responses: List[ToolResponseMessage],
+        stream: Optional[bool] = False,
+    ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]:
+        """Resume an agent turn with executed tool call responses.
+
+        When a Turn has the status `awaiting_input` due to pending input from client side tool calls, this endpoint can be used to submit the outputs from the tool calls once they are ready.
+
+        :param agent_id: The ID of the agent to resume.
+        :param session_id: The ID of the session to resume.
+        :param turn_id: The ID of the turn to resume.
+        :param tool_responses: The tool call responses to resume the turn with.
+        :param stream: Whether to stream the response.
+        :returns: A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk objects.
+        """
+        ...
+
    @webmethod(
        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
        method="GET",