diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index c904fdbef..eb3399788 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -296,9 +296,6 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
     stream: Optional[bool] = False
     tool_config: Optional[ToolConfig] = None
 
-    # TODO (xiyan): temporary flag, will remove for 0.1.5
-    allow_turn_resume: Optional[bool] = False
-
 
 @json_schema_type
 class AgentTurnResumeRequest(BaseModel):
@@ -355,7 +352,6 @@ class Agents(Protocol):
         documents: Optional[List[Document]] = None,
         toolgroups: Optional[List[AgentToolGroup]] = None,
         tool_config: Optional[ToolConfig] = None,
-        allow_turn_resume: Optional[bool] = False,
     ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...
 
     @webmethod(
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index 7a6cc551b..25cf0990a 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -244,22 +244,13 @@ class ChatAgent(ShieldRunnerMixin):
             )
             await self.storage.add_turn_to_session(request.session_id, turn)
 
-        if output_message.tool_calls and request.allow_turn_resume:
-            chunk = AgentTurnResponseStreamChunk(
-                event=AgentTurnResponseEvent(
-                    payload=AgentTurnResponseTurnAwaitingInputPayload(
-                        turn=turn,
-                    )
-                )
-            )
-        else:
-            chunk = AgentTurnResponseStreamChunk(
-                event=AgentTurnResponseEvent(
-                    payload=AgentTurnResponseTurnCompletePayload(
-                        turn=turn,
-                    )
+        chunk = AgentTurnResponseStreamChunk(
+            event=AgentTurnResponseEvent(
+                payload=AgentTurnResponseTurnCompletePayload(
+                    turn=turn,
                 )
             )
+        )
 
         yield chunk
 
@@ -686,10 +677,16 @@ class ChatAgent(ShieldRunnerMixin):
                     message.content = [message.content] + output_attachments
                     yield message
                 else:
-                    logcat.debug("agents", f"completion message with EOM (iter: {n_iter}): {str(message)}")
+                    logcat.debug(
+                        "agents",
+                        f"completion message with EOM (iter: {n_iter}): {str(message)}",
+                    )
                     input_messages = input_messages + [message]
             else:
-                logcat.debug("agents", f"completion message (iter: {n_iter}) from the model: {str(message)}")
+                logcat.debug(
+                    "agents",
+                    f"completion message (iter: {n_iter}) from the model: {str(message)}",
+                )
                 # 1. Start the tool execution step and progress
                 step_id = str(uuid.uuid4())
                 yield AgentTurnResponseStreamChunk(
@@ -810,7 +807,7 @@ class ChatAgent(ShieldRunnerMixin):
     ) -> Tuple[List[ToolDefinition], Dict[str, str]]:
         # Determine which tools to include
         agent_config_toolgroups = {
-            toolgroup.name if isinstance(toolgroup, AgentToolGroupWithArgs) else toolgroup
+            (toolgroup.name if isinstance(toolgroup, AgentToolGroupWithArgs) else toolgroup)
             for toolgroup in self.agent_config.toolgroups
         }
         toolgroups_for_turn_set = (
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py
index b5eb12c49..db33bca4a 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -140,7 +140,6 @@ class MetaReferenceAgentsImpl(Agents):
         documents: Optional[List[Document]] = None,
         stream: Optional[bool] = False,
         tool_config: Optional[ToolConfig] = None,
-        allow_turn_resume: Optional[bool] = False,
     ) -> AsyncGenerator:
         request = AgentTurnCreateRequest(
             agent_id=agent_id,
@@ -150,7 +149,6 @@ class MetaReferenceAgentsImpl(Agents):
             toolgroups=toolgroups,
             documents=documents,
             tool_config=tool_config,
-            allow_turn_resume=allow_turn_resume,
         )
         if stream:
            return self._create_agent_turn_streaming(request)