diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 09ba552c9..7995f4b31 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -536,7 +536,6 @@ class ChatAgent(ShieldRunnerMixin): output_attachments = [] n_iter = await self.storage.get_num_infer_iters_in_turn(session_id, turn_id) or 0 - print("initial n_iter", n_iter) # Build a map of custom tools to their definitions for faster lookup client_tools = {} @@ -616,9 +615,7 @@ class ChatAgent(ShieldRunnerMixin): span.set_attribute("output", f"content: {content} tool_calls: {tool_calls}") n_iter += 1 - print(f"n_iter after update: {n_iter}") await self.storage.set_num_infer_iters_in_turn(session_id, turn_id, n_iter) - print(self.agent_config.max_infer_iters) stop_reason = stop_reason or StopReason.out_of_tokens @@ -655,8 +652,8 @@ class ChatAgent(ShieldRunnerMixin): if n_iter >= self.agent_config.max_infer_iters: log.info("Done with MAX iterations, exiting.") - # we always resume tool call, so we need a way to indicate to client that we are done - # currently let's do it with having the tool call be + # NOTE: mark end_of_turn to indicate to client that we are done with the turn + # Do not continue the tool call loop after this point message.stop_reason = StopReason.end_of_turn yield message break @@ -707,6 +704,8 @@ class ChatAgent(ShieldRunnerMixin): # If tool is a client tool, yield CompletionMessage and return if tool_call.tool_name in client_tools: + # NOTE: mark end_of_message to indicate to client that it may + # call the tool and continue the conversation with the tool's response. message.stop_reason = StopReason.end_of_message await self.storage.set_in_progress_tool_call_step( session_id,