feat(agent): support multiple tool groups (#1556)

Summary: Closes #1488.

Test Plan: Added a new integration test:

```
LLAMA_STACK_CONFIG=dev pytest -s -v tests/integration/agents/test_agents.py --safety-shield meta-llama/Llama-Guard-3-8B --text-model openai/gpt-4o-mini
```

---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/meta-llama/llama-stack/pull/1556).
* __->__ #1556
* #1550
2025-12-03 09:53:45 +00:00 · 2025-03-17 22:13:09 -07:00 · 2025-03-17 22:13:09 -07:00 · 37f155e41d
commit 37f155e41d
parent c23a7af5d6
3 changed files with 157 additions and 108 deletions
--- a/tests/integration/agents/test_agents.py
+++ b/tests/integration/agents/test_agents.py
@@ -584,7 +584,7 @@ def test_rag_and_code_agent(llama_stack_client_with_mocked_inference, agent_conf
    [(get_boiling_point, False), (get_boiling_point_with_metadata, True)],
 )
 def test_create_turn_response(llama_stack_client_with_mocked_inference, agent_config, client_tools):
-    client_tool, expectes_metadata = client_tools
+    client_tool, expects_metadata = client_tools
    agent_config = {
        **agent_config,
        "input_shields": [],
@@ -610,7 +610,7 @@ def test_create_turn_response(llama_stack_client_with_mocked_inference, agent_co
    assert steps[0].step_type == "inference"
    assert steps[1].step_type == "tool_execution"
    assert steps[1].tool_calls[0].tool_name.startswith("get_boiling_point")
-    if expectes_metadata:
+    if expects_metadata:
        assert steps[1].tool_responses[0].metadata["source"] == "https://www.google.com"
    assert steps[2].step_type == "inference"

@@ -622,3 +622,44 @@ def test_create_turn_response(llama_stack_client_with_mocked_inference, agent_co
            assert last_step_completed_at < step.started_at
            assert step.started_at < step.completed_at
            last_step_completed_at = step.completed_at
+
+
+def test_multi_tool_calls(llama_stack_client_with_mocked_inference, agent_config):
+    if "gpt" not in agent_config["model"]:
+        pytest.xfail("Only tested on GPT models")
+
+    agent_config = {
+        **agent_config,
+        "tools": [get_boiling_point],
+    }
+
+    agent = Agent(llama_stack_client_with_mocked_inference, **agent_config)
+    session_id = agent.create_session(f"test-session-{uuid4()}")
+
+    response = agent.create_turn(
+        messages=[
+            {
+                "role": "user",
+                "content": "Call get_boiling_point twice to answer: What is the boiling point of polyjuice in both celsius and fahrenheit?",
+            },
+        ],
+        session_id=session_id,
+        stream=False,
+    )
+    steps = response.steps
+    assert len(steps) == 7
+    assert steps[0].step_type == "shield_call"
+    assert steps[1].step_type == "inference"
+    assert steps[2].step_type == "shield_call"
+    assert steps[3].step_type == "tool_execution"
+    assert steps[4].step_type == "shield_call"
+    assert steps[5].step_type == "inference"
+    assert steps[6].step_type == "shield_call"
+
+    tool_execution_step = steps[3]
+    assert len(tool_execution_step.tool_calls) == 2
+    assert tool_execution_step.tool_calls[0].tool_name.startswith("get_boiling_point")
+    assert tool_execution_step.tool_calls[1].tool_name.startswith("get_boiling_point")
+
+    output = response.output_message.content.lower()
+    assert "-100" in output and "-212" in output