diff --git a/llama_stack/models/llama/llama4/chat_format.py b/llama_stack/models/llama/llama4/chat_format.py
index 9e840c11b..75d7cbc0e 100644
--- a/llama_stack/models/llama/llama4/chat_format.py
+++ b/llama_stack/models/llama/llama4/chat_format.py
@@ -224,7 +224,9 @@ class ChatFormat:
 
         eom = False
         if message.role == "assistant":
-            eom = message.stop_reason == StopReason.end_of_message
+            eom = message.stop_reason == StopReason.end_of_message or message.tool_calls
+        elif message.role == "tool":
+            eom = True
 
         tokens.append(self.tokenizer.special_tokens["<|eom|>" if eom else "<|eot|>"])
         return tokens, images
diff --git a/tests/integration/inference/test_text_inference.py b/tests/integration/inference/test_text_inference.py
index e01790c30..a204511d0 100644
--- a/tests/integration/inference/test_text_inference.py
+++ b/tests/integration/inference/test_text_inference.py
@@ -513,7 +513,11 @@ def test_text_chat_completion_with_tool_calling_loop_non_streaming(client_with_m
     # 2. no messages bust last message is tool response
     while len(tc["messages"]) > 0 or (len(messages) > 0 and messages[-1]["role"] == "tool"):
         # do not take new messages if last message is tool response
-        if len(messages) == 0 or messages[-1]["role"] != "tool":
+        if (
+            len(messages) == 0
+            or (isinstance(messages[-1], dict) and messages[-1]["role"] != "tool")
+            or (not isinstance(messages[-1], dict) and messages[-1].role != "tool")
+        ):
             new_messages = tc["messages"].pop(0)
             messages += new_messages
 
@@ -523,9 +527,16 @@ def test_text_chat_completion_with_tool_calling_loop_non_streaming(client_with_m
             messages=messages,
             tools=tc["tools"],
             stream=False,
+            # sampling_params={
+            #     "strategy": {
+            #         "type": "top_p",
+            #         "top_p": 0.9,
+            #         "temperature": 0.6,
+            #     }
+            # },
         )
         op_msg = response.completion_message
-        messages.append(op_msg)
+        messages.append(op_msg.model_dump())
         pprint(op_msg)
 
         assert op_msg.role == "assistant"
@@ -536,8 +547,6 @@ def test_text_chat_completion_with_tool_calling_loop_non_streaming(client_with_m
             assert op_msg.tool_calls[0].tool_name == expected["tool_name"]
             assert op_msg.tool_calls[0].arguments == expected["tool_arguments"]
 
-            # messages.append(op_msg)
-
             tool_response = tc["tool_responses"].pop(0)
             messages.append(
                 # Tool Response Message
diff --git a/tests/integration/test_cases/inference/chat_completion.json b/tests/integration/test_cases/inference/chat_completion.json
index c6e29d0b7..e6136cc75 100644
--- a/tests/integration/test_cases/inference/chat_completion.json
+++ b/tests/integration/test_cases/inference/chat_completion.json
@@ -105,7 +105,7 @@
         [
           {
             "role": "system",
-            "content": "You are a helpful assistant"
+            "content": "You are a helpful assistant who can answer general questions or invoke tools when necessary. In addition to tool calls, you should also augment your responses by using the tool outputs."
           },
           {
             "role": "user",
@@ -133,7 +133,7 @@
       ],
       "tool_responses": [
         {
-          "response": "70 degrees and foggy"
+          "response": "{'response': '70 degrees and foggy'}"
         }
       ],
       "expected": [
@@ -161,7 +161,7 @@
         [
           {
             "role": "system",
-            "content": "NEVER invoke the same function with the same argumennts twice. Use the response of the first call instead."
+            "content": "You are a helpful assistant who can answer general questions or invoke tools when necessary. In addition to tool calls, you should also augment your responses by using the tool outputs."
           },
           {
             "role": "user",
@@ -183,7 +183,7 @@
       ],
       "tool_responses": [
         {
-          "response": "70 degrees and foggy"
+          "response": "{'response': '70 degrees and foggy'}"
         }
       ],
       "expected": [
@@ -207,11 +207,11 @@
         [
           {
             "role": "system",
-            "content": "You are a helpful assistant with tools. NEVER invoke the same function with the same argumennts twice. Use the response of the first call instead."
+            "content": "You are a helpful assistant who can answer general questions or invoke tools when necessary. In addition to tool calls, you should also augment your responses by using the tool outputs."
           },
           {
             "role": "user",
-            "content": "Please add a new product with name 'Widget', price 19.99, in stock, and tags ['new', 'sale']."
+            "content": "Please add a new product with name 'Widget', price 19.99, in stock, and tags ['new', 'sale'] and give me the product id."
           }
         ]
       ],
@@ -241,7 +241,7 @@
       ],
       "tool_responses": [
         {
-          "response": "Successfully added product with id: 123"
+          "response": "{'response': 'Successfully added product with id: 123'}"
         }
       ],
       "expected": [
@@ -271,7 +271,7 @@
         [
           {
             "role": "system",
-            "content": "You are a peronal assistant with tools. Todays date is 2025-03-01."
+            "content": "Today's date is 2025-03-01.\n\nYou are a helpful assistant who can answer general questions or invoke tools when necessary. In addition to tool calls, you should also augment your responses by using the tool outputs."
           },
           {
             "role": "user",
@@ -329,10 +329,10 @@
       ],
       "tool_responses": [
         {
-          "response": "No meetings found"
+          "response": "{'response': 'No events found for 2025-03-03 at 10:00'}"
         },
         {
-          "response": "Successfully created new event with id: e_123"
+          "response": "{'response': 'Successfully created new event with id: e_123'}"
         }
       ],
       "expected": [
@@ -376,7 +376,7 @@
         [
           {
             "role": "system",
-            "content": "You are a helpful assistant with tools. Todays date is 2025-03-01."
+            "content": "Today's date is 2025-03-01.\n\nYou are a helpful assistant who can answer general questions or invoke tools when necessary. In addition to tool calls, you should also augment your responses by using the tool outputs."
           },
           {
             "role": "user",
@@ -386,7 +386,7 @@
         [
           {
             "role": "user",
-            "content": "Was is less than Feb of last year? Only answer with yes or no."
+            "content": "Was it less than Feb of last year? Only answer with yes or no."
           }
         ]
       ],
@@ -396,11 +396,11 @@
           "description": "Get monthly expense summary",
           "parameters": {
             "month": {
-              "param_type": "number",
+              "param_type": "int",
               "description": "Month of the year (1-12)"
             },
             "year": {
-              "param_type": "number",
+              "param_type": "int",
               "description": "Year"
             }
           }
@@ -408,10 +408,10 @@
       ],
       "tool_responses": [
         {
-          "response": "Total expenses for January 2025: $1000"
+          "response": "{'response': 'Total expenses for January 2025: $1000'}"
         },
         {
-          "response": "Total expenses for February 2024: $2000"
+          "response": "{'response': 'Total expenses for February 2024: $2000'}"
         }
       ],
       "expected": [