diff --git a/llama_stack/models/llama/llama3/chat_format.py b/llama_stack/models/llama/llama3/chat_format.py
index f55cd5e1c..fe7a7a898 100644
--- a/llama_stack/models/llama/llama3/chat_format.py
+++ b/llama_stack/models/llama/llama3/chat_format.py
@@ -226,7 +226,6 @@ class ChatFormat:
                     arguments_json=json.dumps(tool_arguments),
                 )
             )
-            content = ""
 
         return RawMessage(
             role="assistant",
diff --git a/llama_stack/models/llama/llama4/chat_format.py b/llama_stack/models/llama/llama4/chat_format.py
index 160bb00f8..9d60d00e9 100644
--- a/llama_stack/models/llama/llama4/chat_format.py
+++ b/llama_stack/models/llama/llama4/chat_format.py
@@ -301,7 +301,6 @@ class ChatFormat:
                     arguments=tool_arguments,
                 )
             )
-            content = ""
 
         return RawMessage(
             role="assistant",
diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py
index da5ded0f3..0b56ba1f7 100644
--- a/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -452,7 +452,7 @@ class MetaReferenceInferenceImpl(
             for token_results in self.generator.chat_completion(request_batch):
                 first = token_results[0]
-                if not first.finished:
+                if not first.finished and not first.ignore_token:
                     if os.environ.get("LLAMA_MODELS_DEBUG", "0") in ("1", "2"):
                         cprint(first.text, "cyan", end="")
                     if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "2":