From fc960a35dd76cfad148a8360117a5fed9241d9c5 Mon Sep 17 00:00:00 2001
From: Swapna Lekkala
Date: Wed, 15 Oct 2025 06:28:06 -0700
Subject: [PATCH] skip emitting deltas

---
 .../agents/meta_reference/responses/streaming.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 0cb350df8..839974ec7 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -601,13 +601,15 @@ class StreamingResponseOrchestrator:
                         sequence_number=self.sequence_number,
                     )
                     self.sequence_number += 1
-                    yield OpenAIResponseObjectStreamResponseOutputTextDelta(
-                        content_index=content_index,
-                        delta=chunk_choice.delta.content,
-                        item_id=message_item_id,
-                        output_index=message_output_index,
-                        sequence_number=self.sequence_number,
-                    )
+                    # Skip emitting the text content delta event when guardrails are configured; chunks are only emitted after guardrails have been applied
+                    if not self.guardrail_ids:
+                        yield OpenAIResponseObjectStreamResponseOutputTextDelta(
+                            content_index=content_index,
+                            delta=chunk_choice.delta.content,
+                            item_id=message_item_id,
+                            output_index=message_output_index,
+                            sequence_number=self.sequence_number,
+                        )
 
                     # Collect content for final response
                     chat_response_content.append(chunk_choice.delta.content or "")