Add support for the `instructions` parameter in the response object

2025-12-13 13:22:36 +00:00 · 2025-10-14 14:39:23 -04:00 · 2025-10-14 14:39:23 -04:00 · f176e1a74b
commit f176e1a74b
parent 08cbb69ef7
10 changed files with 229 additions and 29 deletions
--- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@ -123,6 +123,17 @@ class OpenAIResponsesImpl:
                # Use stored messages directly and convert only new input
                message_adapter = TypeAdapter(list[OpenAIMessageParam])
                messages = message_adapter.validate_python(previous_response.messages)
+                # When conversation state is managed via previous_response_id, the instructions
+                # from earlier turns are not carried over, so drop the stored system message
+                previous_instructions = previous_response.instructions
+                if previous_instructions:
+                    if (isinstance(previous_instructions, str) and
+                        previous_instructions == messages[0].content and
+                        messages[0].role == "system"):
+                        # Omit instructions from previous response
+                        del messages[0]
+                    else:
+                        raise ValueError("Instructions from the previous response could not be validated")
                new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
                messages.extend(new_messages)
            else:
@ -359,6 +370,7 @@ class OpenAIResponsesImpl:
            tool_executor=self.tool_executor,
            safety_api=self.safety_api,
            guardrail_ids=guardrail_ids,
+            instructions=instructions,
        )

        # Stream the response
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@ -112,6 +112,7 @@ class StreamingResponseOrchestrator:
        tool_executor,  # Will be the tool execution logic from the main class
        safety_api,
        guardrail_ids: list[str] | None = None,
+        instructions: str,
    ):
        self.inference_api = inference_api
        self.ctx = ctx
@ -133,6 +134,8 @@ class StreamingResponseOrchestrator:
        self.accumulated_usage: OpenAIResponseUsage | None = None
        # Track if we've sent a refusal response
        self.violation_detected = False
+        # system message that is inserted into the model's context
+        self.instructions = instructions

    async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
        """Create a refusal response to replace streaming content."""
@ -176,6 +179,7 @@ class StreamingResponseOrchestrator:
            tools=self.ctx.available_tools(),
            error=error,
            usage=self.accumulated_usage,
+            instructions=self.instructions,
        )

    async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]: