feat: Add instructions parameter in response object (#3741)

# Problem The current inline provider appends the user provided instructions to messages as a system prompt, but the returned response object does not contain the instructions field (as specified in the OpenAI responses spec). # What does this PR do? This pull request adds the instruction field to the response object definition and updates the inline provider. It also ensures that instructions from previous response is not carried over to the next response (as specified in the openAI spec). Closes #[3566](https://github.com/llamastack/llama-stack/issues/3566) ## Test Plan - Tested manually for change in model response w.r.t supplied instructions field. - Added unit test to check that the instructions from previous response is not carried over to the next response. - Added integration tests to check instructions parameter in the returned response object. - Added new recordings for the integration tests. --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-10-22 16:23:08 +00:00 · 2025-10-20 16:10:37 -04:00 · 2025-10-20 16:10:37 -04:00 · add64e8e2a
commit add64e8e2a
parent 1f38359d95
56 changed files with 10032 additions and 5816 deletions
--- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@ -359,6 +359,7 @@ class OpenAIResponsesImpl:
            tool_executor=self.tool_executor,
            safety_api=self.safety_api,
            guardrail_ids=guardrail_ids,
+            instructions=instructions,
        )

        # Stream the response
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@ -110,6 +110,7 @@ class StreamingResponseOrchestrator:
        text: OpenAIResponseText,
        max_infer_iters: int,
        tool_executor,  # Will be the tool execution logic from the main class
+        instructions: str,
        safety_api,
        guardrail_ids: list[str] | None = None,
    ):
@ -133,6 +134,8 @@ class StreamingResponseOrchestrator:
        self.accumulated_usage: OpenAIResponseUsage | None = None
        # Track if we've sent a refusal response
        self.violation_detected = False
+        # system message that is inserted into the model's context
+        self.instructions = instructions

    async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
        """Create a refusal response to replace streaming content."""
@ -176,6 +179,7 @@ class StreamingResponseOrchestrator:
            tools=self.ctx.available_tools(),
            error=error,
            usage=self.accumulated_usage,
+            instructions=self.instructions,
        )

    async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]: