From f176e1a74b3296f0ac4ec8e3ba1216c675960670 Mon Sep 17 00:00:00 2001
From: Shabana Baig <43451943+s-akhtar-baig@users.noreply.github.com>
Date: Tue, 14 Oct 2025 14:39:23 -0400
Subject: [PATCH] add support for instructions parameter in response object

---
 docs/static/deprecated-llama-stack-spec.html | 28 +++++++++
 docs/static/deprecated-llama-stack-spec.yaml | 16 +++++
 docs/static/llama-stack-spec.html            | 28 +++++++++
 docs/static/llama-stack-spec.yaml            | 16 +++++
 docs/static/stainless-llama-stack-spec.html  | 28 +++++++++
 docs/static/stainless-llama-stack-spec.yaml  | 16 +++++
 llama_stack/apis/agents/openai_responses.py  | 60 ++++++++++---------
 .../responses/openai_responses.py            | 12 ++++
 .../meta_reference/responses/streaming.py    |  4 ++
 .../agents/test_openai_responses.py          | 50 ++++++++++++++++
 10 files changed, 229 insertions(+), 29 deletions(-)

diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html
index 60a8b9fbd..37fab0e26 100644
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@@ -9024,6 +9024,20 @@
           "$ref": "#/components/schemas/OpenAIResponseUsage",
           "description": "(Optional) Token usage information for the response"
         },
+        "instructions": {
+          "oneOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "array",
+              "items": {
+                "$ref": "#/components/schemas/OpenAIResponseInput"
+              }
+            }
+          ],
+          "description": "(Optional) System message inserted into the model's context"
+        },
         "input": {
           "type": "array",
           "items": {
@@ -9901,6 +9915,20 @@
         "usage": {
           "$ref": "#/components/schemas/OpenAIResponseUsage",
           "description": "(Optional) Token usage information for the response"
+        },
+        "instructions": {
+          "oneOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "array",
+              "items": {
+                "$ref": "#/components/schemas/OpenAIResponseInput"
+              }
+            }
+          ],
+          "description": "(Optional) System message inserted into the model's context"
         }
       },
       "additionalProperties": false,
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml
index aaa6cd413..08ace6f7e 100644
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -6734,6 +6734,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
         input:
           type: array
           items:
@@ -7403,6 +7411,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
       additionalProperties: false
       required:
         - created_at
diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html
index 413e4f23e..8be929754 100644
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@@ -7600,6 +7600,20 @@
           "$ref": "#/components/schemas/OpenAIResponseUsage",
           "description": "(Optional) Token usage information for the response"
         },
+        "instructions": {
+          "oneOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "array",
+              "items": {
+                "$ref": "#/components/schemas/OpenAIResponseInput"
+              }
+            }
+          ],
+          "description": "(Optional) System message inserted into the model's context"
model's context" + }, "input": { "type": "array", "items": { @@ -8148,6 +8162,20 @@ "usage": { "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" + }, + "instructions": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + } + } + ], + "description": "(Optional) System message inserted into the model's context" } }, "additionalProperties": false, diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 93e51de6a..be138dcc4 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -5815,6 +5815,14 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + (Optional) System message inserted into the model's context input: type: array items: @@ -6218,6 +6226,14 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + (Optional) System message inserted into the model's context additionalProperties: false required: - created_at diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 858f20725..178109c44 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -9272,6 +9272,20 @@ "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" }, + "instructions": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + } + } + ], + "description": "(Optional) System message inserted into the model's context" + }, "input": { "type": "array", "items": { @@ -9820,6 +9834,20 @@ "usage": { "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" + }, + "instructions": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + } + } + ], + "description": "(Optional) System message inserted into the model's context" } }, "additionalProperties": false, diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 886549dbc..27c54e185 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -7028,6 +7028,14 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + (Optional) System message inserted into the model's context input: type: array items: @@ -7431,6 +7439,14 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + (Optional) System message inserted into the model's context 
       additionalProperties: false
       required:
         - created_at
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index 25dc89a6b..a9a5bcb3f 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -327,6 +327,35 @@ OpenAIResponseOutput = Annotated[
 register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
 
 
+@json_schema_type
+class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
+    """
+    This represents the output of a function call that gets passed back to the model.
+    """
+
+    call_id: str
+    output: str
+    type: Literal["function_call_output"] = "function_call_output"
+    id: str | None = None
+    status: str | None = None
+
+
+OpenAIResponseInput = Annotated[
+    # Responses API allows output messages to be passed in as input
+    OpenAIResponseOutputMessageWebSearchToolCall
+    | OpenAIResponseOutputMessageFileSearchToolCall
+    | OpenAIResponseOutputMessageFunctionToolCall
+    | OpenAIResponseInputFunctionToolCallOutput
+    | OpenAIResponseMCPApprovalRequest
+    | OpenAIResponseMCPApprovalResponse
+    | OpenAIResponseOutputMessageMCPCall
+    | OpenAIResponseOutputMessageMCPListTools
+    | OpenAIResponseMessage,
+    Field(union_mode="left_to_right"),
+]
+register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
+
+
 # This has to be a TypedDict because we need a "schema" field and our strong
 # typing code in the schema generator doesn't support Pydantic aliases. That also
 # means we can't use a discriminator field here, because TypedDicts don't support
@@ -545,6 +574,7 @@ class OpenAIResponseObject(BaseModel):
     :param tools: (Optional) An array of tools the model may call while generating a response.
     :param truncation: (Optional) Truncation strategy applied to the response
     :param usage: (Optional) Token usage information for the response
+    :param instructions: (Optional) System message inserted into the model's context
     """
 
     created_at: int
@@ -564,6 +594,7 @@
     tools: list[OpenAIResponseTool] | None = None
     truncation: str | None = None
     usage: OpenAIResponseUsage | None = None
+    instructions: str | list[OpenAIResponseInput] | None = None
 
 
 @json_schema_type
@@ -1237,35 +1268,6 @@ OpenAIResponseObjectStream = Annotated[
 register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
 
 
-@json_schema_type
-class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
-    """
-    This represents the output of a function call that gets passed back to the model.
-    """
-
-    call_id: str
-    output: str
-    type: Literal["function_call_output"] = "function_call_output"
-    id: str | None = None
-    status: str | None = None
-
-
-OpenAIResponseInput = Annotated[
-    # Responses API allows output messages to be passed in as input
-    OpenAIResponseOutputMessageWebSearchToolCall
-    | OpenAIResponseOutputMessageFileSearchToolCall
-    | OpenAIResponseOutputMessageFunctionToolCall
-    | OpenAIResponseInputFunctionToolCallOutput
-    | OpenAIResponseMCPApprovalRequest
-    | OpenAIResponseMCPApprovalResponse
-    | OpenAIResponseOutputMessageMCPCall
-    | OpenAIResponseOutputMessageMCPListTools
-    | OpenAIResponseMessage,
-    Field(union_mode="left_to_right"),
-]
-register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
-
-
 class ListOpenAIResponseInputItem(BaseModel):
     """List container for OpenAI response input items.
 
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index 851e6ef28..2526dc1c3 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -123,6 +123,17 @@ class OpenAIResponsesImpl:
             # Use stored messages directly and convert only new input
             message_adapter = TypeAdapter(list[OpenAIMessageParam])
             messages = message_adapter.validate_python(previous_response.messages)
+            # When managing conversation state with the previous_response_id parameter,
+            # the instructions used on previous turns are not carried over into the context
+            previous_instructions = previous_response.instructions
+            if previous_instructions:
+                if (isinstance(previous_instructions, str) and
+                        previous_instructions == messages[0].content and
+                        messages[0].role == "system"):
+                    # Omit the instructions from the previous response
+                    del messages[0]
+                else:
+                    raise ValueError("Instructions from the previous response could not be validated")
             new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
             messages.extend(new_messages)
         else:
@@ -359,6 +370,7 @@ class OpenAIResponsesImpl:
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
+            instructions=instructions,
         )
 
         # Stream the response
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index caf899cdd..896491452 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -112,6 +112,7 @@ class StreamingResponseOrchestrator:
         tool_executor,  # Will be the tool execution logic from the main class
         safety_api,
         guardrail_ids: list[str] | None = None,
+        instructions: str | None = None,
     ):
         self.inference_api = inference_api
         self.ctx = ctx
@@ -133,6 +134,8 @@
         self.accumulated_usage: OpenAIResponseUsage | None = None
         # Track if we've sent a refusal response
         self.violation_detected = False
+        # System message that is inserted into the model's context
+        self.instructions = instructions
 
     async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
         """Create a refusal response to replace streaming content."""
@@ -176,6 +179,7 @@
             tools=self.ctx.available_tools(),
             error=error,
             usage=self.accumulated_usage,
+            instructions=self.instructions,
         )
 
     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py
index 675e2b904..d413d5201 100644
--- a/tests/integration/agents/test_openai_responses.py
+++ b/tests/integration/agents/test_openai_responses.py
@@ -466,3 +466,53 @@ def test_guardrails_with_tools(compat_client, text_model_id):
     # Response should be either a function call or a message
     output_type = response.output[0].type
     assert output_type in ["function_call", "message"]
+
+
+def test_response_with_instructions(openai_client, client_with_models, text_model_id):
+    """Test the instructions parameter on the response object."""
+    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+    client = openai_client
+
+    messages = [
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ]
+
+    # First, create a response without the instructions parameter
+    response_w_o_instructions = client.responses.create(
+        model=text_model_id,
+        input=messages,
+        stream=False,
+    )
+
+    # Verify the instructions field is None
+    assert response_w_o_instructions.instructions is None
+
+    # Next, create a response and pass the instructions parameter
+    instructions = "You are a helpful assistant."
+    response_with_instructions = client.responses.create(
+        model=text_model_id,
+        instructions=instructions,
+        input=messages,
+        stream=False,
+    )
+
+    # Verify the response echoes back a valid instructions field
+    assert response_with_instructions.instructions == instructions
+
+    # Finally, test the instructions parameter together with a previous response id
+    instructions2 = "You are a helpful assistant and speak in pirate language."
+    response_with_instructions2 = client.responses.create(
+        model=text_model_id,
+        instructions=instructions2,
+        input=messages,
+        previous_response_id=response_with_instructions.id,
+        stream=False,
+    )
+
+    # Verify the instructions from the previous response were not carried over to the next response
+    assert response_with_instructions2.instructions == instructions2
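
Reviewer note (not part of the patch): the integration test above drives the new
parameter through Llama Stack's OpenAI-compatible Responses endpoint. As a minimal
standalone sketch, the same behavior can be exercised with the stock `openai`
Python client; the base_url and model id below are placeholders, not values taken
from this change.

    from openai import OpenAI

    # Placeholder endpoint and model id; point these at your own deployment.
    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
    model_id = "meta-llama/Llama-3.3-70B-Instruct"

    question = [{"role": "user", "content": "What is the capital of France?"}]

    # Without the parameter, the response object reports instructions=None.
    resp = client.responses.create(model=model_id, input=question)
    assert resp.instructions is None

    # With the parameter, the string is inserted into the model's context as a
    # system message and echoed back on the response object.
    resp2 = client.responses.create(
        model=model_id,
        instructions="You are a helpful assistant.",
        input=question,
    )
    assert resp2.instructions == "You are a helpful assistant."

    # When chaining turns via previous_response_id, the previous turn's system
    # message is dropped from the stored context, so only the new instructions
    # apply to this turn.
    resp3 = client.responses.create(
        model=model_id,
        instructions="You are a helpful assistant and speak in pirate language.",
        input=[{"role": "user", "content": "And of Spain?"}],
        previous_response_id=resp2.id,
    )
    assert resp3.instructions != resp2.instructions

Design note: if the stored instructions cannot be matched against the leading
system message of the previous turn, the provider raises a ValueError rather than
silently stacking system prompts.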