add support for instructions parameter in response object

2025-12-12 12:06:04 +00:00 · 2025-10-14 14:39:23 -04:00 · 2025-10-14 14:39:23 -04:00 · f176e1a74b
commit f176e1a74b
parent 08cbb69ef7
10 changed files with 229 additions and 29 deletions
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@ -9024,6 +9024,20 @@
                        "$ref": "#/components/schemas/OpenAIResponseUsage",
                        "description": "(Optional) Token usage information for the response"
                    },
+                    "instructions": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "$ref": "#/components/schemas/OpenAIResponseInput"
+                                }
+                            }
+                        ],
+                        "description": "(Optional) System message inserted into the model's context"
+                    },
                    "input": {
                        "type": "array",
                        "items": {
@ -9901,6 +9915,20 @@
                    "usage": {
                        "$ref": "#/components/schemas/OpenAIResponseUsage",
                        "description": "(Optional) Token usage information for the response"
+                    },
+                    "instructions": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "$ref": "#/components/schemas/OpenAIResponseInput"
+                                }
+                            }
+                        ],
+                        "description": "(Optional) System message inserted into the model's context"
                    }
                },
                "additionalProperties": false,
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@ -6734,6 +6734,14 @@ components:
          $ref: '#/components/schemas/OpenAIResponseUsage'
          description: >-
            (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
        input:
          type: array
          items:
@ -7403,6 +7411,14 @@ components:
          $ref: '#/components/schemas/OpenAIResponseUsage'
          description: >-
            (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
      additionalProperties: false
      required:
        - created_at
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@ -7600,6 +7600,20 @@
                        "$ref": "#/components/schemas/OpenAIResponseUsage",
                        "description": "(Optional) Token usage information for the response"
                    },
+                    "instructions": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "$ref": "#/components/schemas/OpenAIResponseInput"
+                                }
+                            }
+                        ],
+                        "description": "(Optional) System message inserted into the model's context"
+                    },
                    "input": {
                        "type": "array",
                        "items": {
@ -8148,6 +8162,20 @@
                    "usage": {
                        "$ref": "#/components/schemas/OpenAIResponseUsage",
                        "description": "(Optional) Token usage information for the response"
+                    },
+                    "instructions": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "$ref": "#/components/schemas/OpenAIResponseInput"
+                                }
+                            }
+                        ],
+                        "description": "(Optional) System message inserted into the model's context"
                    }
                },
                "additionalProperties": false,
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -5815,6 +5815,14 @@ components:
          $ref: '#/components/schemas/OpenAIResponseUsage'
          description: >-
            (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
        input:
          type: array
          items:
@ -6218,6 +6226,14 @@ components:
          $ref: '#/components/schemas/OpenAIResponseUsage'
          description: >-
            (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
      additionalProperties: false
      required:
        - created_at
--- a/docs/static/stainless-llama-stack-spec.html
+++ b/docs/static/stainless-llama-stack-spec.html
@ -9272,6 +9272,20 @@
                        "$ref": "#/components/schemas/OpenAIResponseUsage",
                        "description": "(Optional) Token usage information for the response"
                    },
+                    "instructions": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "$ref": "#/components/schemas/OpenAIResponseInput"
+                                }
+                            }
+                        ],
+                        "description": "(Optional) System message inserted into the model's context"
+                    },
                    "input": {
                        "type": "array",
                        "items": {
@ -9820,6 +9834,20 @@
                    "usage": {
                        "$ref": "#/components/schemas/OpenAIResponseUsage",
                        "description": "(Optional) Token usage information for the response"
+                    },
+                    "instructions": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "$ref": "#/components/schemas/OpenAIResponseInput"
+                                }
+                            }
+                        ],
+                        "description": "(Optional) System message inserted into the model's context"
                    }
                },
                "additionalProperties": false,
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@ -7028,6 +7028,14 @@ components:
          $ref: '#/components/schemas/OpenAIResponseUsage'
          description: >-
            (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
        input:
          type: array
          items:
@ -7431,6 +7439,14 @@ components:
          $ref: '#/components/schemas/OpenAIResponseUsage'
          description: >-
            (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
      additionalProperties: false
      required:
        - created_at
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@ -327,6 +327,35 @@ OpenAIResponseOutput = Annotated[
 register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")


+@json_schema_type
+class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
+    """
+    This represents the output of a function call that gets passed back to the model.
+    """
+
+    call_id: str
+    output: str
+    type: Literal["function_call_output"] = "function_call_output"
+    id: str | None = None
+    status: str | None = None
+
+
+OpenAIResponseInput = Annotated[
+    # Responses API allows output messages to be passed in as input
+    OpenAIResponseOutputMessageWebSearchToolCall
+    | OpenAIResponseOutputMessageFileSearchToolCall
+    | OpenAIResponseOutputMessageFunctionToolCall
+    | OpenAIResponseInputFunctionToolCallOutput
+    | OpenAIResponseMCPApprovalRequest
+    | OpenAIResponseMCPApprovalResponse
+    | OpenAIResponseOutputMessageMCPCall
+    | OpenAIResponseOutputMessageMCPListTools
+    | OpenAIResponseMessage,
+    Field(union_mode="left_to_right"),
+]
+register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
+
+
 # This has to be a TypedDict because we need a "schema" field and our strong
 # typing code in the schema generator doesn't support Pydantic aliases. That also
 # means we can't use a discriminator field here, because TypedDicts don't support
@ -545,6 +574,7 @@ class OpenAIResponseObject(BaseModel):
    :param tools: (Optional) An array of tools the model may call while generating a response.
    :param truncation: (Optional) Truncation strategy applied to the response
    :param usage: (Optional) Token usage information for the response
+    :param instructions: (Optional) System message inserted into the model's context
    """

    created_at: int
@ -564,6 +594,7 @@ class OpenAIResponseObject(BaseModel):
    tools: list[OpenAIResponseTool] | None = None
    truncation: str | None = None
    usage: OpenAIResponseUsage | None = None
+    instructions: str | list[OpenAIResponseInput] | None = None


@json_schema_type
@ -1237,35 +1268,6 @@ OpenAIResponseObjectStream = Annotated[
 register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")


-@json_schema_type
-class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
-    """
-    This represents the output of a function call that gets passed back to the model.
-    """
-
-    call_id: str
-    output: str
-    type: Literal["function_call_output"] = "function_call_output"
-    id: str | None = None
-    status: str | None = None
-
-
-OpenAIResponseInput = Annotated[
-    # Responses API allows output messages to be passed in as input
-    OpenAIResponseOutputMessageWebSearchToolCall
-    | OpenAIResponseOutputMessageFileSearchToolCall
-    | OpenAIResponseOutputMessageFunctionToolCall
-    | OpenAIResponseInputFunctionToolCallOutput
-    | OpenAIResponseMCPApprovalRequest
-    | OpenAIResponseMCPApprovalResponse
-    | OpenAIResponseOutputMessageMCPCall
-    | OpenAIResponseOutputMessageMCPListTools
-    | OpenAIResponseMessage,
-    Field(union_mode="left_to_right"),
-]
-register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
-
-
 class ListOpenAIResponseInputItem(BaseModel):
    """List container for OpenAI response input items.

--- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@ -123,6 +123,17 @@ class OpenAIResponsesImpl:
                # Use stored messages directly and convert only new input
                message_adapter = TypeAdapter(list[OpenAIMessageParam])
                messages = message_adapter.validate_python(previous_response.messages)
+                # When chaining turns with previous_response_id, instructions from the previous turn are
+                # not carried over, so drop the leading system message that was stored for them.
+                previous_instructions = previous_response.instructions
+                if previous_instructions:
+                    if (
+                        isinstance(previous_instructions, str) and messages  # empty history must not IndexError
+                        and messages[0].role == "system" and messages[0].content == previous_instructions
+                    ):
+                        del messages[0]  # Omit instructions from previous response
+                    else:
+                        raise ValueError("Instructions from the previous response could not be validated")
                new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
                messages.extend(new_messages)
            else:
@ -359,6 +370,7 @@ class OpenAIResponsesImpl:
            tool_executor=self.tool_executor,
            safety_api=self.safety_api,
            guardrail_ids=guardrail_ids,
+            instructions=instructions,
        )

        # Stream the response
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@ -112,6 +112,7 @@ class StreamingResponseOrchestrator:
        tool_executor,  # Will be the tool execution logic from the main class
        safety_api,
        guardrail_ids: list[str] | None = None,
+        instructions: str | None = None,  # optional system message; None when the request supplied none
    ):
        self.inference_api = inference_api
        self.ctx = ctx
@ -133,6 +134,8 @@ class StreamingResponseOrchestrator:
        self.accumulated_usage: OpenAIResponseUsage | None = None
        # Track if we've sent a refusal response
        self.violation_detected = False
+        # system message that is inserted into the model's context
+        self.instructions = instructions

    async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
        """Create a refusal response to replace streaming content."""
@ -176,6 +179,7 @@ class StreamingResponseOrchestrator:
            tools=self.ctx.available_tools(),
            error=error,
            usage=self.accumulated_usage,
+            instructions=self.instructions,
        )

    async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
--- a/tests/integration/agents/test_openai_responses.py
+++ b/tests/integration/agents/test_openai_responses.py
@ -466,3 +466,53 @@ def test_guardrails_with_tools(compat_client, text_model_id):
    # Response should be either a function call or a message
    output_type = response.output[0].type
    assert output_type in ["function_call", "message"]
+
+
+def test_response_with_instructions(openai_client, client_with_models, text_model_id):
+    """Check that the response object reports the instructions parameter sent with the request."""
+    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+    client = openai_client
+
+    messages = [
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ]
+
+    # First, create a response without the instructions parameter
+    response_w_o_instructions = client.responses.create(
+        model=text_model_id,
+        input=messages,
+        stream=False,
+    )
+
+    # With no instructions supplied, the field on the response must be None
+    assert response_w_o_instructions.instructions is None
+
+    # Next, create a response that passes the instructions parameter
+    instructions = "You are a helpful assistant."
+    response_with_instructions = client.responses.create(
+        model=text_model_id,
+        instructions=instructions,
+        input=messages,
+        stream=False,
+    )
+
+    # The response must report exactly the instructions that were sent
+    assert response_with_instructions.instructions == instructions
+
+    # Finally, chain a new request onto the previous response with different instructions
+    instructions2 = "You are a helpful assistant and speak in pirate language."
+    response_with_instructions2 = client.responses.create(
+        model=text_model_id,
+        instructions=instructions2,
+        input=messages,
+        previous_response_id=response_with_instructions.id,
+        stream=False,
+    )
+
+    # Verify the response reports the new instructions, not those of the previous response
+    assert response_with_instructions2.instructions == instructions2