llama-stack (mirror of https://github.com/meta-llama/llama-stack.git)

Commit f176e1a74b (parent: 08cbb69ef7)

add support for instructions parameter in response object

10 changed files with 229 additions and 29 deletions
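For orientation before the diff: a minimal sketch of the new parameter from the client side, mirroring the integration test added at the bottom of this commit. The base URL, API key, and model id are placeholders, not part of the change.

from openai import OpenAI

# Placeholder connection details; assumes an OpenAI-compatible Llama Stack endpoint.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # placeholder model id
    instructions="You are a helpful assistant.",  # inserted into the model's context as a system message
    input=[{"role": "user", "content": "What is the capital of France?"}],
    stream=False,
)

# The response object now echoes back the instructions it was created with.
assert response.instructions == "You are a helpful assistant."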
docs/static/deprecated-llama-stack-spec.html (vendored): 28 changes

@@ -9024,6 +9024,20 @@
             "$ref": "#/components/schemas/OpenAIResponseUsage",
             "description": "(Optional) Token usage information for the response"
           },
+          "instructions": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "$ref": "#/components/schemas/OpenAIResponseInput"
+                }
+              }
+            ],
+            "description": "(Optional) System message inserted into the model's context"
+          },
           "input": {
             "type": "array",
             "items": {

@@ -9901,6 +9915,20 @@
           "usage": {
             "$ref": "#/components/schemas/OpenAIResponseUsage",
             "description": "(Optional) Token usage information for the response"
+          },
+          "instructions": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "$ref": "#/components/schemas/OpenAIResponseInput"
+                }
+              }
+            ],
+            "description": "(Optional) System message inserted into the model's context"
           }
         },
         "additionalProperties": false,
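All six spec files below encode the same shape: "instructions" is a oneOf accepting either a plain string or an array of OpenAIResponseInput items. A small illustration of the two accepted payload shapes; the message fields in the array form are an assumed example of one input item, not taken from this diff.

# Shape 1: a plain system-message string.
instructions = "You are a helpful assistant."

# Shape 2: an array of OpenAIResponseInput items (field values assumed for illustration).
instructions = [
    {"type": "message", "role": "system", "content": "You are a helpful assistant."},
]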
docs/static/deprecated-llama-stack-spec.yaml (vendored): 16 changes

@@ -6734,6 +6734,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
         input:
           type: array
           items:

@@ -7403,6 +7411,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
       additionalProperties: false
       required:
         - created_at
docs/static/llama-stack-spec.html (vendored): 28 changes

@@ -7600,6 +7600,20 @@
             "$ref": "#/components/schemas/OpenAIResponseUsage",
             "description": "(Optional) Token usage information for the response"
           },
+          "instructions": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "$ref": "#/components/schemas/OpenAIResponseInput"
+                }
+              }
+            ],
+            "description": "(Optional) System message inserted into the model's context"
+          },
           "input": {
             "type": "array",
             "items": {

@@ -8148,6 +8162,20 @@
           "usage": {
             "$ref": "#/components/schemas/OpenAIResponseUsage",
             "description": "(Optional) Token usage information for the response"
+          },
+          "instructions": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "$ref": "#/components/schemas/OpenAIResponseInput"
+                }
+              }
+            ],
+            "description": "(Optional) System message inserted into the model's context"
           }
         },
         "additionalProperties": false,
docs/static/llama-stack-spec.yaml (vendored): 16 changes

@@ -5815,6 +5815,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
         input:
           type: array
           items:

@@ -6218,6 +6226,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
       additionalProperties: false
       required:
         - created_at
docs/static/stainless-llama-stack-spec.html (vendored): 28 changes

@@ -9272,6 +9272,20 @@
             "$ref": "#/components/schemas/OpenAIResponseUsage",
             "description": "(Optional) Token usage information for the response"
           },
+          "instructions": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "$ref": "#/components/schemas/OpenAIResponseInput"
+                }
+              }
+            ],
+            "description": "(Optional) System message inserted into the model's context"
+          },
           "input": {
             "type": "array",
             "items": {

@@ -9820,6 +9834,20 @@
           "usage": {
             "$ref": "#/components/schemas/OpenAIResponseUsage",
             "description": "(Optional) Token usage information for the response"
+          },
+          "instructions": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "$ref": "#/components/schemas/OpenAIResponseInput"
+                }
+              }
+            ],
+            "description": "(Optional) System message inserted into the model's context"
           }
         },
         "additionalProperties": false,
docs/static/stainless-llama-stack-spec.yaml (vendored): 16 changes

@@ -7028,6 +7028,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
         input:
           type: array
           items:

@@ -7431,6 +7439,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
       additionalProperties: false
       required:
         - created_at
@@ -327,6 +327,35 @@ OpenAIResponseOutput = Annotated[
 register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
 
 
+@json_schema_type
+class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
+    """
+    This represents the output of a function call that gets passed back to the model.
+    """
+
+    call_id: str
+    output: str
+    type: Literal["function_call_output"] = "function_call_output"
+    id: str | None = None
+    status: str | None = None
+
+
+OpenAIResponseInput = Annotated[
+    # Responses API allows output messages to be passed in as input
+    OpenAIResponseOutputMessageWebSearchToolCall
+    | OpenAIResponseOutputMessageFileSearchToolCall
+    | OpenAIResponseOutputMessageFunctionToolCall
+    | OpenAIResponseInputFunctionToolCallOutput
+    | OpenAIResponseMCPApprovalRequest
+    | OpenAIResponseMCPApprovalResponse
+    | OpenAIResponseOutputMessageMCPCall
+    | OpenAIResponseOutputMessageMCPListTools
+    | OpenAIResponseMessage,
+    Field(union_mode="left_to_right"),
+]
+register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
+
+
 # This has to be a TypedDict because we need a "schema" field and our strong
 # typing code in the schema generator doesn't support Pydantic aliases. That also
 # means we can't use a discriminator field here, because TypedDicts don't support

@@ -545,6 +574,7 @@ class OpenAIResponseObject(BaseModel):
     :param tools: (Optional) An array of tools the model may call while generating a response.
     :param truncation: (Optional) Truncation strategy applied to the response
     :param usage: (Optional) Token usage information for the response
+    :param instructions: (Optional) System message inserted into the model's context
     """
 
     created_at: int

@@ -564,6 +594,7 @@ class OpenAIResponseObject(BaseModel):
     tools: list[OpenAIResponseTool] | None = None
     truncation: str | None = None
     usage: OpenAIResponseUsage | None = None
+    instructions: str | list[OpenAIResponseInput] | None = None
 
 
 @json_schema_type

@@ -1237,35 +1268,6 @@ OpenAIResponseObjectStream = Annotated[
 register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
 
 
-@json_schema_type
-class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
-    """
-    This represents the output of a function call that gets passed back to the model.
-    """
-
-    call_id: str
-    output: str
-    type: Literal["function_call_output"] = "function_call_output"
-    id: str | None = None
-    status: str | None = None
-
-
-OpenAIResponseInput = Annotated[
-    # Responses API allows output messages to be passed in as input
-    OpenAIResponseOutputMessageWebSearchToolCall
-    | OpenAIResponseOutputMessageFileSearchToolCall
-    | OpenAIResponseOutputMessageFunctionToolCall
-    | OpenAIResponseInputFunctionToolCallOutput
-    | OpenAIResponseMCPApprovalRequest
-    | OpenAIResponseMCPApprovalResponse
-    | OpenAIResponseOutputMessageMCPCall
-    | OpenAIResponseOutputMessageMCPListTools
-    | OpenAIResponseMessage,
-    Field(union_mode="left_to_right"),
-]
-register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
-
-
 class ListOpenAIResponseInputItem(BaseModel):
     """List container for OpenAI response input items.
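The Pydantic field `instructions: str | list[OpenAIResponseInput] | None = None` is what renders as the oneOf seen in the generated specs above. A standalone sketch, not the repo's model (which has many more fields), showing how such a union field validates:

from pydantic import BaseModel

class ResponseSketch(BaseModel):
    # str | list[...] | None renders as oneOf [string, array] plus optionality
    instructions: str | list[dict] | None = None

assert ResponseSketch(instructions="Be brief.").instructions == "Be brief."
assert ResponseSketch(instructions=[{"type": "message"}]).instructions == [{"type": "message"}]
assert ResponseSketch().instructions is None  # omitted: stays None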
@@ -123,6 +123,17 @@ class OpenAIResponsesImpl:
             # Use stored messages directly and convert only new input
             message_adapter = TypeAdapter(list[OpenAIMessageParam])
             messages = message_adapter.validate_python(previous_response.messages)
+            # When managing conversation state with the previous_response_id parameter,
+            # the instructions used on previous turns will not be carried over in the context
+            previous_instructions = previous_response.instructions
+            if previous_instructions:
+                if (isinstance(previous_instructions, str) and
+                        previous_instructions == messages[0].content and
+                        messages[0].role == "system"):
+                    # Omit instructions from previous response
+                    del messages[0]
+                else:
+                    raise ValueError("Instructions from the previous response could not be validated")
             new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
             messages.extend(new_messages)
         else:

@@ -359,6 +370,7 @@ class OpenAIResponsesImpl:
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
+            instructions=instructions,
         )
 
         # Stream the response
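The check above makes instructions per-turn: when chaining with previous_response_id, the system message that carried the previous turn's instructions is stripped from the stored history before the new turn's instructions apply. A standalone sketch of that rule, using plain dicts instead of the repo's message types:

def drop_previous_instructions(messages: list[dict], previous_instructions) -> list[dict]:
    # If the first stored message is the system message carrying the previous
    # turn's instructions, drop it so old instructions are not re-applied.
    if (
        isinstance(previous_instructions, str)
        and messages
        and messages[0].get("role") == "system"
        and messages[0].get("content") == previous_instructions
    ):
        return messages[1:]
    return messages

stored = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]
assert drop_previous_instructions(stored, "You are a helpful assistant.")[0]["role"] == "user"
assert drop_previous_instructions(stored, None) == stored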
@@ -112,6 +112,7 @@ class StreamingResponseOrchestrator:
         tool_executor,  # Will be the tool execution logic from the main class
         safety_api,
         guardrail_ids: list[str] | None = None,
+        instructions: str,
     ):
         self.inference_api = inference_api
         self.ctx = ctx

@@ -133,6 +134,8 @@ class StreamingResponseOrchestrator:
         self.accumulated_usage: OpenAIResponseUsage | None = None
         # Track if we've sent a refusal response
         self.violation_detected = False
+        # system message that is inserted into the model's context
+        self.instructions = instructions
 
     async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
         """Create a refusal response to replace streaming content."""

@@ -176,6 +179,7 @@ class StreamingResponseOrchestrator:
             tools=self.ctx.available_tools(),
             error=error,
             usage=self.accumulated_usage,
+            instructions=self.instructions,
         )
 
     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
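The orchestrator change is a pure pass-through: the value is stored at construction time and echoed on the final response object alongside usage and error. A reduced sketch of that pattern with assumed names (a keyword-only argument is chosen here for clarity; the real signature is shown in the diff above):

class OrchestratorSketch:
    def __init__(self, *, instructions: str | None = None):
        # system message that is inserted into the model's context
        self.instructions = instructions

    def snapshot_response(self) -> dict:
        # Echo the stored instructions on the response object.
        return {"object": "response", "instructions": self.instructions}

assert OrchestratorSketch(instructions="Be brief.").snapshot_response()["instructions"] == "Be brief."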
@@ -466,3 +466,53 @@ def test_guardrails_with_tools(compat_client, text_model_id):
     # Response should be either a function call or a message
     output_type = response.output[0].type
     assert output_type in ["function_call", "message"]
+
+
+def test_response_with_instructions(openai_client, client_with_models, text_model_id):
+    """Test instructions parameter in the responses object."""
+    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+    client = openai_client
+
+    messages = [
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ]
+
+    # First create a response without instructions parameter
+    response_w_o_instructions = client.responses.create(
+        model=text_model_id,
+        input=messages,
+        stream=False,
+    )
+
+    # Verify we have None in the instructions field
+    assert response_w_o_instructions.instructions is None
+
+    # Next create a response and pass instructions parameter
+    instructions = "You are a helpful assistant."
+    response_with_instructions = client.responses.create(
+        model=text_model_id,
+        instructions=instructions,
+        input=messages,
+        stream=False,
+    )
+
+    # Verify we have a valid instructions field
+    assert response_with_instructions.instructions == instructions
+
+    # Finally test instructions parameter with a previous response id
+    instructions2 = "You are a helpful assistant and speak in pirate language."
+    response_with_instructions2 = client.responses.create(
+        model=text_model_id,
+        instructions=instructions2,
+        input=messages,
+        previous_response_id=response_with_instructions.id,
+        stream=False,
+    )
+
+    # Verify instructions from previous response was not carried over to the next response
+    assert response_with_instructions2.instructions == instructions2