From f176e1a74b3296f0ac4ec8e3ba1216c675960670 Mon Sep 17 00:00:00 2001
From: Shabana Baig <43451943+s-akhtar-baig@users.noreply.github.com>
Date: Tue, 14 Oct 2025 14:39:23 -0400
Subject: [PATCH] add support for instructions parameter in response object

---
 docs/static/deprecated-llama-stack-spec.html | 28 +++++++++
 docs/static/deprecated-llama-stack-spec.yaml | 16 +++++
 docs/static/llama-stack-spec.html            | 28 +++++++++
 docs/static/llama-stack-spec.yaml            | 16 +++++
 docs/static/stainless-llama-stack-spec.html  | 28 +++++++++
 docs/static/stainless-llama-stack-spec.yaml  | 16 +++++
 llama_stack/apis/agents/openai_responses.py  | 60 ++++++++++---------
 .../responses/openai_responses.py            | 12 ++++
 .../meta_reference/responses/streaming.py    |  4 ++
 .../agents/test_openai_responses.py          | 50 ++++++++++++++++
 10 files changed, 229 insertions(+), 29 deletions(-)

diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html
index 60a8b9fbd..37fab0e26 100644
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@@ -9024,6 +9024,20 @@
           "$ref": "#/components/schemas/OpenAIResponseUsage",
           "description": "(Optional) Token usage information for the response"
         },
+        "instructions": {
+          "oneOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "array",
+              "items": {
+                "$ref": "#/components/schemas/OpenAIResponseInput"
+              }
+            }
+          ],
+          "description": "(Optional) System message inserted into the model's context"
+        },
         "input": {
           "type": "array",
           "items": {
@@ -9901,6 +9915,20 @@
         "usage": {
           "$ref": "#/components/schemas/OpenAIResponseUsage",
           "description": "(Optional) Token usage information for the response"
+        },
+        "instructions": {
+          "oneOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "array",
+              "items": {
+                "$ref": "#/components/schemas/OpenAIResponseInput"
+              }
+            }
+          ],
+          "description": "(Optional) System message inserted into the model's context"
         }
       },
       "additionalProperties": false,
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml
index aaa6cd413..08ace6f7e 100644
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -6734,6 +6734,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
         input:
           type: array
           items:
@@ -7403,6 +7411,14 @@ components:
           $ref: '#/components/schemas/OpenAIResponseUsage'
           description: >-
             (Optional) Token usage information for the response
+        instructions:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIResponseInput'
+          description: >-
+            (Optional) System message inserted into the model's context
       additionalProperties: false
       required:
         - created_at
diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html
index 413e4f23e..8be929754 100644
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@@ -7600,6 +7600,20 @@
           "$ref": "#/components/schemas/OpenAIResponseUsage",
           "description": "(Optional) Token usage information for the response"
         },
+        "instructions": {
+          "oneOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "array",
+              "items": {
+                "$ref": "#/components/schemas/OpenAIResponseInput"
+              }
+            }
+          ],
+          "description": "(Optional) System message inserted into the model's context"
model's context" + }, "input": { "type": "array", "items": { @@ -8148,6 +8162,20 @@ "usage": { "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" + }, + "instructions": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + } + } + ], + "description": "(Optional) System message inserted into the model's context" } }, "additionalProperties": false, diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 93e51de6a..be138dcc4 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -5815,6 +5815,14 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + (Optional) System message inserted into the model's context input: type: array items: @@ -6218,6 +6226,14 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + (Optional) System message inserted into the model's context additionalProperties: false required: - created_at diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 858f20725..178109c44 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -9272,6 +9272,20 @@ "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" }, + "instructions": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + } + } + ], + "description": "(Optional) System message inserted into the model's context" + }, "input": { "type": "array", "items": { @@ -9820,6 +9834,20 @@ "usage": { "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" + }, + "instructions": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + } + } + ], + "description": "(Optional) System message inserted into the model's context" } }, "additionalProperties": false, diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 886549dbc..27c54e185 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -7028,6 +7028,14 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + (Optional) System message inserted into the model's context input: type: array items: @@ -7431,6 +7439,14 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + (Optional) System message inserted into the model's context 
       additionalProperties: false
       required:
         - created_at
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index 25dc89a6b..a9a5bcb3f 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -327,6 +327,35 @@ OpenAIResponseOutput = Annotated[
 register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
 
 
+@json_schema_type
+class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
+    """
+    This represents the output of a function call that gets passed back to the model.
+    """
+
+    call_id: str
+    output: str
+    type: Literal["function_call_output"] = "function_call_output"
+    id: str | None = None
+    status: str | None = None
+
+
+OpenAIResponseInput = Annotated[
+    # Responses API allows output messages to be passed in as input
+    OpenAIResponseOutputMessageWebSearchToolCall
+    | OpenAIResponseOutputMessageFileSearchToolCall
+    | OpenAIResponseOutputMessageFunctionToolCall
+    | OpenAIResponseInputFunctionToolCallOutput
+    | OpenAIResponseMCPApprovalRequest
+    | OpenAIResponseMCPApprovalResponse
+    | OpenAIResponseOutputMessageMCPCall
+    | OpenAIResponseOutputMessageMCPListTools
+    | OpenAIResponseMessage,
+    Field(union_mode="left_to_right"),
+]
+register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
+
+
 # This has to be a TypedDict because we need a "schema" field and our strong
 # typing code in the schema generator doesn't support Pydantic aliases. That also
 # means we can't use a discriminator field here, because TypedDicts don't support
@@ -545,6 +574,7 @@ class OpenAIResponseObject(BaseModel):
     :param tools: (Optional) An array of tools the model may call while generating a response.
     :param truncation: (Optional) Truncation strategy applied to the response
     :param usage: (Optional) Token usage information for the response
+    :param instructions: (Optional) System message inserted into the model's context
     """
 
     created_at: int
@@ -564,6 +594,7 @@
     tools: list[OpenAIResponseTool] | None = None
     truncation: str | None = None
     usage: OpenAIResponseUsage | None = None
+    instructions: str | list[OpenAIResponseInput] | None = None
 
 
 @json_schema_type
@@ -1237,35 +1268,6 @@ OpenAIResponseObjectStream = Annotated[
 register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
 
 
-@json_schema_type
-class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
-    """
-    This represents the output of a function call that gets passed back to the model.
-    """
-
-    call_id: str
-    output: str
-    type: Literal["function_call_output"] = "function_call_output"
-    id: str | None = None
-    status: str | None = None
-
-
-OpenAIResponseInput = Annotated[
-    # Responses API allows output messages to be passed in as input
-    OpenAIResponseOutputMessageWebSearchToolCall
-    | OpenAIResponseOutputMessageFileSearchToolCall
-    | OpenAIResponseOutputMessageFunctionToolCall
-    | OpenAIResponseInputFunctionToolCallOutput
-    | OpenAIResponseMCPApprovalRequest
-    | OpenAIResponseMCPApprovalResponse
-    | OpenAIResponseOutputMessageMCPCall
-    | OpenAIResponseOutputMessageMCPListTools
-    | OpenAIResponseMessage,
-    Field(union_mode="left_to_right"),
-]
-register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
-
-
 class ListOpenAIResponseInputItem(BaseModel):
     """List container for OpenAI response input items.
 
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index 851e6ef28..2526dc1c3 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -123,6 +123,17 @@ class OpenAIResponsesImpl:
             # Use stored messages directly and convert only new input
             message_adapter = TypeAdapter(list[OpenAIMessageParam])
             messages = message_adapter.validate_python(previous_response.messages)
+            # When managing conversation state with the previous_response_id parameter,
+            # the instructions used on previous turns are not carried over into the context
+            previous_instructions = previous_response.instructions
+            if previous_instructions:
+                if (isinstance(previous_instructions, str) and
+                        previous_instructions == messages[0].content and
+                        messages[0].role == "system"):
+                    # Omit the instructions from the previous response
+                    del messages[0]
+                else:
+                    raise ValueError("Instructions from the previous response could not be validated")
             new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
             messages.extend(new_messages)
         else:
@@ -359,6 +370,7 @@ class OpenAIResponsesImpl:
             tool_executor=self.tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
+            instructions=instructions,
         )
 
         # Stream the response
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index caf899cdd..896491452 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -112,6 +112,7 @@ class StreamingResponseOrchestrator:
         tool_executor,  # Will be the tool execution logic from the main class
         safety_api,
         guardrail_ids: list[str] | None = None,
+        instructions: str | None = None,
     ):
         self.inference_api = inference_api
         self.ctx = ctx
@@ -133,6 +134,8 @@
         self.accumulated_usage: OpenAIResponseUsage | None = None
         # Track if we've sent a refusal response
         self.violation_detected = False
+        # System message that is inserted into the model's context
+        self.instructions = instructions
 
     async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
         """Create a refusal response to replace streaming content."""
@@ -176,6 +179,7 @@
             tools=self.ctx.available_tools(),
             error=error,
             usage=self.accumulated_usage,
+            instructions=self.instructions,
         )
 
     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py
index 675e2b904..d413d5201 100644
--- a/tests/integration/agents/test_openai_responses.py
+++ b/tests/integration/agents/test_openai_responses.py
@@ -466,3 +466,53 @@ def test_guardrails_with_tools(compat_client, text_model_id):
     # Response should be either a function call or a message
     output_type = response.output[0].type
     assert output_type in ["function_call", "message"]
+
+
+def test_response_with_instructions(openai_client, client_with_models, text_model_id):
+    """Test the instructions parameter on the response object."""
+    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+    client = openai_client
+
+    messages = [
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ]
+
+    # First, create a response without the instructions parameter
+    response_w_o_instructions = client.responses.create(
+        model=text_model_id,
+        input=messages,
+        stream=False,
+    )
+
+    # Verify the instructions field is None
+    assert response_w_o_instructions.instructions is None
+
+    # Next, create a response and pass the instructions parameter
+    instructions = "You are a helpful assistant."
+    response_with_instructions = client.responses.create(
+        model=text_model_id,
+        instructions=instructions,
+        input=messages,
+        stream=False,
+    )
+
+    # Verify the response echoes back a valid instructions field
+    assert response_with_instructions.instructions == instructions
+
+    # Finally, test the instructions parameter together with a previous response id
+    instructions2 = "You are a helpful assistant and speak in pirate language."
+    response_with_instructions2 = client.responses.create(
+        model=text_model_id,
+        instructions=instructions2,
+        input=messages,
+        previous_response_id=response_with_instructions.id,
+        stream=False,
+    )
+
+    # Verify the instructions from the previous response were not carried over to the next response
+    assert response_with_instructions2.instructions == instructions2
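
Reviewer note (not part of the patch): the integration test above drives the new
parameter through Llama Stack's OpenAI-compatible Responses endpoint. As a minimal
standalone sketch, the same behavior can be exercised with the stock `openai`
Python client; the base_url and model id below are placeholders, not values taken
from this change.

    from openai import OpenAI

    # Placeholder endpoint and model id; point these at your own deployment.
    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
    model_id = "meta-llama/Llama-3.3-70B-Instruct"

    question = [{"role": "user", "content": "What is the capital of France?"}]

    # Without the parameter, the response object reports instructions=None.
    resp = client.responses.create(model=model_id, input=question)
    assert resp.instructions is None

    # With the parameter, the string is inserted into the model's context as a
    # system message and echoed back on the response object.
    resp2 = client.responses.create(
        model=model_id,
        instructions="You are a helpful assistant.",
        input=question,
    )
    assert resp2.instructions == "You are a helpful assistant."

    # When chaining turns via previous_response_id, the previous turn's system
    # message is dropped from the stored context, so only the new instructions
    # apply to this turn.
    resp3 = client.responses.create(
        model=model_id,
        instructions="You are a helpful assistant and speak in pirate language.",
        input=[{"role": "user", "content": "And of Spain?"}],
        previous_response_id=resp2.id,
    )
    assert resp3.instructions != resp2.instructions

Design note: if the stored instructions cannot be matched against the leading
system message of the previous turn, the provider raises a ValueError rather than
silently stacking system prompts.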