diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 6378a5ced..6adfe9b2b 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -7027,6 +7027,10 @@
"type": "string",
"description": "The underlying LLM used for completions."
},
+                "instructions": {
+                    "type": "string",
+                    "description": "(Optional) System message inserted into the model's context."
+                },
"previous_response_id": {
"type": "string",
"description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 012610d02..31ca3f52a 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -4952,6 +4952,9 @@ components:
model:
type: string
description: The underlying LLM used for completions.
+        instructions:
+          type: string
+          description: (Optional) System message inserted into the model's context.
previous_response_id:
type: string
description: >-
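Taken together, the two spec hunks expose `instructions` as a plain string on the create-response request body. A minimal sketch of a request carrying the new field, assuming a locally running Llama Stack server with the Responses endpoint at `/v1/openai/v1/responses` on the default port (path and port are assumptions, not part of this diff; only the body fields come from the schema above):

```python
import requests

# Hypothetical request; the endpoint path and port are assumptions. Only the
# body fields ("model", "input", "instructions") come from the schema change.
payload = {
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "input": "What is the capital of Ireland?",
    "instructions": "You are a geography expert. Provide concise answers.",
}
resp = requests.post("http://localhost:8321/v1/openai/v1/responses", json=payload)
resp.raise_for_status()
print(resp.json()["output"])
```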
diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index b2f85336c..8ecafdf26 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -596,6 +596,7 @@ class Agents(Protocol):
self,
input: str | list[OpenAIResponseInput],
model: str,
+ instructions: str | None = None,
previous_response_id: str | None = None,
store: bool | None = True,
stream: bool | None = False,
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py
index 86780fd61..8f54cc737 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -313,6 +313,7 @@ class MetaReferenceAgentsImpl(Agents):
self,
input: str | list[OpenAIResponseInput],
model: str,
+ instructions: str | None = None,
previous_response_id: str | None = None,
store: bool | None = True,
stream: bool | None = False,
@@ -320,5 +321,5 @@ class MetaReferenceAgentsImpl(Agents):
tools: list[OpenAIResponseInputTool] | None = None,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.create_openai_response(
- input, model, previous_response_id, store, stream, temperature, tools
+ input, model, instructions, previous_response_id, store, stream, temperature, tools
)
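Note that the adapter forwards every argument positionally, so `instructions` must occupy the same slot in `Agents.create_openai_response` and in the implementation's signature; if either parameter list drifts, arguments rebind silently. A self-contained sketch of that failure mode, using hypothetical stand-in functions rather than the project's code:

```python
import asyncio

# Stand-ins for the adapter and the implementation; positional forwarding is
# only correct while both parameter lists agree in order.
async def create_openai_response(input, model, instructions=None, previous_response_id=None):
    return {"model": model, "instructions": instructions, "previous": previous_response_id}

async def adapter(input, model, instructions=None, previous_response_id=None):
    # Mirrors the diff: each argument is passed by position, in order.
    return await create_openai_response(input, model, instructions, previous_response_id)

print(asyncio.run(adapter("hi", "llama", "Be brief.", "resp_1")))
# -> {'model': 'llama', 'instructions': 'Be brief.', 'previous': 'resp_1'}
```

Forwarding with explicit keywords (`instructions=instructions`, ...) would remove that ordering coupling.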
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index 6d9d06109..f5b0d8c31 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -208,6 +208,11 @@
return input
+    async def _prepend_instructions(self, messages: list[OpenAIMessageParam], instructions: str | None) -> None:
+        # Prepend instructions as the leading system message; None or "" is a no-op.
+        if instructions:
+            messages.insert(0, OpenAISystemMessageParam(content=instructions))
+
async def get_openai_response(
self,
id: str,
@@ -219,6 +223,7 @@ class OpenAIResponsesImpl:
self,
input: str | list[OpenAIResponseInput],
model: str,
+ instructions: str | None = None,
previous_response_id: str | None = None,
store: bool | None = True,
stream: bool | None = False,
@@ -229,7 +234,9 @@ class OpenAIResponsesImpl:
input = await self._prepend_previous_response(input, previous_response_id)
messages = await _convert_response_input_to_chat_messages(input)
+ await self._prepend_instructions(messages, instructions)
chat_tools = await self._convert_response_tools_to_chat_tools(tools) if tools else None
+
chat_response = await self.inference_api.openai_chat_completion(
model=model,
messages=messages,
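`_prepend_instructions` mutates the message list in place: a non-empty `instructions` string becomes the first (system) message, while `None` or `""` leaves the list untouched. Because it runs after `_prepend_previous_response`, the system message also lands ahead of any replayed conversation history. A stand-alone illustration using a plain dataclass in place of `OpenAISystemMessageParam`:

```python
from dataclasses import dataclass

@dataclass
class SystemMessage:  # stand-in for OpenAISystemMessageParam
    content: str
    role: str = "system"

def prepend_instructions(messages: list, instructions: str | None) -> None:
    # Same shape as the diff's helper, minus async/self.
    if instructions:
        messages.insert(0, SystemMessage(content=instructions))

msgs = [{"role": "user", "content": "What is the capital of Ireland?"}]
prepend_instructions(msgs, "You are a geography expert.")
assert msgs[0].role == "system"
prepend_instructions(msgs, None)  # falsy instructions: no-op
assert len(msgs) == 2
```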
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index ed5f13a58..0a8d59306 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -384,3 +384,141 @@ async def test_prepend_previous_response_web_search(get_previous_response_with_i
# Check for new input
assert isinstance(input[3], OpenAIResponseMessage)
assert input[3].content == "fake_input"
+
+
+@pytest.mark.asyncio
+async def test_create_openai_response_with_instructions(openai_responses_impl, mock_inference_api):
+ # Setup
+ input_text = "What is the capital of Ireland?"
+ model = "meta-llama/Llama-3.1-8B-Instruct"
+ instructions = "You are a geography expert. Provide concise answers."
+
+ # Load the chat completion fixture
+ mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
+ mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
+
+ # Execute
+ await openai_responses_impl.create_openai_response(
+ input=input_text,
+ model=model,
+ instructions=instructions,
+ )
+
+ # Verify
+ mock_inference_api.openai_chat_completion.assert_called_once()
+ call_args = mock_inference_api.openai_chat_completion.call_args
+ sent_messages = call_args.kwargs["messages"]
+
+ # Check that instructions were prepended as a system message
+ assert len(sent_messages) == 2
+ assert sent_messages[0].role == "system"
+ assert sent_messages[0].content == instructions
+ assert sent_messages[1].role == "user"
+ assert sent_messages[1].content == input_text
+
+
+@pytest.mark.asyncio
+async def test_create_openai_response_with_instructions_and_multiple_messages(
+ openai_responses_impl, mock_inference_api
+):
+ # Setup
+ input_messages = [
+ OpenAIResponseMessage(role="user", content="Name some towns in Ireland", name=None),
+ OpenAIResponseMessage(
+ role="assistant",
+ content="Galway, Longford, Sligo",
+ name=None,
+ ),
+ OpenAIResponseMessage(role="user", content="Which is the largest?", name=None),
+ ]
+ model = "meta-llama/Llama-3.1-8B-Instruct"
+ instructions = "You are a geography expert. Provide concise answers."
+
+ mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
+ mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
+
+ # Execute
+ await openai_responses_impl.create_openai_response(
+ input=input_messages,
+ model=model,
+ instructions=instructions,
+ )
+
+ # Verify
+ mock_inference_api.openai_chat_completion.assert_called_once()
+ call_args = mock_inference_api.openai_chat_completion.call_args
+ sent_messages = call_args.kwargs["messages"]
+
+ # Check that instructions were prepended as a system message
+ assert len(sent_messages) == 4 # 1 system + 3 input messages
+ assert sent_messages[0].role == "system"
+ assert sent_messages[0].content == instructions
+
+ # Check the rest of the messages were converted correctly
+ assert sent_messages[1].role == "user"
+ assert sent_messages[1].content == "Name some towns in Ireland"
+ assert sent_messages[2].role == "assistant"
+ assert sent_messages[2].content == "Galway, Longford, Sligo"
+ assert sent_messages[3].role == "user"
+ assert sent_messages[3].content == "Which is the largest?"
+
+
+@pytest.mark.asyncio
+@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
+async def test_create_openai_response_with_instructions_and_previous_response(
+ get_previous_response_with_input, openai_responses_impl, mock_inference_api
+):
+ """Test prepending both instructions and previous response."""
+
+ input_item_message = OpenAIResponseMessage(
+ id="123",
+ content="Name some towns in Ireland",
+ role="user",
+ )
+ input_items = OpenAIResponseInputItemList(data=[input_item_message])
+ response_output_message = OpenAIResponseMessage(
+ id="123",
+ content="Galway, Longford, Sligo",
+ status="completed",
+ role="assistant",
+ )
+ response = OpenAIResponseObject(
+ created_at=1,
+ id="resp_123",
+ model="fake_model",
+ output=[response_output_message],
+ status="completed",
+ )
+ previous_response = OpenAIResponsePreviousResponseWithInputItems(
+ input_items=input_items,
+ response=response,
+ )
+ get_previous_response_with_input.return_value = previous_response
+
+ model = "meta-llama/Llama-3.1-8B-Instruct"
+ instructions = "You are a geography expert. Provide concise answers."
+ mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
+ mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
+
+ # Execute
+ await openai_responses_impl.create_openai_response(
+        input="Which is the largest?", model=model, instructions=instructions, previous_response_id="resp_123"
+ )
+
+ # Verify
+ mock_inference_api.openai_chat_completion.assert_called_once()
+ call_args = mock_inference_api.openai_chat_completion.call_args
+ sent_messages = call_args.kwargs["messages"]
+
+ # Check that instructions were prepended as a system message
+    assert len(sent_messages) == 4  # 1 system + 2 from previous response + 1 new input
+ assert sent_messages[0].role == "system"
+ assert sent_messages[0].content == instructions
+
+ # Check the rest of the messages were converted correctly
+ assert sent_messages[1].role == "user"
+ assert sent_messages[1].content == "Name some towns in Ireland"
+ assert sent_messages[2].role == "assistant"
+ assert sent_messages[2].content == "Galway, Longford, Sligo"
+ assert sent_messages[3].role == "user"
+ assert sent_messages[3].content == "Which is the largest?"
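As the last test exercises, instructions are applied per request rather than inherited: `_prepend_previous_response` replays only the stored input items and output messages, so a follow-up that wants the same system prompt must pass `instructions` again. A hedged sketch of in-process usage, assuming `impl` is an already-wired `OpenAIResponsesImpl` with a live inference provider behind it (setup elided):

```python
import asyncio

async def main(impl):
    # Only the call shape comes from this diff; `impl` construction is elided.
    first = await impl.create_openai_response(
        input="Name some towns in Ireland",
        model="meta-llama/Llama-3.1-8B-Instruct",
        instructions="You are a geography expert. Provide concise answers.",
    )
    follow_up = await impl.create_openai_response(
        input="Which is the largest?",
        model="meta-llama/Llama-3.1-8B-Instruct",
        instructions="You are a geography expert. Provide concise answers.",
        previous_response_id=first.id,
    )
    print(follow_up.output)

# asyncio.run(main(impl))  # requires a configured OpenAIResponsesImpl
```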