From 3339844fda052ee4ad0449de924d28f39f641ff7 Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Tue, 20 May 2025 17:52:10 +0100
Subject: [PATCH] feat: Add "instructions" support to responses API (#2205)

# What does this PR do?
Add support for "instructions" to the responses API. Instructions
provide a way to swap out system (or developer) messages in new
responses.

## Test Plan
unit tests added

Signed-off-by: Derek Higgins
---
 docs/_static/llama-stack-spec.html            |   3 +
 docs/_static/llama-stack-spec.yaml            |   2 +
 llama_stack/apis/agents/agents.py             |   1 +
 .../inline/agents/meta_reference/agents.py    |   3 +-
 .../agents/meta_reference/openai_responses.py |   7 +
 .../meta_reference/test_openai_responses.py   | 138 ++++++++++++++++++
 6 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 6378a5ced..6adfe9b2b 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -7027,6 +7027,9 @@
             "type": "string",
             "description": "The underlying LLM used for completions."
           },
+          "instructions": {
+            "type": "string"
+          },
           "previous_response_id": {
             "type": "string",
             "description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 012610d02..31ca3f52a 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -4952,6 +4952,8 @@ components:
         model:
           type: string
           description: The underlying LLM used for completions.
+        instructions:
+          type: string
         previous_response_id:
           type: string
           description: >-
diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index b2f85336c..8ecafdf26 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -596,6 +596,7 @@ class Agents(Protocol):
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
         stream: bool | None = False,
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py
index 86780fd61..8f54cc737 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -313,6 +313,7 @@ class MetaReferenceAgentsImpl(Agents):
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
         stream: bool | None = False,
@@ -320,5 +321,5 @@ class MetaReferenceAgentsImpl(Agents):
         tools: list[OpenAIResponseInputTool] | None = None,
     ) -> OpenAIResponseObject:
         return await self.openai_responses_impl.create_openai_response(
-            input, model, previous_response_id, store, stream, temperature, tools
+            input, model, instructions, previous_response_id, store, stream, temperature, tools
         )
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index 6d9d06109..f5b0d8c31 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -208,6 +208,10 @@ class OpenAIResponsesImpl:
 
         return input
 
+    async def _prepend_instructions(self, messages, instructions):
+        if instructions:
+            messages.insert(0, OpenAISystemMessageParam(content=instructions))
+
     async def get_openai_response(
         self,
         id: str,
@@ -219,6 +223,7 @@ class OpenAIResponsesImpl:
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
         stream: bool | None = False,
@@ -229,7 +234,9 @@
             input = await self._prepend_previous_response(input, previous_response_id)
 
         messages = await _convert_response_input_to_chat_messages(input)
+        await self._prepend_instructions(messages, instructions)
         chat_tools = await self._convert_response_tools_to_chat_tools(tools) if tools else None
+
         chat_response = await self.inference_api.openai_chat_completion(
             model=model,
             messages=messages,
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index ed5f13a58..0a8d59306 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -384,3 +384,141 @@ async def test_prepend_previous_response_web_search(get_previous_response_with_i
     # Check for new input
     assert isinstance(input[3], OpenAIResponseMessage)
     assert input[3].content == "fake_input"
+
+
+@pytest.mark.asyncio
+async def test_create_openai_response_with_instructions(openai_responses_impl, mock_inference_api):
+    # Setup
+    input_text = "What is the capital of Ireland?"
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+    instructions = "You are a geography expert. Provide concise answers."
+
+    # Load the chat completion fixture
+    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
+    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
+
+    # Execute
+    await openai_responses_impl.create_openai_response(
+        input=input_text,
+        model=model,
+        instructions=instructions,
+    )
+
+    # Verify
+    mock_inference_api.openai_chat_completion.assert_called_once()
+    call_args = mock_inference_api.openai_chat_completion.call_args
+    sent_messages = call_args.kwargs["messages"]
+
+    # Check that instructions were prepended as a system message
+    assert len(sent_messages) == 2
+    assert sent_messages[0].role == "system"
+    assert sent_messages[0].content == instructions
+    assert sent_messages[1].role == "user"
+    assert sent_messages[1].content == input_text
+
+
+@pytest.mark.asyncio
+async def test_create_openai_response_with_instructions_and_multiple_messages(
+    openai_responses_impl, mock_inference_api
+):
+    # Setup
+    input_messages = [
+        OpenAIResponseMessage(role="user", content="Name some towns in Ireland", name=None),
+        OpenAIResponseMessage(
+            role="assistant",
+            content="Galway, Longford, Sligo",
+            name=None,
+        ),
+        OpenAIResponseMessage(role="user", content="Which is the largest?", name=None),
+    ]
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+    instructions = "You are a geography expert. Provide concise answers."
+
+    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
+    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
+
+    # Execute
+    await openai_responses_impl.create_openai_response(
+        input=input_messages,
+        model=model,
+        instructions=instructions,
+    )
+
+    # Verify
+    mock_inference_api.openai_chat_completion.assert_called_once()
+    call_args = mock_inference_api.openai_chat_completion.call_args
+    sent_messages = call_args.kwargs["messages"]
+
+    # Check that instructions were prepended as a system message
+    assert len(sent_messages) == 4  # 1 system + 3 input messages
+    assert sent_messages[0].role == "system"
+    assert sent_messages[0].content == instructions
+
+    # Check the rest of the messages were converted correctly
+    assert sent_messages[1].role == "user"
+    assert sent_messages[1].content == "Name some towns in Ireland"
+    assert sent_messages[2].role == "assistant"
+    assert sent_messages[2].content == "Galway, Longford, Sligo"
+    assert sent_messages[3].role == "user"
+    assert sent_messages[3].content == "Which is the largest?"
+
+
+@pytest.mark.asyncio
+@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
+async def test_create_openai_response_with_instructions_and_previous_response(
+    get_previous_response_with_input, openai_responses_impl, mock_inference_api
+):
+    """Test prepending both instructions and previous response."""
+
+    input_item_message = OpenAIResponseMessage(
+        id="123",
+        content="Name some towns in Ireland",
+        role="user",
+    )
+    input_items = OpenAIResponseInputItemList(data=[input_item_message])
+    response_output_message = OpenAIResponseMessage(
+        id="123",
+        content="Galway, Longford, Sligo",
+        status="completed",
+        role="assistant",
+    )
+    response = OpenAIResponseObject(
+        created_at=1,
+        id="resp_123",
+        model="fake_model",
+        output=[response_output_message],
+        status="completed",
+    )
+    previous_response = OpenAIResponsePreviousResponseWithInputItems(
+        input_items=input_items,
+        response=response,
+    )
+    get_previous_response_with_input.return_value = previous_response
+
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+    instructions = "You are a geography expert. Provide concise answers."
+    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
+    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
+
+    # Execute
+    await openai_responses_impl.create_openai_response(
+        input="Which is the largest?", model=model, instructions=instructions, previous_response_id="123"
+    )
+
+    # Verify
+    mock_inference_api.openai_chat_completion.assert_called_once()
+    call_args = mock_inference_api.openai_chat_completion.call_args
+    sent_messages = call_args.kwargs["messages"]
+
+    # Check that instructions were prepended as a system message
+    assert len(sent_messages) == 4
+    assert sent_messages[0].role == "system"
+    assert sent_messages[0].content == instructions
+
+    # Check the rest of the messages were converted correctly
+    assert sent_messages[1].role == "user"
+    assert sent_messages[1].content == "Name some towns in Ireland"
+    assert sent_messages[2].role == "assistant"
+    assert sent_messages[2].content == "Galway, Longford, Sligo"
+    assert sent_messages[3].role == "user"
+    assert sent_messages[3].content == "Which is the largest?"
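
A usage sketch for reviewers, not part of the patch: since the responses API mirrors OpenAI's, the new `instructions` parameter can be exercised through the `openai` Python client pointed at a Llama Stack server. The base URL, port, and API key below are assumptions about a local deployment, and `client.responses.create` / `response.output_text` come from the `openai` package, not from this repository.

```python
# Illustrative only: assumes a running Llama Stack server exposing its
# OpenAI-compatible responses endpoint locally; adjust URL/model to your setup.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8321/v1/openai/v1",  # assumed local endpoint
    api_key="not-needed",  # placeholder; local deployments typically skip auth
)

# "instructions" becomes the system message that _prepend_instructions
# inserts at position 0 before the chat completion call.
first = client.responses.create(
    model="meta-llama/Llama-3.1-8B-Instruct",
    instructions="You are a geography expert. Provide concise answers.",
    input="Name some towns in Ireland",
)

# Forking the conversation with different instructions swaps out the system
# message without rewriting the prior turns.
follow_up = client.responses.create(
    model="meta-llama/Llama-3.1-8B-Instruct",
    instructions="Answer in a single word.",
    input="Which is the largest?",
    previous_response_id=first.id,
)
print(follow_up.output_text)
```

This mirrors the third unit test above: instructions travel with each request rather than with the stored conversation, which is what makes the swap described in the PR description possible.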