feat: Add "instructions" support to responses API (#2205)

# What does this PR do?
Add support for "instructions" to the responses API. Instructions
provide a way to swap out system (or developer) messages in new
responses.


## Test Plan
unit tests added

Signed-off-by: Derek Higgins <derekh@redhat.com>
This commit is contained in:
Derek Higgins 2025-05-20 17:52:10 +01:00 committed by GitHub
parent 1a770cf8ac
commit 3339844fda
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 153 additions and 1 deletions

View file

@ -384,3 +384,141 @@ async def test_prepend_previous_response_web_search(get_previous_response_with_i
# Check for new input
assert isinstance(input[3], OpenAIResponseMessage)
assert input[3].content == "fake_input"
@pytest.mark.asyncio
async def test_create_openai_response_with_instructions(openai_responses_impl, mock_inference_api):
# Setup
input_text = "What is the capital of Ireland?"
model = "meta-llama/Llama-3.1-8B-Instruct"
instructions = "You are a geography expert. Provide concise answers."
# Load the chat completion fixture
mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
# Execute
await openai_responses_impl.create_openai_response(
input=input_text,
model=model,
instructions=instructions,
)
# Verify
mock_inference_api.openai_chat_completion.assert_called_once()
call_args = mock_inference_api.openai_chat_completion.call_args
sent_messages = call_args.kwargs["messages"]
# Check that instructions were prepended as a system message
assert len(sent_messages) == 2
assert sent_messages[0].role == "system"
assert sent_messages[0].content == instructions
assert sent_messages[1].role == "user"
assert sent_messages[1].content == input_text
@pytest.mark.asyncio
async def test_create_openai_response_with_instructions_and_multiple_messages(
openai_responses_impl, mock_inference_api
):
# Setup
input_messages = [
OpenAIResponseMessage(role="user", content="Name some towns in Ireland", name=None),
OpenAIResponseMessage(
role="assistant",
content="Galway, Longford, Sligo",
name=None,
),
OpenAIResponseMessage(role="user", content="Which is the largest?", name=None),
]
model = "meta-llama/Llama-3.1-8B-Instruct"
instructions = "You are a geography expert. Provide concise answers."
mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
# Execute
await openai_responses_impl.create_openai_response(
input=input_messages,
model=model,
instructions=instructions,
)
# Verify
mock_inference_api.openai_chat_completion.assert_called_once()
call_args = mock_inference_api.openai_chat_completion.call_args
sent_messages = call_args.kwargs["messages"]
# Check that instructions were prepended as a system message
assert len(sent_messages) == 4 # 1 system + 3 input messages
assert sent_messages[0].role == "system"
assert sent_messages[0].content == instructions
# Check the rest of the messages were converted correctly
assert sent_messages[1].role == "user"
assert sent_messages[1].content == "Name some towns in Ireland"
assert sent_messages[2].role == "assistant"
assert sent_messages[2].content == "Galway, Longford, Sligo"
assert sent_messages[3].role == "user"
assert sent_messages[3].content == "Which is the largest?"
@pytest.mark.asyncio
@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
async def test_create_openai_response_with_instructions_and_previous_response(
get_previous_response_with_input, openai_responses_impl, mock_inference_api
):
"""Test prepending both instructions and previous response."""
input_item_message = OpenAIResponseMessage(
id="123",
content="Name some towns in Ireland",
role="user",
)
input_items = OpenAIResponseInputItemList(data=[input_item_message])
response_output_message = OpenAIResponseMessage(
id="123",
content="Galway, Longford, Sligo",
status="completed",
role="assistant",
)
response = OpenAIResponseObject(
created_at=1,
id="resp_123",
model="fake_model",
output=[response_output_message],
status="completed",
)
previous_response = OpenAIResponsePreviousResponseWithInputItems(
input_items=input_items,
response=response,
)
get_previous_response_with_input.return_value = previous_response
model = "meta-llama/Llama-3.1-8B-Instruct"
instructions = "You are a geography expert. Provide concise answers."
mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
# Execute
await openai_responses_impl.create_openai_response(
input="Which is the largest?", model=model, instructions=instructions, previous_response_id="123"
)
# Verify
mock_inference_api.openai_chat_completion.assert_called_once()
call_args = mock_inference_api.openai_chat_completion.call_args
sent_messages = call_args.kwargs["messages"]
# Check that instructions were prepended as a system message
assert len(sent_messages) == 4
assert sent_messages[0].role == "system"
assert sent_messages[0].content == instructions
# Check the rest of the messages were converted correctly
assert sent_messages[1].role == "user"
assert sent_messages[1].content == "Name some towns in Ireland"
assert sent_messages[2].role == "assistant"
assert sent_messages[2].content == "Galway, Longford, Sligo"
assert sent_messages[3].role == "user"
assert sent_messages[3].content == "Which is the largest?"