feat: Add instructions parameter in response object (#3741)

# Problem
The current inline provider appends the user provided instructions to
messages as a system prompt, but the returned response object does not
contain the instructions field (as specified in the OpenAI responses
spec).

# What does this PR do?
This pull request adds the instruction field to the response object
definition and updates the inline provider. It also ensures that
instructions from previous response is not carried over to the next
response (as specified in the openAI spec).

Closes #[3566](https://github.com/llamastack/llama-stack/issues/3566)

## Test Plan

- Tested manually for change in model response w.r.t supplied
instructions field.
- Added unit test to check that the instructions from previous response
is not carried over to the next response.
- Added integration tests to check instructions parameter in the
returned response object.
- Added new recordings for the integration tests.

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Shabana Baig 2025-10-20 16:10:37 -04:00 committed by GitHub
parent 1f38359d95
commit add64e8e2a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
56 changed files with 10032 additions and 5816 deletions

View file

@ -814,6 +814,69 @@ async def test_create_openai_response_with_instructions_and_previous_response(
assert sent_messages[3].content == "Which is the largest?"
async def test_create_openai_response_with_previous_response_instructions(
openai_responses_impl, mock_responses_store, mock_inference_api
):
"""Test prepending instructions and previous response with instructions."""
input_item_message = OpenAIResponseMessage(
id="123",
content="Name some towns in Ireland",
role="user",
)
response_output_message = OpenAIResponseMessage(
id="123",
content="Galway, Longford, Sligo",
status="completed",
role="assistant",
)
response = _OpenAIResponseObjectWithInputAndMessages(
created_at=1,
id="resp_123",
model="fake_model",
output=[response_output_message],
status="completed",
text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")),
input=[input_item_message],
messages=[
OpenAIUserMessageParam(content="Name some towns in Ireland"),
OpenAIAssistantMessageParam(content="Galway, Longford, Sligo"),
],
instructions="You are a helpful assistant.",
)
mock_responses_store.get_response_object.return_value = response
model = "meta-llama/Llama-3.1-8B-Instruct"
instructions = "You are a geography expert. Provide concise answers."
mock_inference_api.openai_chat_completion.return_value = fake_stream()
# Execute
await openai_responses_impl.create_openai_response(
input="Which is the largest?", model=model, instructions=instructions, previous_response_id="123"
)
# Verify
mock_inference_api.openai_chat_completion.assert_called_once()
call_args = mock_inference_api.openai_chat_completion.call_args
params = call_args.args[0]
sent_messages = params.messages
# Check that instructions were prepended as a system message
# and that the previous response instructions were not carried over
assert len(sent_messages) == 4, sent_messages
assert sent_messages[0].role == "system"
assert sent_messages[0].content == instructions
# Check the rest of the messages were converted correctly
assert sent_messages[1].role == "user"
assert sent_messages[1].content == "Name some towns in Ireland"
assert sent_messages[2].role == "assistant"
assert sent_messages[2].content == "Galway, Longford, Sligo"
assert sent_messages[3].role == "user"
assert sent_messages[3].content == "Which is the largest?"
async def test_list_openai_response_input_items_delegation(openai_responses_impl, mock_responses_store):
"""Test that list_openai_response_input_items properly delegates to responses_store with correct parameters."""
# Setup