feat: Add "instructions" support to responses API (#2205)
# What does this PR do?

Add support for "instructions" to the responses API. Instructions provide a way to swap out the system (or developer) message in new responses.

## Test Plan

Unit tests added.

Signed-off-by: Derek Higgins <derekh@redhat.com>
Parent: 1a770cf8ac
Commit: 3339844fda

6 changed files with 153 additions and 1 deletion
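For illustration, here is a minimal sketch of how the new parameter is meant to be used, based on the `create_openai_response` signature in the diffs below. The wiring of `responses_impl` to a live inference provider is an assumption and is not shown:

```python
# Hypothetical usage sketch: `responses_impl` stands in for a configured
# OpenAIResponsesImpl instance backed by a real inference provider.
async def ask_with_instructions(responses_impl):
    # `instructions` is the new field: it is prepended to the chat messages
    # as a system message before the model is invoked.
    return await responses_impl.create_openai_response(
        input="What is the capital of Ireland?",
        model="meta-llama/Llama-3.1-8B-Instruct",
        instructions="You are a geography expert. Provide concise answers.",
    )
```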
docs/_static/llama-stack-spec.html (vendored, 3 changed lines)

```diff
@@ -7027,6 +7027,9 @@
           "type": "string",
           "description": "The underlying LLM used for completions."
         },
+        "instructions": {
+          "type": "string"
+        },
         "previous_response_id": {
           "type": "string",
           "description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
```
docs/_static/llama-stack-spec.yaml (vendored, 2 changed lines)

```diff
@@ -4952,6 +4952,8 @@ components:
       model:
        type: string
        description: The underlying LLM used for completions.
+      instructions:
+        type: string
      previous_response_id:
        type: string
        description: >-
```
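Both vendored spec files describe the same schema change: an optional `instructions` string alongside `model` and `previous_response_id` in the request body. A request body using it might look like the following sketch; the field names come from the spec diff, the values are illustrative, and `input` is taken from the Python signatures rather than this hunk:

```python
# Illustrative request body per the updated schema.
create_response_body = {
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "input": "What is the capital of Ireland?",
    # New optional field added by this PR.
    "instructions": "You are a geography expert. Provide concise answers.",
    # Optional: fork off an existing response instead of starting fresh.
    # "previous_response_id": "resp_123",
}
```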
In the `Agents` protocol, the new parameter is added to the create-response signature:

```diff
@@ -596,6 +596,7 @@ class Agents(Protocol):
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
         stream: bool | None = False,
```
In `MetaReferenceAgentsImpl`, the parameter is accepted and forwarded:

```diff
@@ -313,6 +313,7 @@ class MetaReferenceAgentsImpl(Agents):
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
         stream: bool | None = False,
@@ -320,5 +321,5 @@ class MetaReferenceAgentsImpl(Agents):
         tools: list[OpenAIResponseInputTool] | None = None,
     ) -> OpenAIResponseObject:
         return await self.openai_responses_impl.create_openai_response(
-            input, model, previous_response_id, store, stream, temperature, tools
+            input, model, instructions, previous_response_id, store, stream, temperature, tools
         )
```
In `OpenAIResponsesImpl`, a helper prepends the instructions as a system message, and the create path calls it after converting the input to chat messages:

```diff
@@ -208,6 +208,10 @@ class OpenAIResponsesImpl:

         return input

+    async def _prepend_instructions(self, messages, instructions):
+        if instructions:
+            messages.insert(0, OpenAISystemMessageParam(content=instructions))
+
     async def get_openai_response(
         self,
         id: str,
@@ -219,6 +223,7 @@ class OpenAIResponsesImpl:
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
         stream: bool | None = False,
@@ -229,7 +234,9 @@ class OpenAIResponsesImpl:

         input = await self._prepend_previous_response(input, previous_response_id)
         messages = await _convert_response_input_to_chat_messages(input)
+        await self._prepend_instructions(messages, instructions)
         chat_tools = await self._convert_response_tools_to_chat_tools(tools) if tools else None

         chat_response = await self.inference_api.openai_chat_completion(
             model=model,
             messages=messages,
```
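The mechanism itself is tiny and worth seeing in isolation. Below is a self-contained sketch of the prepend behavior, with a hypothetical stand-in dataclass replacing the real `OpenAISystemMessageParam`:

```python
from dataclasses import dataclass


@dataclass
class SystemMessage:
    """Stand-in for OpenAISystemMessageParam, just enough for the sketch."""

    content: str
    role: str = "system"


def prepend_instructions(messages: list, instructions: str | None) -> None:
    # Mirrors _prepend_instructions above: mutate `messages` in place,
    # inserting a system message at index 0 when instructions are given.
    # An empty string or None is a no-op because of the truthiness check.
    if instructions:
        messages.insert(0, SystemMessage(content=instructions))


msgs = [{"role": "user", "content": "What is the capital of Ireland?"}]
prepend_instructions(msgs, "You are a geography expert.")
assert msgs[0].role == "system"  # instructions now lead the conversation
```

Because the guard is a truthiness check, passing `instructions=""` leaves the messages untouched, which matches the optional, default-`None` parameter added to `create_openai_response`.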
In the unit tests, three new cases cover instructions alone, instructions with a multi-message input, and instructions combined with a previous response:

```diff
@@ -384,3 +384,141 @@ async def test_prepend_previous_response_web_search(get_previous_response_with_i
     # Check for new input
     assert isinstance(input[3], OpenAIResponseMessage)
     assert input[3].content == "fake_input"
+
+
+@pytest.mark.asyncio
+async def test_create_openai_response_with_instructions(openai_responses_impl, mock_inference_api):
+    # Setup
+    input_text = "What is the capital of Ireland?"
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+    instructions = "You are a geography expert. Provide concise answers."
+
+    # Load the chat completion fixture
+    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
+    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
+
+    # Execute
+    await openai_responses_impl.create_openai_response(
+        input=input_text,
+        model=model,
+        instructions=instructions,
+    )
+
+    # Verify
+    mock_inference_api.openai_chat_completion.assert_called_once()
+    call_args = mock_inference_api.openai_chat_completion.call_args
+    sent_messages = call_args.kwargs["messages"]
+
+    # Check that instructions were prepended as a system message
+    assert len(sent_messages) == 2
+    assert sent_messages[0].role == "system"
+    assert sent_messages[0].content == instructions
+    assert sent_messages[1].role == "user"
+    assert sent_messages[1].content == input_text
+
+
+@pytest.mark.asyncio
+async def test_create_openai_response_with_instructions_and_multiple_messages(
+    openai_responses_impl, mock_inference_api
+):
+    # Setup
+    input_messages = [
+        OpenAIResponseMessage(role="user", content="Name some towns in Ireland", name=None),
+        OpenAIResponseMessage(
+            role="assistant",
+            content="Galway, Longford, Sligo",
+            name=None,
+        ),
+        OpenAIResponseMessage(role="user", content="Which is the largest?", name=None),
+    ]
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+    instructions = "You are a geography expert. Provide concise answers."
+
+    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
+    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
+
+    # Execute
+    await openai_responses_impl.create_openai_response(
+        input=input_messages,
+        model=model,
+        instructions=instructions,
+    )
+
+    # Verify
+    mock_inference_api.openai_chat_completion.assert_called_once()
+    call_args = mock_inference_api.openai_chat_completion.call_args
+    sent_messages = call_args.kwargs["messages"]
+
+    # Check that instructions were prepended as a system message
+    assert len(sent_messages) == 4  # 1 system + 3 input messages
+    assert sent_messages[0].role == "system"
+    assert sent_messages[0].content == instructions
+
+    # Check the rest of the messages were converted correctly
+    assert sent_messages[1].role == "user"
+    assert sent_messages[1].content == "Name some towns in Ireland"
+    assert sent_messages[2].role == "assistant"
+    assert sent_messages[2].content == "Galway, Longford, Sligo"
+    assert sent_messages[3].role == "user"
+    assert sent_messages[3].content == "Which is the largest?"
+
+
+@pytest.mark.asyncio
+@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
+async def test_create_openai_response_with_instructions_and_previous_response(
+    get_previous_response_with_input, openai_responses_impl, mock_inference_api
+):
+    """Test prepending both instructions and previous response."""
+
+    input_item_message = OpenAIResponseMessage(
+        id="123",
+        content="Name some towns in Ireland",
+        role="user",
+    )
+    input_items = OpenAIResponseInputItemList(data=[input_item_message])
+    response_output_message = OpenAIResponseMessage(
+        id="123",
+        content="Galway, Longford, Sligo",
+        status="completed",
+        role="assistant",
+    )
+    response = OpenAIResponseObject(
+        created_at=1,
+        id="resp_123",
+        model="fake_model",
+        output=[response_output_message],
+        status="completed",
+    )
+    previous_response = OpenAIResponsePreviousResponseWithInputItems(
+        input_items=input_items,
+        response=response,
+    )
+    get_previous_response_with_input.return_value = previous_response
+
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+    instructions = "You are a geography expert. Provide concise answers."
+    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
+    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
+
+    # Execute
+    await openai_responses_impl.create_openai_response(
+        input="Which is the largest?", model=model, instructions=instructions, previous_response_id="123"
+    )
+
+    # Verify
+    mock_inference_api.openai_chat_completion.assert_called_once()
+    call_args = mock_inference_api.openai_chat_completion.call_args
+    sent_messages = call_args.kwargs["messages"]
+
+    # Check that instructions were prepended as a system message
+    assert len(sent_messages) == 4
+    assert sent_messages[0].role == "system"
+    assert sent_messages[0].content == instructions
+
+    # Check the rest of the messages were converted correctly
+    assert sent_messages[1].role == "user"
+    assert sent_messages[1].content == "Name some towns in Ireland"
+    assert sent_messages[2].role == "assistant"
+    assert sent_messages[2].content == "Galway, Longford, Sligo"
+    assert sent_messages[3].role == "user"
+    assert sent_messages[3].content == "Which is the largest?"
```