mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
fix: responses <> chat completion input conversion (#3645)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 2s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 1s
Python Package Build Test / build (3.12) (push) Failing after 2s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 5s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
API Conformance Tests / check-schema-compatibility (push) Successful in 10s
Vector IO Integration Tests / test-matrix (push) Failing after 5s
Python Package Build Test / build (3.13) (push) Failing after 3s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 9s
Test External API and Providers / test-external (venv) (push) Failing after 6s
Unit Tests / unit-tests (3.12) (push) Failing after 5s
Unit Tests / unit-tests (3.13) (push) Failing after 6s
UI Tests / ui-tests (22) (push) Successful in 33s
Pre-commit / pre-commit (push) Successful in 1m27s
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 2s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 1s
Python Package Build Test / build (3.12) (push) Failing after 2s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 5s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
API Conformance Tests / check-schema-compatibility (push) Successful in 10s
Vector IO Integration Tests / test-matrix (push) Failing after 5s
Python Package Build Test / build (3.13) (push) Failing after 3s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 9s
Test External API and Providers / test-external (venv) (push) Failing after 6s
Unit Tests / unit-tests (3.12) (push) Failing after 5s
Unit Tests / unit-tests (3.13) (push) Failing after 6s
UI Tests / ui-tests (22) (push) Successful in 33s
Pre-commit / pre-commit (push) Successful in 1m27s
# What does this PR do? closes #3268 closes #3498 When resuming from a previous response ID, we currently attempt to convert the stored responses input back into chat completion messages, which is not always possible — e.g. for tool calls, some data is lost once a chat completion message is converted to the responses input format. This PR stores the chat completion messages that correspond to the _last_ call to chat completion, which is sufficient to resume from in the next responses API call; there we load these saved messages and skip conversion entirely. Separate issue to optimize storage: https://github.com/llamastack/llama-stack/issues/3646 ## Test Plan existing CI tests
This commit is contained in:
parent
ef0736527d
commit
14a94e9894
7 changed files with 202 additions and 58 deletions
|
@ -22,7 +22,6 @@ from llama_stack.apis.agents.openai_responses import (
|
|||
OpenAIResponseInputToolFunction,
|
||||
OpenAIResponseInputToolWebSearch,
|
||||
OpenAIResponseMessage,
|
||||
OpenAIResponseObjectWithInput,
|
||||
OpenAIResponseOutputMessageContentOutputText,
|
||||
OpenAIResponseOutputMessageMCPCall,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall,
|
||||
|
@ -45,7 +44,10 @@ from llama_stack.core.datatypes import ResponsesStoreConfig
|
|||
from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
|
||||
OpenAIResponsesImpl,
|
||||
)
|
||||
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
|
||||
from llama_stack.providers.utils.responses.responses_store import (
|
||||
ResponsesStore,
|
||||
_OpenAIResponseObjectWithInputAndMessages,
|
||||
)
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
|
||||
from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture
|
||||
|
||||
|
@ -499,13 +501,6 @@ async def test_create_openai_response_with_multiple_messages(openai_responses_im
|
|||
assert isinstance(inference_messages[i], OpenAIDeveloperMessageParam)
|
||||
|
||||
|
||||
async def test_prepend_previous_response_none(openai_responses_impl):
|
||||
"""Test prepending no previous response to a new response."""
|
||||
|
||||
input = await openai_responses_impl._prepend_previous_response("fake_input", None)
|
||||
assert input == "fake_input"
|
||||
|
||||
|
||||
async def test_prepend_previous_response_basic(openai_responses_impl, mock_responses_store):
|
||||
"""Test prepending a basic previous response to a new response."""
|
||||
|
||||
|
@ -520,7 +515,7 @@ async def test_prepend_previous_response_basic(openai_responses_impl, mock_respo
|
|||
status="completed",
|
||||
role="assistant",
|
||||
)
|
||||
previous_response = OpenAIResponseObjectWithInput(
|
||||
previous_response = _OpenAIResponseObjectWithInputAndMessages(
|
||||
created_at=1,
|
||||
id="resp_123",
|
||||
model="fake_model",
|
||||
|
@ -528,10 +523,11 @@ async def test_prepend_previous_response_basic(openai_responses_impl, mock_respo
|
|||
status="completed",
|
||||
text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")),
|
||||
input=[input_item_message],
|
||||
messages=[OpenAIUserMessageParam(content="fake_previous_input")],
|
||||
)
|
||||
mock_responses_store.get_response_object.return_value = previous_response
|
||||
|
||||
input = await openai_responses_impl._prepend_previous_response("fake_input", "resp_123")
|
||||
input = await openai_responses_impl._prepend_previous_response("fake_input", previous_response)
|
||||
|
||||
assert len(input) == 3
|
||||
# Check for previous input
|
||||
|
@ -562,7 +558,7 @@ async def test_prepend_previous_response_web_search(openai_responses_impl, mock_
|
|||
status="completed",
|
||||
role="assistant",
|
||||
)
|
||||
response = OpenAIResponseObjectWithInput(
|
||||
response = _OpenAIResponseObjectWithInputAndMessages(
|
||||
created_at=1,
|
||||
id="resp_123",
|
||||
model="fake_model",
|
||||
|
@ -570,11 +566,12 @@ async def test_prepend_previous_response_web_search(openai_responses_impl, mock_
|
|||
status="completed",
|
||||
text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")),
|
||||
input=[input_item_message],
|
||||
messages=[OpenAIUserMessageParam(content="test input")],
|
||||
)
|
||||
mock_responses_store.get_response_object.return_value = response
|
||||
|
||||
input_messages = [OpenAIResponseMessage(content="fake_input", role="user")]
|
||||
input = await openai_responses_impl._prepend_previous_response(input_messages, "resp_123")
|
||||
input = await openai_responses_impl._prepend_previous_response(input_messages, response)
|
||||
|
||||
assert len(input) == 4
|
||||
# Check for previous input
|
||||
|
@ -609,7 +606,7 @@ async def test_prepend_previous_response_mcp_tool_call(openai_responses_impl, mo
|
|||
status="completed",
|
||||
role="assistant",
|
||||
)
|
||||
response = OpenAIResponseObjectWithInput(
|
||||
response = _OpenAIResponseObjectWithInputAndMessages(
|
||||
created_at=1,
|
||||
id="resp_123",
|
||||
model="fake_model",
|
||||
|
@ -617,11 +614,12 @@ async def test_prepend_previous_response_mcp_tool_call(openai_responses_impl, mo
|
|||
status="completed",
|
||||
text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")),
|
||||
input=[input_item_message],
|
||||
messages=[OpenAIUserMessageParam(content="test input")],
|
||||
)
|
||||
mock_responses_store.get_response_object.return_value = response
|
||||
|
||||
input_messages = [OpenAIResponseMessage(content="fake_input", role="user")]
|
||||
input = await openai_responses_impl._prepend_previous_response(input_messages, "resp_123")
|
||||
input = await openai_responses_impl._prepend_previous_response(input_messages, response)
|
||||
|
||||
assert len(input) == 4
|
||||
# Check for previous input
|
||||
|
@ -725,7 +723,7 @@ async def test_create_openai_response_with_instructions_and_previous_response(
|
|||
status="completed",
|
||||
role="assistant",
|
||||
)
|
||||
response = OpenAIResponseObjectWithInput(
|
||||
response = _OpenAIResponseObjectWithInputAndMessages(
|
||||
created_at=1,
|
||||
id="resp_123",
|
||||
model="fake_model",
|
||||
|
@ -733,6 +731,10 @@ async def test_create_openai_response_with_instructions_and_previous_response(
|
|||
status="completed",
|
||||
text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")),
|
||||
input=[input_item_message],
|
||||
messages=[
|
||||
OpenAIUserMessageParam(content="Name some towns in Ireland"),
|
||||
OpenAIAssistantMessageParam(content="Galway, Longford, Sligo"),
|
||||
],
|
||||
)
|
||||
mock_responses_store.get_response_object.return_value = response
|
||||
|
||||
|
@ -818,7 +820,7 @@ async def test_responses_store_list_input_items_logic():
|
|||
OpenAIResponseMessage(id="msg_4", content="Fourth message", role="user"),
|
||||
]
|
||||
|
||||
response_with_input = OpenAIResponseObjectWithInput(
|
||||
response_with_input = _OpenAIResponseObjectWithInputAndMessages(
|
||||
id="resp_123",
|
||||
model="test_model",
|
||||
created_at=1234567890,
|
||||
|
@ -827,6 +829,7 @@ async def test_responses_store_list_input_items_logic():
|
|||
output=[],
|
||||
text=OpenAIResponseText(format=(OpenAIResponseTextFormat(type="text"))),
|
||||
input=input_items,
|
||||
messages=[OpenAIUserMessageParam(content="First message")],
|
||||
)
|
||||
|
||||
# Mock the get_response_object method to return our test data
|
||||
|
@ -887,7 +890,7 @@ async def test_store_response_uses_rehydrated_input_with_previous_response(
|
|||
rather than just the original input when previous_response_id is provided."""
|
||||
|
||||
# Setup - Create a previous response that should be included in the stored input
|
||||
previous_response = OpenAIResponseObjectWithInput(
|
||||
previous_response = _OpenAIResponseObjectWithInputAndMessages(
|
||||
id="resp-previous-123",
|
||||
object="response",
|
||||
created_at=1234567890,
|
||||
|
@ -906,6 +909,10 @@ async def test_store_response_uses_rehydrated_input_with_previous_response(
|
|||
content=[OpenAIResponseOutputMessageContentOutputText(text="2+2 equals 4.")],
|
||||
)
|
||||
],
|
||||
messages=[
|
||||
OpenAIUserMessageParam(content="What is 2+2?"),
|
||||
OpenAIAssistantMessageParam(content="2+2 equals 4."),
|
||||
],
|
||||
)
|
||||
|
||||
mock_responses_store.get_response_object.return_value = previous_response
|
||||
|
|
|
@ -14,6 +14,7 @@ from llama_stack.apis.agents.openai_responses import (
|
|||
OpenAIResponseInput,
|
||||
OpenAIResponseObject,
|
||||
)
|
||||
from llama_stack.apis.inference import OpenAIMessageParam, OpenAIUserMessageParam
|
||||
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
|
||||
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
|
||||
|
||||
|
@ -44,6 +45,11 @@ def create_test_response_input(content: str, input_id: str) -> OpenAIResponseInp
|
|||
)
|
||||
|
||||
|
||||
def create_test_messages(content: str) -> list[OpenAIMessageParam]:
|
||||
"""Helper to create test messages for chat completion."""
|
||||
return [OpenAIUserMessageParam(content=content)]
|
||||
|
||||
|
||||
async def test_responses_store_pagination_basic():
|
||||
"""Test basic pagination functionality for responses store."""
|
||||
with TemporaryDirectory() as tmp_dir:
|
||||
|
@ -65,7 +71,8 @@ async def test_responses_store_pagination_basic():
|
|||
for response_id, timestamp in test_data:
|
||||
response = create_test_response_object(response_id, timestamp)
|
||||
input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")]
|
||||
await store.store_response_object(response, input_list)
|
||||
messages = create_test_messages(f"Input for {response_id}")
|
||||
await store.store_response_object(response, input_list, messages)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
@ -111,7 +118,8 @@ async def test_responses_store_pagination_ascending():
|
|||
for response_id, timestamp in test_data:
|
||||
response = create_test_response_object(response_id, timestamp)
|
||||
input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")]
|
||||
await store.store_response_object(response, input_list)
|
||||
messages = create_test_messages(f"Input for {response_id}")
|
||||
await store.store_response_object(response, input_list, messages)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
@ -149,7 +157,8 @@ async def test_responses_store_pagination_with_model_filter():
|
|||
for response_id, timestamp, model in test_data:
|
||||
response = create_test_response_object(response_id, timestamp, model)
|
||||
input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")]
|
||||
await store.store_response_object(response, input_list)
|
||||
messages = create_test_messages(f"Input for {response_id}")
|
||||
await store.store_response_object(response, input_list, messages)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
@ -199,7 +208,8 @@ async def test_responses_store_pagination_no_limit():
|
|||
for response_id, timestamp in test_data:
|
||||
response = create_test_response_object(response_id, timestamp)
|
||||
input_list = [create_test_response_input(f"Input for {response_id}", f"input-{response_id}")]
|
||||
await store.store_response_object(response, input_list)
|
||||
messages = create_test_messages(f"Input for {response_id}")
|
||||
await store.store_response_object(response, input_list, messages)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
@ -222,7 +232,8 @@ async def test_responses_store_get_response_object():
|
|||
# Store a test response
|
||||
response = create_test_response_object("test-resp", int(time.time()))
|
||||
input_list = [create_test_response_input("Test input content", "input-test-resp")]
|
||||
await store.store_response_object(response, input_list)
|
||||
messages = create_test_messages("Test input content")
|
||||
await store.store_response_object(response, input_list, messages)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
@ -255,7 +266,8 @@ async def test_responses_store_input_items_pagination():
|
|||
create_test_response_input("Fourth input", "input-4"),
|
||||
create_test_response_input("Fifth input", "input-5"),
|
||||
]
|
||||
await store.store_response_object(response, input_list)
|
||||
messages = create_test_messages("First input")
|
||||
await store.store_response_object(response, input_list, messages)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
@ -335,7 +347,8 @@ async def test_responses_store_input_items_before_pagination():
|
|||
create_test_response_input("Fourth input", "before-4"),
|
||||
create_test_response_input("Fifth input", "before-5"),
|
||||
]
|
||||
await store.store_response_object(response, input_list)
|
||||
messages = create_test_messages("First input")
|
||||
await store.store_response_object(response, input_list, messages)
|
||||
|
||||
# Wait for all queued writes to complete
|
||||
await store.flush()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue