We must store the full (re-hydrated) input in the Response object, not just the original input. This is not very space efficient; eventually we should find a better storage scheme that keeps only unique entries in the database and re-hydrates them efficiently on read, but that can be done safely later.

Closes https://github.com/meta-llama/llama-stack/issues/2299

## Test Plan

Unit test
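As an aside on the storage note above, here is a minimal, hypothetical sketch of what such a deduplicating scheme could look like; the names (`item_key`, `items_table`, `store_input`, `rehydrate`) are illustrative only and not part of this change:

```python
import hashlib
import json


def item_key(item: dict) -> str:
    # Content-address an input item: identical items always map to the same key.
    canonical = json.dumps(item, sort_keys=True)
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()


# One shared table of unique items; each response row would store only the keys.
items_table: dict[str, dict] = {}


def store_input(items: list[dict]) -> list[str]:
    keys = []
    for item in items:
        key = item_key(item)
        items_table.setdefault(key, item)  # each unique item is stored once
        keys.append(key)
    return keys


def rehydrate(keys: list[str]) -> list[dict]:
    # Rebuild the full input list for a response from its stored keys.
    return [items_table[k] for k in keys]
```

With this shape, a conversation whose prefix is re-stored on every turn would grow the items table only by the new messages.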
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from unittest.mock import AsyncMock

import pytest
from openai.types.chat.chat_completion_chunk import (
    ChatCompletionChunk,
    Choice,
    ChoiceDelta,
    ChoiceDeltaToolCall,
    ChoiceDeltaToolCallFunction,
)

from llama_stack.apis.agents import Order
from llama_stack.apis.agents.openai_responses import (
    ListOpenAIResponseInputItem,
    OpenAIResponseInputMessageContentText,
    OpenAIResponseInputToolFunction,
    OpenAIResponseInputToolWebSearch,
    OpenAIResponseMessage,
    OpenAIResponseObjectWithInput,
    OpenAIResponseOutputMessageContentOutputText,
    OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.inference.inference import (
    OpenAIAssistantMessageParam,
    OpenAIChatCompletionContentPartTextParam,
    OpenAIDeveloperMessageParam,
    OpenAIUserMessageParam,
)
from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
    OpenAIResponsesImpl,
)
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture

@pytest.fixture
def mock_inference_api():
    inference_api = AsyncMock()
    return inference_api


@pytest.fixture
def mock_tool_groups_api():
    tool_groups_api = AsyncMock(spec=ToolGroups)
    return tool_groups_api


@pytest.fixture
def mock_tool_runtime_api():
    tool_runtime_api = AsyncMock(spec=ToolRuntime)
    return tool_runtime_api


@pytest.fixture
def mock_responses_store():
    responses_store = AsyncMock(spec=ResponsesStore)
    return responses_store


@pytest.fixture
def openai_responses_impl(mock_inference_api, mock_tool_groups_api, mock_tool_runtime_api, mock_responses_store):
    return OpenAIResponsesImpl(
        inference_api=mock_inference_api,
        tool_groups_api=mock_tool_groups_api,
        tool_runtime_api=mock_tool_runtime_api,
        responses_store=mock_responses_store,
    )

@pytest.mark.asyncio
async def test_create_openai_response_with_string_input(openai_responses_impl, mock_inference_api):
    """Test creating an OpenAI response with a simple string input."""
    # Setup
    input_text = "What is the capital of Ireland?"
    model = "meta-llama/Llama-3.1-8B-Instruct"

    # Load the chat completion fixture
    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion

    # Execute
    result = await openai_responses_impl.create_openai_response(
        input=input_text,
        model=model,
        temperature=0.1,
    )

    # Verify
    mock_inference_api.openai_chat_completion.assert_called_once_with(
        model=model,
        messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)],
        tools=None,
        stream=False,
        temperature=0.1,
    )
    openai_responses_impl.responses_store.store_response_object.assert_called_once()
    assert result.model == model
    assert len(result.output) == 1
    assert isinstance(result.output[0], OpenAIResponseMessage)
    assert result.output[0].content[0].text == "Dublin"

@pytest.mark.asyncio
async def test_create_openai_response_with_string_input_with_tools(openai_responses_impl, mock_inference_api):
    """Test creating an OpenAI response with a simple string input and tools."""
    # Setup
    input_text = "What is the capital of Ireland?"
    model = "meta-llama/Llama-3.1-8B-Instruct"

    # Load the chat completion fixtures
    tool_call_completion = load_chat_completion_fixture("tool_call_completion.yaml")
    tool_response_completion = load_chat_completion_fixture("simple_chat_completion.yaml")

    mock_inference_api.openai_chat_completion.side_effect = [
        tool_call_completion,
        tool_response_completion,
    ]

    openai_responses_impl.tool_groups_api.get_tool.return_value = Tool(
        identifier="web_search",
        provider_id="client",
        toolgroup_id="web_search",
        tool_host="client",
        description="Search the web for information",
        parameters=[
            ToolParameter(name="query", parameter_type="string", description="The query to search for", required=True)
        ],
    )

    openai_responses_impl.tool_runtime_api.invoke_tool.return_value = ToolInvocationResult(
        status="completed",
        content="Dublin",
    )

    # Execute
    result = await openai_responses_impl.create_openai_response(
        input=input_text,
        model=model,
        temperature=0.1,
        tools=[
            OpenAIResponseInputToolWebSearch(
                name="web_search",
            )
        ],
    )

    # Verify
    first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
    assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?"
    assert first_call.kwargs["tools"] is not None
    assert first_call.kwargs["temperature"] == 0.1

    second_call = mock_inference_api.openai_chat_completion.call_args_list[1]
    assert second_call.kwargs["messages"][-1].content == "Dublin"
    assert second_call.kwargs["temperature"] == 0.1

    openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
    openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
        tool_name="web_search",
        kwargs={"query": "What is the capital of Ireland?"},
    )

    openai_responses_impl.responses_store.store_response_object.assert_called_once()

    # Check that we got the content from our mocked tool execution result
    assert len(result.output) >= 2  # tool call plus assistant message, since output[1] is accessed below
    assert isinstance(result.output[1], OpenAIResponseMessage)
    assert result.output[1].content[0].text == "Dublin"

@pytest.mark.asyncio
async def test_create_openai_response_with_tool_call_type_none(openai_responses_impl, mock_inference_api):
    """Test creating an OpenAI response with a tool call response that has a type of None."""
    # Setup
    input_text = "How hot is it in San Francisco today?"
    model = "meta-llama/Llama-3.1-8B-Instruct"
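
    # Stream a single chunk whose tool-call delta carries type=None, which some
    # providers emit; the streaming response builder must tolerate it.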
    async def fake_stream():
        yield ChatCompletionChunk(
            id="123",
            choices=[
                Choice(
                    index=0,
                    delta=ChoiceDelta(
                        tool_calls=[
                            ChoiceDeltaToolCall(
                                index=0,
                                id="tc_123",
                                function=ChoiceDeltaToolCallFunction(name="get_weather", arguments="{}"),
                                type=None,
                            )
                        ]
                    ),
                ),
            ],
            created=1,
            model=model,
            object="chat.completion.chunk",
        )

    mock_inference_api.openai_chat_completion.return_value = fake_stream()

    # Execute
    result = await openai_responses_impl.create_openai_response(
        input=input_text,
        model=model,
        stream=True,
        temperature=0.1,
        tools=[
            OpenAIResponseInputToolFunction(
                name="get_weather",
                description="Get current temperature for a given location.",
                parameters={
                    "location": "string",
                },
            )
        ],
    )

    # Verify
    first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
    assert first_call.kwargs["messages"][0].content == input_text
    assert first_call.kwargs["tools"] is not None
    assert first_call.kwargs["temperature"] == 0.1

    # Check that we got the content from our mocked tool execution result
    chunks = [chunk async for chunk in result]
    assert len(chunks) == 2  # Should have response.created and response.completed

    # Check response.created event (should have empty output)
    assert chunks[0].type == "response.created"
    assert len(chunks[0].response.output) == 0

    # Check response.completed event (should have the tool call)
    assert chunks[1].type == "response.completed"
    assert len(chunks[1].response.output) == 1
    assert chunks[1].response.output[0].type == "function_call"
    assert chunks[1].response.output[0].name == "get_weather"

@pytest.mark.asyncio
async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api):
    """Test creating an OpenAI response with multiple messages."""
    # Setup
    input_messages = [
        OpenAIResponseMessage(role="developer", content="You are a helpful assistant", name=None),
        OpenAIResponseMessage(role="user", content="Name some towns in Ireland", name=None),
        OpenAIResponseMessage(
            role="assistant",
            content=[
                OpenAIResponseInputMessageContentText(text="Galway, Longford, Sligo"),
                OpenAIResponseInputMessageContentText(text="Dublin"),
            ],
            name=None,
        ),
        OpenAIResponseMessage(role="user", content="Which is the largest town in Ireland?", name=None),
    ]
    model = "meta-llama/Llama-3.1-8B-Instruct"

    mock_inference_api.openai_chat_completion.return_value = load_chat_completion_fixture("simple_chat_completion.yaml")

    # Execute
    await openai_responses_impl.create_openai_response(
        input=input_messages,
        model=model,
        temperature=0.1,
    )

    # Verify that the correct messages were sent to the inference API, i.e.
    # all of the response messages were converted to chat completion message objects
    inference_messages = mock_inference_api.openai_chat_completion.call_args_list[0].kwargs["messages"]
    for i, m in enumerate(input_messages):
        if isinstance(m.content, str):
            assert inference_messages[i].content == m.content
        else:
            assert inference_messages[i].content[0].text == m.content[0].text
            assert isinstance(inference_messages[i].content[0], OpenAIChatCompletionContentPartTextParam)
        assert inference_messages[i].role == m.role
        if m.role == "user":
            assert isinstance(inference_messages[i], OpenAIUserMessageParam)
        elif m.role == "assistant":
            assert isinstance(inference_messages[i], OpenAIAssistantMessageParam)
        else:
            assert isinstance(inference_messages[i], OpenAIDeveloperMessageParam)

@pytest.mark.asyncio
async def test_prepend_previous_response_none(openai_responses_impl):
    """Test prepending no previous response to a new response."""

    input = await openai_responses_impl._prepend_previous_response("fake_input", None)
    assert input == "fake_input"

@pytest.mark.asyncio
async def test_prepend_previous_response_basic(openai_responses_impl, mock_responses_store):
    """Test prepending a basic previous response to a new response."""

    input_item_message = OpenAIResponseMessage(
        id="123",
        content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
        role="user",
    )
    response_output_message = OpenAIResponseMessage(
        id="123",
        content=[OpenAIResponseOutputMessageContentOutputText(text="fake_response")],
        status="completed",
        role="assistant",
    )
    previous_response = OpenAIResponseObjectWithInput(
        created_at=1,
        id="resp_123",
        model="fake_model",
        output=[response_output_message],
        status="completed",
        input=[input_item_message],
    )
    mock_responses_store.get_response_object.return_value = previous_response

    input = await openai_responses_impl._prepend_previous_response("fake_input", "resp_123")

    assert len(input) == 3
    # Check for previous input
    assert isinstance(input[0], OpenAIResponseMessage)
    assert input[0].content[0].text == "fake_previous_input"
    # Check for previous output
    assert isinstance(input[1], OpenAIResponseMessage)
    assert input[1].content[0].text == "fake_response"
    # Check for new input
    assert isinstance(input[2], OpenAIResponseMessage)
    assert input[2].content == "fake_input"

@pytest.mark.asyncio
async def test_prepend_previous_response_web_search(openai_responses_impl, mock_responses_store):
    """Test prepending a web search previous response to a new response."""
    input_item_message = OpenAIResponseMessage(
        id="123",
        content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
        role="user",
    )
    output_web_search = OpenAIResponseOutputMessageWebSearchToolCall(
        id="ws_123",
        status="completed",
    )
    output_message = OpenAIResponseMessage(
        id="123",
        content=[OpenAIResponseOutputMessageContentOutputText(text="fake_web_search_response")],
        status="completed",
        role="assistant",
    )
    response = OpenAIResponseObjectWithInput(
        created_at=1,
        id="resp_123",
        model="fake_model",
        output=[output_web_search, output_message],
        status="completed",
        input=[input_item_message],
    )
    mock_responses_store.get_response_object.return_value = response

    input_messages = [OpenAIResponseMessage(content="fake_input", role="user")]
    input = await openai_responses_impl._prepend_previous_response(input_messages, "resp_123")

    assert len(input) == 4
    # Check for previous input
    assert isinstance(input[0], OpenAIResponseMessage)
    assert input[0].content[0].text == "fake_previous_input"
    # Check for previous output web search tool call
    assert isinstance(input[1], OpenAIResponseOutputMessageWebSearchToolCall)
    # Check for previous output web search response
    assert isinstance(input[2], OpenAIResponseMessage)
    assert input[2].content[0].text == "fake_web_search_response"
    # Check for new input
    assert isinstance(input[3], OpenAIResponseMessage)
    assert input[3].content == "fake_input"

@pytest.mark.asyncio
async def test_create_openai_response_with_instructions(openai_responses_impl, mock_inference_api):
    # Setup
    input_text = "What is the capital of Ireland?"
    model = "meta-llama/Llama-3.1-8B-Instruct"
    instructions = "You are a geography expert. Provide concise answers."

    # Load the chat completion fixture
    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion

    # Execute
    await openai_responses_impl.create_openai_response(
        input=input_text,
        model=model,
        instructions=instructions,
    )

    # Verify
    mock_inference_api.openai_chat_completion.assert_called_once()
    call_args = mock_inference_api.openai_chat_completion.call_args
    sent_messages = call_args.kwargs["messages"]

    # Check that instructions were prepended as a system message
    assert len(sent_messages) == 2
    assert sent_messages[0].role == "system"
    assert sent_messages[0].content == instructions
    assert sent_messages[1].role == "user"
    assert sent_messages[1].content == input_text

@pytest.mark.asyncio
async def test_create_openai_response_with_instructions_and_multiple_messages(
    openai_responses_impl, mock_inference_api
):
    # Setup
    input_messages = [
        OpenAIResponseMessage(role="user", content="Name some towns in Ireland", name=None),
        OpenAIResponseMessage(
            role="assistant",
            content="Galway, Longford, Sligo",
            name=None,
        ),
        OpenAIResponseMessage(role="user", content="Which is the largest?", name=None),
    ]
    model = "meta-llama/Llama-3.1-8B-Instruct"
    instructions = "You are a geography expert. Provide concise answers."

    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion

    # Execute
    await openai_responses_impl.create_openai_response(
        input=input_messages,
        model=model,
        instructions=instructions,
    )

    # Verify
    mock_inference_api.openai_chat_completion.assert_called_once()
    call_args = mock_inference_api.openai_chat_completion.call_args
    sent_messages = call_args.kwargs["messages"]

    # Check that instructions were prepended as a system message
    assert len(sent_messages) == 4  # 1 system + 3 input messages
    assert sent_messages[0].role == "system"
    assert sent_messages[0].content == instructions

    # Check the rest of the messages were converted correctly
    assert sent_messages[1].role == "user"
    assert sent_messages[1].content == "Name some towns in Ireland"
    assert sent_messages[2].role == "assistant"
    assert sent_messages[2].content == "Galway, Longford, Sligo"
    assert sent_messages[3].role == "user"
    assert sent_messages[3].content == "Which is the largest?"

@pytest.mark.asyncio
async def test_create_openai_response_with_instructions_and_previous_response(
    openai_responses_impl, mock_responses_store, mock_inference_api
):
    """Test prepending both instructions and a previous response."""

    input_item_message = OpenAIResponseMessage(
        id="123",
        content="Name some towns in Ireland",
        role="user",
    )
    response_output_message = OpenAIResponseMessage(
        id="123",
        content="Galway, Longford, Sligo",
        status="completed",
        role="assistant",
    )
    response = OpenAIResponseObjectWithInput(
        created_at=1,
        id="resp_123",
        model="fake_model",
        output=[response_output_message],
        status="completed",
        input=[input_item_message],
    )
    mock_responses_store.get_response_object.return_value = response

    model = "meta-llama/Llama-3.1-8B-Instruct"
    instructions = "You are a geography expert. Provide concise answers."
    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion

    # Execute
    await openai_responses_impl.create_openai_response(
        input="Which is the largest?", model=model, instructions=instructions, previous_response_id="123"
    )

    # Verify
    mock_inference_api.openai_chat_completion.assert_called_once()
    call_args = mock_inference_api.openai_chat_completion.call_args
    sent_messages = call_args.kwargs["messages"]

    # Check that instructions were prepended as a system message
    assert len(sent_messages) == 4, sent_messages
    assert sent_messages[0].role == "system"
    assert sent_messages[0].content == instructions

    # Check the rest of the messages were converted correctly
    assert sent_messages[1].role == "user"
    assert sent_messages[1].content == "Name some towns in Ireland"
    assert sent_messages[2].role == "assistant"
    assert sent_messages[2].content == "Galway, Longford, Sligo"
    assert sent_messages[3].role == "user"
    assert sent_messages[3].content == "Which is the largest?"

@pytest.mark.asyncio
async def test_list_openai_response_input_items_delegation(openai_responses_impl, mock_responses_store):
    """Test that list_openai_response_input_items properly delegates to responses_store with correct parameters."""
    # Setup
    response_id = "resp_123"
    after = "msg_after"
    before = "msg_before"
    include = ["metadata"]
    limit = 5
    order = Order.asc

    input_message = OpenAIResponseMessage(
        id="msg_123",
        content="Test message",
        role="user",
    )

    expected_result = ListOpenAIResponseInputItem(data=[input_message])
    mock_responses_store.list_response_input_items.return_value = expected_result

    # Execute with all parameters to test delegation
    result = await openai_responses_impl.list_openai_response_input_items(
        response_id, after=after, before=before, include=include, limit=limit, order=order
    )

    # Verify all parameters are passed through correctly to the store
    mock_responses_store.list_response_input_items.assert_called_once_with(
        response_id, after, before, include, limit, order
    )

    # Verify the result is returned as-is from the store
    assert result.object == "list"
    assert len(result.data) == 1
    assert result.data[0].id == "msg_123"

@pytest.mark.asyncio
async def test_responses_store_list_input_items_logic():
    """Test ResponsesStore.list_response_input_items ordering and limiting logic,
    mocking the underlying SQL store that get_response_object reads from."""

    # Create mock SQL store and a real ResponsesStore around it
    mock_sql_store = AsyncMock()
    responses_store = ResponsesStore(sql_store_config=None)
    responses_store.sql_store = mock_sql_store

    # Set up test data: multiple input items
    input_items = [
        OpenAIResponseMessage(id="msg_1", content="First message", role="user"),
        OpenAIResponseMessage(id="msg_2", content="Second message", role="user"),
        OpenAIResponseMessage(id="msg_3", content="Third message", role="user"),
        OpenAIResponseMessage(id="msg_4", content="Fourth message", role="user"),
    ]

    response_with_input = OpenAIResponseObjectWithInput(
        id="resp_123",
        model="test_model",
        created_at=1234567890,
        object="response",
        status="completed",
        output=[],
        input=input_items,
    )

    # Mock the underlying SQL fetch to return our test data
    mock_sql_store.fetch_one.return_value = {"response_object": response_with_input.model_dump()}

    # Test 1: Default behavior (no limit, desc order)
    result = await responses_store.list_response_input_items("resp_123")
    assert result.object == "list"
    assert len(result.data) == 4
    # Should be reversed for desc order
    assert result.data[0].id == "msg_4"
    assert result.data[1].id == "msg_3"
    assert result.data[2].id == "msg_2"
    assert result.data[3].id == "msg_1"

    # Test 2: With limit=2, desc order
    result = await responses_store.list_response_input_items("resp_123", limit=2, order=Order.desc)
    assert result.object == "list"
    assert len(result.data) == 2
    # Should be first 2 items in desc order
    assert result.data[0].id == "msg_4"
    assert result.data[1].id == "msg_3"

    # Test 3: With limit=2, asc order
    result = await responses_store.list_response_input_items("resp_123", limit=2, order=Order.asc)
    assert result.object == "list"
    assert len(result.data) == 2
    # Should be first 2 items in original order (asc)
    assert result.data[0].id == "msg_1"
    assert result.data[1].id == "msg_2"

    # Test 4: Asc order without limit
    result = await responses_store.list_response_input_items("resp_123", order=Order.asc)
    assert result.object == "list"
    assert len(result.data) == 4
    # Should be in original order (asc)
    assert result.data[0].id == "msg_1"
    assert result.data[1].id == "msg_2"
    assert result.data[2].id == "msg_3"
    assert result.data[3].id == "msg_4"

    # Test 5: Large limit (larger than available items)
    result = await responses_store.list_response_input_items("resp_123", limit=10, order=Order.desc)
    assert result.object == "list"
    assert len(result.data) == 4  # Should return all available items
    assert result.data[0].id == "msg_4"

    # Test 6: Zero limit edge case
    result = await responses_store.list_response_input_items("resp_123", limit=0, order=Order.asc)
    assert result.object == "list"
    assert len(result.data) == 0  # Should return no items

@pytest.mark.asyncio
async def test_store_response_uses_rehydrated_input_with_previous_response(
    openai_responses_impl, mock_responses_store, mock_inference_api
):
    """Test that _store_response uses the full re-hydrated input (including previous responses)
    rather than just the original input when previous_response_id is provided."""

    # Setup - Create a previous response that should be included in the stored input
    previous_response = OpenAIResponseObjectWithInput(
        id="resp-previous-123",
        object="response",
        created_at=1234567890,
        model="meta-llama/Llama-3.1-8B-Instruct",
        status="completed",
        input=[
            OpenAIResponseMessage(
                id="msg-prev-user", role="user", content=[OpenAIResponseInputMessageContentText(text="What is 2+2?")]
            )
        ],
        output=[
            OpenAIResponseMessage(
                id="msg-prev-assistant",
                role="assistant",
                content=[OpenAIResponseOutputMessageContentOutputText(text="2+2 equals 4.")],
            )
        ],
    )

    mock_responses_store.get_response_object.return_value = previous_response

    current_input = "Now what is 3+3?"
    model = "meta-llama/Llama-3.1-8B-Instruct"
    mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
    mock_inference_api.openai_chat_completion.return_value = mock_chat_completion

    # Execute - Create response with previous_response_id
    result = await openai_responses_impl.create_openai_response(
        input=current_input,
        model=model,
        previous_response_id="resp-previous-123",
        store=True,
    )

    store_call_args = mock_responses_store.store_response_object.call_args
    stored_input = store_call_args.kwargs["input"]

    # Verify that the stored input contains the full re-hydrated conversation:
    # 1. Previous user message
    # 2. Previous assistant response
    # 3. Current user message
    assert len(stored_input) == 3

    assert stored_input[0].role == "user"
    assert stored_input[0].content[0].text == "What is 2+2?"

    assert stored_input[1].role == "assistant"
    assert stored_input[1].content[0].text == "2+2 equals 4."

    assert stored_input[2].role == "user"
    assert stored_input[2].content == "Now what is 3+3?"

    # Verify the response itself is correct
    assert result.model == model
    assert result.status == "completed"