list responses

# What does this PR do?

## Test Plan
2025-12-28 15:22:00 +00:00 · 2025-05-23 13:00:58 -07:00 · 2025-05-23 13:00:58 -07:00 · f39d1732ea
commit f39d1732ea
parent 558d109ab7
47 changed files with 704 additions and 77 deletions
--- a/tests/integration/agents/test_openai_responses.py
+++ b/tests/integration/agents/test_openai_responses.py
@ -0,0 +1,97 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from urllib.parse import urljoin
+
+import pytest
+import requests
+from openai import OpenAI
+
+from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
+
+
+@pytest.fixture
+def openai_client(client_with_models):
+    """Build an OpenAI SDK client rooted at ``/v1/openai/v1`` under the stack's base URL."""
+    return OpenAI(
+        api_key="bar",
+        base_url=f"{client_with_models.base_url}/v1/openai/v1",
+    )
+
+
+@pytest.mark.parametrize(
+    "stream",
+    [
+        True,
+        False,
+    ],
+)
+@pytest.mark.parametrize(
+    "tools",
+    [
+        [],
+        [
+            {
+                "type": "function",
+                "name": "get_weather",
+                "description": "Get the weather in a given city",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {"type": "string", "description": "The city to get the weather for"},
+                    },
+                },
+            }
+        ],
+    ],
+)
+def test_responses_store(openai_client, client_with_models, text_model_id, stream, tools):
+    """Create a response, then verify it can be listed and retrieved.
+
+    Parametrized over streaming / non-streaming requests and over an empty
+    tool list / a single ``get_weather`` function tool. Skipped when the
+    stack is driven through ``LlamaStackAsLibraryClient``.
+    """
+    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+    client = openai_client
+    message = "What's the weather in Tokyo?" + (
+        " YOU MUST USE THE get_weather function to get the weather." if tools else ""
+    )
+    response = client.responses.create(
+        model=text_model_id,
+        input=[
+            {
+                "role": "user",
+                "content": message,
+            }
+        ],
+        stream=stream,
+        tools=tools,
+    )
+
+    # Text is only compared in the no-tools runs; bind the name up front so
+    # every path leaves it defined.
+    content = ""
+    if stream:
+        response_id = None
+        for chunk in response:
+            # Take the id from the first event seen.
+            if response_id is None:
+                response_id = chunk.response.id
+            # The completed event carries the final output text.
+            if not tools and chunk.type == "response.completed":
+                response_id = chunk.response.id
+                content = chunk.response.output[0].content[0].text
+    else:
+        response_id = response.id
+        if not tools:
+            content = response.output[0].content[0].text
+
+    # Fail here with a clear message rather than in the membership check below.
+    assert response_id is not None, "no response id was obtained from the create call"
+
+    # list responses is not available in the SDK
+    # NOTE(review): urljoin only appends "responses" if the base URL ends with
+    # "/"; otherwise it replaces the last path segment - confirm client.base_url
+    # is always slash-terminated.
+    url = urljoin(str(client.base_url), "responses")
+    # Bounded timeout so a stalled server fails the test instead of hanging it.
+    list_response = requests.get(
+        url,
+        headers={"Authorization": f"Bearer {client.api_key}"},
+        timeout=30,
+    )
+    assert list_response.status_code == 200, list_response.text
+    data = list_response.json()["data"]
+    assert response_id in [r["id"] for r in data]
+
+    # test retrieve response
+    retrieved_response = client.responses.retrieve(response_id)
+    assert retrieved_response.id == response_id
+    assert retrieved_response.model == text_model_id
+    if tools:
+        assert retrieved_response.output[0].type == "function_call"
+    else:
+        assert retrieved_response.output[0].content[0].text == content
--- a/tests/unit/providers/agent/test_meta_reference_agent.py
+++ b/tests/unit/providers/agent/test_meta_reference_agent.py
@ -43,6 +43,10 @@ def config(tmp_path):
            "type": "sqlite",
            "db_path": str(tmp_path / "test.db"),
        },
+        responses_store={
+            "type": "sqlite",
+            "db_path": str(tmp_path / "test.db"),
+        },
    )


--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from unittest.mock import AsyncMock, patch
+from unittest.mock import AsyncMock

 import pytest
 from openai.types.chat.chat_completion_chunk import (
@ -16,12 +16,11 @@ from openai.types.chat.chat_completion_chunk import (
 )

 from llama_stack.apis.agents.openai_responses import (
-    OpenAIResponseInputItemList,
    OpenAIResponseInputMessageContentText,
    OpenAIResponseInputToolFunction,
    OpenAIResponseInputToolWebSearch,
    OpenAIResponseMessage,
-    OpenAIResponseObject,
+    OpenAIResponseObjectWithInput,
    OpenAIResponseOutputMessageContentOutputText,
    OpenAIResponseOutputMessageWebSearchToolCall,
 )
@ -33,19 +32,12 @@ from llama_stack.apis.inference.inference import (
 )
 from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
 from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
-    OpenAIResponsePreviousResponseWithInputItems,
    OpenAIResponsesImpl,
 )
-from llama_stack.providers.utils.kvstore import KVStore
+from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture


-@pytest.fixture
-def mock_kvstore():
-    kvstore = AsyncMock(spec=KVStore)
-    return kvstore
-
-
@pytest.fixture
 def mock_inference_api():
    inference_api = AsyncMock()
@ -65,12 +57,18 @@ def mock_tool_runtime_api():


@pytest.fixture
-def openai_responses_impl(mock_kvstore, mock_inference_api, mock_tool_groups_api, mock_tool_runtime_api):
+def mock_responses_store():
+    """Provide an ``AsyncMock`` constrained to the ``ResponsesStore`` spec."""
+    return AsyncMock(spec=ResponsesStore)
+
+
+@pytest.fixture
+def openai_responses_impl(mock_inference_api, mock_tool_groups_api, mock_tool_runtime_api, mock_responses_store):
    return OpenAIResponsesImpl(
-        persistence_store=mock_kvstore,
        inference_api=mock_inference_api,
        tool_groups_api=mock_tool_groups_api,
        tool_runtime_api=mock_tool_runtime_api,
+        responses_store=mock_responses_store,
    )


@ -100,7 +98,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
        stream=False,
        temperature=0.1,
    )
-    openai_responses_impl.persistence_store.set.assert_called_once()
+    openai_responses_impl.responses_store.store_response_object.assert_called_once()
    assert result.model == model
    assert len(result.output) == 1
    assert isinstance(result.output[0], OpenAIResponseMessage)
@ -167,7 +165,7 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
        kwargs={"query": "What is the capital of Ireland?"},
    )

-    openai_responses_impl.persistence_store.set.assert_called_once()
+    openai_responses_impl.responses_store.store_response_object.assert_called_once()

    # Check that we got the content from our mocked tool execution result
    assert len(result.output) >= 1
@ -292,8 +290,7 @@ async def test_prepend_previous_response_none(openai_responses_impl):


@pytest.mark.asyncio
-@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
-async def test_prepend_previous_response_basic(get_previous_response_with_input, openai_responses_impl):
+async def test_prepend_previous_response_basic(openai_responses_impl, mock_responses_store):
    """Test prepending a basic previous response to a new response."""

    input_item_message = OpenAIResponseMessage(
@ -301,25 +298,21 @@ async def test_prepend_previous_response_basic(get_previous_response_with_input,
        content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
        role="user",
    )
-    input_items = OpenAIResponseInputItemList(data=[input_item_message])
    response_output_message = OpenAIResponseMessage(
        id="123",
        content=[OpenAIResponseOutputMessageContentOutputText(text="fake_response")],
        status="completed",
        role="assistant",
    )
-    response = OpenAIResponseObject(
+    previous_response = OpenAIResponseObjectWithInput(
        created_at=1,
        id="resp_123",
        model="fake_model",
        output=[response_output_message],
        status="completed",
+        input=[input_item_message],
    )
-    previous_response = OpenAIResponsePreviousResponseWithInputItems(
-        input_items=input_items,
-        response=response,
-    )
-    get_previous_response_with_input.return_value = previous_response
+    mock_responses_store.get_response_object.return_value = previous_response

    input = await openai_responses_impl._prepend_previous_response("fake_input", "resp_123")

@ -336,16 +329,13 @@ async def test_prepend_previous_response_basic(get_previous_response_with_input,


@pytest.mark.asyncio
-@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
-async def test_prepend_previous_response_web_search(get_previous_response_with_input, openai_responses_impl):
+async def test_prepend_previous_response_web_search(openai_responses_impl, mock_responses_store):
    """Test prepending a web search previous response to a new response."""
-
    input_item_message = OpenAIResponseMessage(
        id="123",
        content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
        role="user",
    )
-    input_items = OpenAIResponseInputItemList(data=[input_item_message])
    output_web_search = OpenAIResponseOutputMessageWebSearchToolCall(
        id="ws_123",
        status="completed",
@ -356,18 +346,15 @@ async def test_prepend_previous_response_web_search(get_previous_response_with_i
        status="completed",
        role="assistant",
    )
-    response = OpenAIResponseObject(
+    response = OpenAIResponseObjectWithInput(
        created_at=1,
        id="resp_123",
        model="fake_model",
        output=[output_web_search, output_message],
        status="completed",
+        input=[input_item_message],
    )
-    previous_response = OpenAIResponsePreviousResponseWithInputItems(
-        input_items=input_items,
-        response=response,
-    )
-    get_previous_response_with_input.return_value = previous_response
+    mock_responses_store.get_response_object.return_value = response

    input_messages = [OpenAIResponseMessage(content="fake_input", role="user")]
    input = await openai_responses_impl._prepend_previous_response(input_messages, "resp_123")
@ -464,9 +451,8 @@ async def test_create_openai_response_with_instructions_and_multiple_messages(


@pytest.mark.asyncio
-@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
 async def test_create_openai_response_with_instructions_and_previous_response(
-    get_previous_response_with_input, openai_responses_impl, mock_inference_api
+    openai_responses_impl, mock_responses_store, mock_inference_api
 ):
    """Test prepending both instructions and previous response."""

@ -475,25 +461,21 @@ async def test_create_openai_response_with_instructions_and_previous_response(
        content="Name some towns in Ireland",
        role="user",
    )
-    input_items = OpenAIResponseInputItemList(data=[input_item_message])
    response_output_message = OpenAIResponseMessage(
        id="123",
        content="Galway, Longford, Sligo",
        status="completed",
        role="assistant",
    )
-    response = OpenAIResponseObject(
+    response = OpenAIResponseObjectWithInput(
        created_at=1,
        id="resp_123",
        model="fake_model",
        output=[response_output_message],
        status="completed",
+        input=[input_item_message],
    )
-    previous_response = OpenAIResponsePreviousResponseWithInputItems(
-        input_items=input_items,
-        response=response,
-    )
-    get_previous_response_with_input.return_value = previous_response
+    mock_responses_store.get_response_object.return_value = response

    model = "meta-llama/Llama-3.1-8B-Instruct"
    instructions = "You are a geography expert. Provide concise answers."
@ -511,7 +493,7 @@ async def test_create_openai_response_with_instructions_and_previous_response(
    sent_messages = call_args.kwargs["messages"]

    # Check that instructions were prepended as a system message
-    assert len(sent_messages) == 4
+    assert len(sent_messages) == 4, sent_messages
    assert sent_messages[0].role == "system"
    assert sent_messages[0].content == instructions

--- a/tests/verifications/openai-api-verification-run.yaml
+++ b/tests/verifications/openai-api-verification-run.yaml
@ -63,6 +63,9 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/agents_store.db
+      responses_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/responses_store.db
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search