diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index b3e982029..d21ff81fd 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -518,6 +518,74 @@
}
},
"/v1/openai/v1/responses": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "A ListOpenAIResponseObject.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ListOpenAIResponseObject"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Agents"
+ ],
+ "description": "List all OpenAI responses.",
+ "parameters": [
+ {
+ "name": "after",
+ "in": "query",
+ "description": "The ID of the last response to return.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "description": "The number of responses to return.",
+ "required": false,
+ "schema": {
+ "type": "integer"
+ }
+ },
+ {
+ "name": "model",
+ "in": "query",
+ "description": "The model to filter responses by.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "order",
+ "in": "query",
+ "description": "The order to sort responses by when sorted by created_at ('asc' or 'desc').",
+ "required": false,
+ "schema": {
+ "$ref": "#/components/schemas/Order"
+ }
+ }
+ ]
+ },
"post": {
"responses": {
"200": {
@@ -10179,6 +10247,108 @@
],
"title": "ListModelsResponse"
},
+ "ListOpenAIResponseObject": {
+ "type": "object",
+ "properties": {
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseObjectWithInput"
+ }
+ },
+ "has_more": {
+ "type": "boolean"
+ },
+ "first_id": {
+ "type": "string"
+ },
+ "last_id": {
+ "type": "string"
+ },
+ "object": {
+ "type": "string",
+ "const": "list",
+ "default": "list"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "data",
+ "has_more",
+ "first_id",
+ "last_id",
+ "object"
+ ],
+ "title": "ListOpenAIResponseObject"
+ },
+ "OpenAIResponseObjectWithInput": {
+ "type": "object",
+ "properties": {
+ "created_at": {
+ "type": "integer"
+ },
+ "error": {
+ "$ref": "#/components/schemas/OpenAIResponseError"
+ },
+ "id": {
+ "type": "string"
+ },
+ "model": {
+ "type": "string"
+ },
+ "object": {
+ "type": "string",
+ "const": "response",
+ "default": "response"
+ },
+ "output": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseOutput"
+ }
+ },
+ "parallel_tool_calls": {
+ "type": "boolean",
+ "default": false
+ },
+ "previous_response_id": {
+ "type": "string"
+ },
+ "status": {
+ "type": "string"
+ },
+ "temperature": {
+ "type": "number"
+ },
+ "top_p": {
+ "type": "number"
+ },
+ "truncation": {
+ "type": "string"
+ },
+ "user": {
+ "type": "string"
+ },
+ "input": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseInput"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "created_at",
+ "id",
+ "model",
+ "object",
+ "output",
+ "parallel_tool_calls",
+ "status",
+ "input"
+ ],
+ "title": "OpenAIResponseObjectWithInput"
+ },
"ListProvidersResponse": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 18cd2b046..8a936fcee 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -349,6 +349,53 @@ paths:
$ref: '#/components/schemas/CreateAgentTurnRequest'
required: true
/v1/openai/v1/responses:
+ get:
+ responses:
+ '200':
+ description: A ListOpenAIResponseObject.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ListOpenAIResponseObject'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+        $ref: '#/components/responses/TooManyRequests429'
+      '500':
+        $ref: '#/components/responses/InternalServerError500'
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Agents
+ description: List all OpenAI responses.
+ parameters:
+ - name: after
+ in: query
+        description: >-
+          The ID of the last response from the previous page; used as a pagination
+          cursor.
+ required: false
+ schema:
+ type: string
+ - name: limit
+ in: query
+        description: The maximum number of responses to return.
+ required: false
+ schema:
+ type: integer
+ - name: model
+ in: query
+ description: The model to filter responses by.
+ required: false
+ schema:
+ type: string
+ - name: order
+ in: query
+ description: >-
          The sort order for responses by the created_at timestamp ('asc' or 'desc').
+ required: false
+ schema:
+ $ref: '#/components/schemas/Order'
post:
responses:
'200':
@@ -7106,6 +7153,80 @@ components:
required:
- data
title: ListModelsResponse
+ ListOpenAIResponseObject:
+ type: object
+ properties:
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseObjectWithInput'
+ has_more:
+ type: boolean
+ first_id:
+ type: string
+ last_id:
+ type: string
+ object:
+ type: string
+ const: list
+ default: list
+ additionalProperties: false
+ required:
+ - data
+ - has_more
+ - first_id
+ - last_id
+ - object
+ title: ListOpenAIResponseObject
+ OpenAIResponseObjectWithInput:
+ type: object
+ properties:
+ created_at:
+ type: integer
+ error:
+ $ref: '#/components/schemas/OpenAIResponseError'
+ id:
+ type: string
+ model:
+ type: string
+ object:
+ type: string
+ const: response
+ default: response
+ output:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseOutput'
+ parallel_tool_calls:
+ type: boolean
+ default: false
+ previous_response_id:
+ type: string
+ status:
+ type: string
+ temperature:
+ type: number
+ top_p:
+ type: number
+ truncation:
+ type: string
+ user:
+ type: string
+ input:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseInput'
+ additionalProperties: false
+ required:
+ - created_at
+ - id
+ - model
+ - object
+ - output
+ - parallel_tool_calls
+ - status
+ - input
+ title: OpenAIResponseObjectWithInput
ListProvidersResponse:
type: object
properties:
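
The two generated spec files above describe the same new route, `GET /v1/openai/v1/responses`, backed by the `Agents.list_openai_responses` method added below. Since listing is not yet exposed through the OpenAI SDK (the integration test at the bottom of this patch works around that the same way), here is a minimal sketch of calling it over raw HTTP, assuming a stack server at `localhost:8321` and a placeholder API key:

```python
import requests

# Assumed local deployment; adjust host/port and credentials for your setup.
BASE_URL = "http://localhost:8321/v1/openai/v1"

resp = requests.get(
    f"{BASE_URL}/responses",
    params={"limit": 10, "order": "desc"},  # newest first, at most 10 items
    headers={"Authorization": "Bearer none"},  # placeholder credential
)
resp.raise_for_status()

body = resp.json()  # shaped like ListOpenAIResponseObject
for item in body["data"]:
    print(item["id"], item["model"], item["created_at"])
print("has_more:", body["has_more"], "first_id:", body["first_id"], "last_id:", body["last_id"])
```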
diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index 5e857c895..bb185b8a3 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -13,7 +13,7 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, ConfigDict, Field
from llama_stack.apis.common.content_types import URL, ContentDelta, InterleavedContent
-from llama_stack.apis.common.responses import PaginatedResponse
+from llama_stack.apis.common.responses import Order, PaginatedResponse
from llama_stack.apis.inference import (
CompletionMessage,
ResponseFormat,
@@ -31,6 +31,7 @@ from llama_stack.apis.tools import ToolDef
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from .openai_responses import (
+ ListOpenAIResponseObject,
OpenAIResponseInput,
OpenAIResponseInputTool,
OpenAIResponseObject,
@@ -611,3 +612,21 @@ class Agents(Protocol):
:returns: An OpenAIResponseObject.
"""
...
+
+ @webmethod(route="/openai/v1/responses", method="GET")
+ async def list_openai_responses(
+ self,
+ after: str | None = None,
+ limit: int | None = 50,
+ model: str | None = None,
+ order: Order | None = Order.desc,
+ ) -> ListOpenAIResponseObject:
+ """List all OpenAI responses.
+
+        :param after: The ID of the last response from the previous page; used as a pagination cursor.
+        :param limit: The maximum number of responses to return.
+        :param model: The model to filter responses by.
+        :param order: The sort order for responses by the created_at timestamp ('asc' or 'desc').
+ :returns: A ListOpenAIResponseObject.
+ """
+ ...
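
A caller-side sketch of the new protocol method, assuming `agents` is any initialized `Agents` implementation (for example, `MetaReferenceAgentsImpl` after `initialize()` has run):

```python
from llama_stack.apis.agents import Agents
from llama_stack.apis.common.responses import Order


async def print_recent_responses(agents: Agents) -> None:
    # Oldest first; limit/model/order all have defaults, so any subset works.
    page = await agents.list_openai_responses(limit=20, order=Order.asc)
    for resp in page.data:
        # Each entry is an OpenAIResponseObjectWithInput, so the original
        # input items are available alongside the outputs.
        print(resp.id, resp.status, len(resp.input))
```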
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index bb463bd57..5d8f2b80b 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -219,3 +219,17 @@ register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
class OpenAIResponseInputItemList(BaseModel):
data: list[OpenAIResponseInput]
object: Literal["list"] = "list"
+
+
+@json_schema_type
+class OpenAIResponseObjectWithInput(OpenAIResponseObject):
+ input: list[OpenAIResponseInput]
+
+
+@json_schema_type
+class ListOpenAIResponseObject(BaseModel):
+ data: list[OpenAIResponseObjectWithInput]
+ has_more: bool
+ first_id: str
+ last_id: str
+ object: Literal["list"] = "list"
diff --git a/llama_stack/apis/common/responses.py b/llama_stack/apis/common/responses.py
index b3bb5cb6b..5cb41e23d 100644
--- a/llama_stack/apis/common/responses.py
+++ b/llama_stack/apis/common/responses.py
@@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+from enum import Enum
from typing import Any
from pydantic import BaseModel
@@ -11,6 +12,11 @@ from pydantic import BaseModel
from llama_stack.schema_utils import json_schema_type
+class Order(Enum):
+ asc = "asc"
+ desc = "desc"
+
+
@json_schema_type
class PaginatedResponse(BaseModel):
"""A generic paginated response that follows a simple format.
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index 7f8b20952..e79dc6d94 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -19,6 +19,7 @@ from pydantic import BaseModel, Field, field_validator
from typing_extensions import TypedDict
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem
+from llama_stack.apis.common.responses import Order
from llama_stack.apis.models import Model
from llama_stack.apis.telemetry.telemetry import MetricResponseMixin
from llama_stack.models.llama.datatypes import (
@@ -833,11 +834,6 @@ class ListOpenAIChatCompletionResponse(BaseModel):
object: Literal["list"] = "list"
-class Order(Enum):
- asc = "asc"
- desc = "desc"
-
-
@runtime_checkable
@trace_protocol
class InferenceProvider(Protocol):
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py
index e98799ae6..787135488 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -20,9 +20,11 @@ from llama_stack.apis.agents import (
AgentTurnCreateRequest,
AgentTurnResumeRequest,
Document,
+ ListOpenAIResponseObject,
OpenAIResponseInput,
OpenAIResponseInputTool,
OpenAIResponseObject,
+ Order,
Session,
Turn,
)
@@ -39,6 +41,7 @@ from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
from llama_stack.providers.utils.pagination import paginate_records
+from llama_stack.providers.utils.responses.responses_store import ResponsesStore
from .agent_instance import ChatAgent
from .config import MetaReferenceAgentsImplConfig
@@ -66,15 +69,17 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_groups_api = tool_groups_api
self.in_memory_store = InmemoryKVStoreImpl()
- self.openai_responses_impl = None
+ self.openai_responses_impl: OpenAIResponsesImpl | None = None
async def initialize(self) -> None:
self.persistence_store = await kvstore_impl(self.config.persistence_store)
+ self.responses_store = ResponsesStore(self.config.responses_store)
+ await self.responses_store.initialize()
self.openai_responses_impl = OpenAIResponsesImpl(
- self.persistence_store,
inference_api=self.inference_api,
tool_groups_api=self.tool_groups_api,
tool_runtime_api=self.tool_runtime_api,
+ responses_store=self.responses_store,
)
async def create_agent(
@@ -323,3 +328,12 @@ class MetaReferenceAgentsImpl(Agents):
return await self.openai_responses_impl.create_openai_response(
input, model, instructions, previous_response_id, store, stream, temperature, tools
)
+
+ async def list_openai_responses(
+ self,
+ after: str | None = None,
+ limit: int | None = 50,
+ model: str | None = None,
+ order: Order | None = Order.desc,
+ ) -> ListOpenAIResponseObject:
+ return await self.openai_responses_impl.list_openai_responses(after, limit, model, order)
diff --git a/llama_stack/providers/inline/agents/meta_reference/config.py b/llama_stack/providers/inline/agents/meta_reference/config.py
index c860e6df1..1c392f29c 100644
--- a/llama_stack/providers/inline/agents/meta_reference/config.py
+++ b/llama_stack/providers/inline/agents/meta_reference/config.py
@@ -10,10 +10,12 @@ from pydantic import BaseModel
from llama_stack.providers.utils.kvstore import KVStoreConfig
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
+from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
class MetaReferenceAgentsImplConfig(BaseModel):
persistence_store: KVStoreConfig
+ responses_store: SqlStoreConfig
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
@@ -21,5 +23,9 @@ class MetaReferenceAgentsImplConfig(BaseModel):
"persistence_store": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="agents_store.db",
- )
+ ),
+ "responses_store": SqliteSqlStoreConfig.sample_run_config(
+ __distro_dir__=__distro_dir__,
+ db_name="responses_store.db",
+ ),
}
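
A quick sketch of what the updated sample config produces; the exact dict shapes come from the KVStore/SqlStore helpers, and the rendered paths below are assumptions:

```python
from llama_stack.providers.inline.agents.meta_reference.config import (
    MetaReferenceAgentsImplConfig,
)

sample = MetaReferenceAgentsImplConfig.sample_run_config(
    __distro_dir__="~/.llama/distributions/demo"  # hypothetical distro dir
)
# Both entries carry type "sqlite" and a db_path under the distro dir,
# e.g. .../agents_store.db and .../responses_store.db respectively.
config = MetaReferenceAgentsImplConfig(**sample)
print(config.persistence_store)
print(config.responses_store)
```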
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index 92345a12f..939282005 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -12,7 +12,9 @@ from typing import Any, cast
from openai.types.chat import ChatCompletionToolParam
from pydantic import BaseModel
+from llama_stack.apis.agents import Order
from llama_stack.apis.agents.openai_responses import (
+ ListOpenAIResponseObject,
OpenAIResponseInput,
OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputItemList,
@@ -53,7 +55,7 @@ from llama_stack.apis.tools.tools import ToolGroups, ToolInvocationResult, ToolR
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
-from llama_stack.providers.utils.kvstore import KVStore
+from llama_stack.providers.utils.responses.responses_store import ResponsesStore
logger = get_logger(name=__name__, category="openai_responses")
@@ -169,34 +171,27 @@ class OpenAIResponsePreviousResponseWithInputItems(BaseModel):
class OpenAIResponsesImpl:
def __init__(
self,
- persistence_store: KVStore,
inference_api: Inference,
tool_groups_api: ToolGroups,
tool_runtime_api: ToolRuntime,
+ responses_store: ResponsesStore,
):
- self.persistence_store = persistence_store
self.inference_api = inference_api
self.tool_groups_api = tool_groups_api
self.tool_runtime_api = tool_runtime_api
-
- async def _get_previous_response_with_input(self, id: str) -> OpenAIResponsePreviousResponseWithInputItems:
- key = f"{OPENAI_RESPONSES_PREFIX}{id}"
- response_json = await self.persistence_store.get(key=key)
- if response_json is None:
- raise ValueError(f"OpenAI response with id '{id}' not found")
- return OpenAIResponsePreviousResponseWithInputItems.model_validate_json(response_json)
+ self.responses_store = responses_store
async def _prepend_previous_response(
self, input: str | list[OpenAIResponseInput], previous_response_id: str | None = None
):
if previous_response_id:
- previous_response_with_input = await self._get_previous_response_with_input(previous_response_id)
+ previous_response_with_input = await self.responses_store.get_response_object(previous_response_id)
# previous response input items
- new_input_items = previous_response_with_input.input_items.data
+ new_input_items = previous_response_with_input.input
# previous response output items
- new_input_items.extend(previous_response_with_input.response.output)
+ new_input_items.extend(previous_response_with_input.output)
# new input items from the current request
if isinstance(input, str):
@@ -216,8 +211,17 @@ class OpenAIResponsesImpl:
self,
response_id: str,
) -> OpenAIResponseObject:
- response_with_input = await self._get_previous_response_with_input(response_id)
- return response_with_input.response
+ response_with_input = await self.responses_store.get_response_object(response_id)
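+        # the stored object also carries its input items; drop them to reconstruct a plain OpenAIResponseObject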
+ return OpenAIResponseObject(**{k: v for k, v in response_with_input.model_dump().items() if k != "input"})
+
+ async def list_openai_responses(
+ self,
+ after: str | None = None,
+ limit: int | None = 50,
+ model: str | None = None,
+ order: Order | None = Order.desc,
+ ) -> ListOpenAIResponseObject:
+ return await self.responses_store.list_responses(after, limit, model, order)
async def create_openai_response(
self,
@@ -360,15 +364,9 @@ class OpenAIResponsesImpl:
else:
input_items_data.append(input_item)
- input_items = OpenAIResponseInputItemList(data=input_items_data)
- prev_response = OpenAIResponsePreviousResponseWithInputItems(
- input_items=input_items,
- response=response,
- )
- key = f"{OPENAI_RESPONSES_PREFIX}{response.id}"
- await self.persistence_store.set(
- key=key,
- value=prev_response.model_dump_json(),
+ await self.responses_store.store_response_object(
+ response_object=response,
+ input=input_items_data,
)
if stream:
diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py
new file mode 100644
index 000000000..19da6785a
--- /dev/null
+++ b/llama_stack/providers/utils/responses/responses_store.py
@@ -0,0 +1,98 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.apis.agents import (
+ Order,
+)
+from llama_stack.apis.agents.openai_responses import (
+ ListOpenAIResponseObject,
+ OpenAIResponseInput,
+ OpenAIResponseObject,
+ OpenAIResponseObjectWithInput,
+)
+from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
+
+from ..sqlstore.api import ColumnDefinition, ColumnType
+from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl
+
+
+class ResponsesStore:
+ def __init__(self, sql_store_config: SqlStoreConfig):
+ if not sql_store_config:
+ sql_store_config = SqliteSqlStoreConfig(
+ db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(),
+ )
+ self.sql_store = sqlstore_impl(sql_store_config)
+
+ async def initialize(self):
+ """Create the necessary tables if they don't exist."""
+ await self.sql_store.create_table(
+ "openai_responses",
+ {
+ "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
+ "created_at": ColumnType.INTEGER,
+ "response_object": ColumnType.JSON,
+ "model": ColumnType.STRING,
+ },
+ )
+
+ async def store_response_object(
+ self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput]
+ ) -> None:
+ data = response_object.model_dump()
+ data["input"] = [input_item.model_dump() for input_item in input]
+
+ await self.sql_store.insert(
+ "openai_responses",
+ {
+ "id": data["id"],
+ "created_at": data["created_at"],
+ "model": data["model"],
+ "response_object": data,
+ },
+ )
+
+ async def list_responses(
+ self,
+ after: str | None = None,
+ limit: int | None = 50,
+ model: str | None = None,
+ order: Order | None = Order.desc,
+ ) -> ListOpenAIResponseObject:
+ """
+ List responses from the database.
+
+        :param after: The ID of the last response from the previous page; used as a pagination cursor (not yet implemented).
+        :param limit: The maximum number of responses to return.
+        :param model: The model to filter by.
+        :param order: The sort order by created_at ('asc' or 'desc'); defaults to descending.
+ """
+ # TODO: support after
+ if after:
+ raise NotImplementedError("After is not supported for SQLite")
+ if not order:
+ order = Order.desc
+
+ rows = await self.sql_store.fetch_all(
+ "openai_responses",
+ where={"model": model} if model else None,
+ order_by=[("created_at", order.value)],
+ limit=limit,
+ )
+
+ data = [OpenAIResponseObjectWithInput(**row["response_object"]) for row in rows]
+ return ListOpenAIResponseObject(
+ data=data,
+ # TODO: implement has_more
+ has_more=False,
+ first_id=data[0].id if data else "",
+ last_id=data[-1].id if data else "",
+ )
+
+ async def get_response_object(self, response_id: str) -> OpenAIResponseObjectWithInput:
+ row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id})
+ if not row:
+ raise ValueError(f"Response with id {response_id} not found") from None
+ return OpenAIResponseObjectWithInput(**row["response_object"])
diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml
index c39b08ff9..a58068a60 100644
--- a/llama_stack/templates/bedrock/run.yaml
+++ b/llama_stack/templates/bedrock/run.yaml
@@ -35,6 +35,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml
index 025033f59..c080536b7 100644
--- a/llama_stack/templates/cerebras/run.yaml
+++ b/llama_stack/templates/cerebras/run.yaml
@@ -41,6 +41,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/responses_store.db
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml
index 342388b78..368187d3a 100644
--- a/llama_stack/templates/ci-tests/run.yaml
+++ b/llama_stack/templates/ci-tests/run.yaml
@@ -38,6 +38,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml
index 77843858c..5c6072245 100644
--- a/llama_stack/templates/dell/run-with-safety.yaml
+++ b/llama_stack/templates/dell/run-with-safety.yaml
@@ -41,6 +41,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml
index fd0d4a1f6..ffaa0bf2f 100644
--- a/llama_stack/templates/dell/run.yaml
+++ b/llama_stack/templates/dell/run.yaml
@@ -37,6 +37,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml
index 1f66983f4..41500f6f6 100644
--- a/llama_stack/templates/fireworks/run-with-safety.yaml
+++ b/llama_stack/templates/fireworks/run-with-safety.yaml
@@ -46,6 +46,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml
index 1fbf4be6e..b1fa03306 100644
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@@ -41,6 +41,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml
index 7d257d379..db7ebffee 100644
--- a/llama_stack/templates/groq/run.yaml
+++ b/llama_stack/templates/groq/run.yaml
@@ -41,6 +41,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
index b3938bf93..15cf2a47f 100644
--- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml
+++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
@@ -46,6 +46,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml
index 1e60dd25c..428edf9a2 100644
--- a/llama_stack/templates/hf-endpoint/run.yaml
+++ b/llama_stack/templates/hf-endpoint/run.yaml
@@ -41,6 +41,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml
index 640506632..ab461c6c3 100644
--- a/llama_stack/templates/hf-serverless/run-with-safety.yaml
+++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml
@@ -46,6 +46,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml
index a8b46a0aa..d238506fb 100644
--- a/llama_stack/templates/hf-serverless/run.yaml
+++ b/llama_stack/templates/hf-serverless/run.yaml
@@ -41,6 +41,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml
index 1d5739fe2..a7f2b0769 100644
--- a/llama_stack/templates/llama_api/run.yaml
+++ b/llama_stack/templates/llama_api/run.yaml
@@ -50,6 +50,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
index bbf7ad767..2b751a514 100644
--- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
@@ -56,6 +56,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml
index 9ce69c209..a24c5fec5 100644
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@@ -46,6 +46,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml
index 32359b805..c431e12f2 100644
--- a/llama_stack/templates/nvidia/run-with-safety.yaml
+++ b/llama_stack/templates/nvidia/run-with-safety.yaml
@@ -46,6 +46,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
index d4e935727..5b244081d 100644
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -41,6 +41,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index a19ac73c6..d63c5e366 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -40,6 +40,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 551af3a99..d208cd7f0 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -38,6 +38,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml
index 7b43ce6e7..0e5edf728 100644
--- a/llama_stack/templates/open-benchmark/run.yaml
+++ b/llama_stack/templates/open-benchmark/run.yaml
@@ -64,6 +64,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml
index cddda39fa..bbf5d9a52 100644
--- a/llama_stack/templates/passthrough/run-with-safety.yaml
+++ b/llama_stack/templates/passthrough/run-with-safety.yaml
@@ -46,6 +46,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml
index 1fc3914a6..146906d9b 100644
--- a/llama_stack/templates/passthrough/run.yaml
+++ b/llama_stack/templates/passthrough/run.yaml
@@ -41,6 +41,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index 89f3aa082..e83162a4f 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -50,6 +50,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index 4d4395fd7..4cdf88c6b 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -43,6 +43,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml
index 907bc013e..8c2a933ab 100644
--- a/llama_stack/templates/sambanova/run.yaml
+++ b/llama_stack/templates/sambanova/run.yaml
@@ -51,6 +51,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml
index 3327e576c..04425ed35 100644
--- a/llama_stack/templates/starter/run.yaml
+++ b/llama_stack/templates/starter/run.yaml
@@ -72,6 +72,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml
index bd197b93f..c797b93aa 100644
--- a/llama_stack/templates/tgi/run-with-safety.yaml
+++ b/llama_stack/templates/tgi/run-with-safety.yaml
@@ -41,6 +41,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml
index 230fe9a5a..7e91d20bd 100644
--- a/llama_stack/templates/tgi/run.yaml
+++ b/llama_stack/templates/tgi/run.yaml
@@ -40,6 +40,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml
index 1c05e5e42..190a0400b 100644
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@@ -46,6 +46,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml
index aebf4e1a2..ce9542130 100644
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@@ -41,6 +41,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/verification/run.yaml b/llama_stack/templates/verification/run.yaml
index de8b0d850..58b3c576c 100644
--- a/llama_stack/templates/verification/run.yaml
+++ b/llama_stack/templates/verification/run.yaml
@@ -74,6 +74,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml
index a0257f704..6937e2bac 100644
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@@ -45,6 +45,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml
index 86ec01953..e7222fd57 100644
--- a/llama_stack/templates/watsonx/run.yaml
+++ b/llama_stack/templates/watsonx/run.yaml
@@ -42,6 +42,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/responses_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py
new file mode 100644
index 000000000..8af1c1870
--- /dev/null
+++ b/tests/integration/agents/test_openai_responses.py
@@ -0,0 +1,97 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from urllib.parse import urljoin
+
+import pytest
+import requests
+from openai import OpenAI
+
+from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
+
+
+@pytest.fixture
+def openai_client(client_with_models):
+ base_url = f"{client_with_models.base_url}/v1/openai/v1"
+ return OpenAI(base_url=base_url, api_key="bar")
+
+
+@pytest.mark.parametrize(
+ "stream",
+ [
+ True,
+ False,
+ ],
+)
+@pytest.mark.parametrize(
+ "tools",
+ [
+ [],
+ [
+ {
+ "type": "function",
+ "name": "get_weather",
+ "description": "Get the weather in a given city",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "city": {"type": "string", "description": "The city to get the weather for"},
+ },
+ },
+ }
+ ],
+ ],
+)
+def test_responses_store(openai_client, client_with_models, text_model_id, stream, tools):
+ if isinstance(client_with_models, LlamaStackAsLibraryClient):
+ pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+ client = openai_client
+ message = "What's the weather in Tokyo?" + (
+ " YOU MUST USE THE get_weather function to get the weather." if tools else ""
+ )
+ response = client.responses.create(
+ model=text_model_id,
+ input=[
+ {
+ "role": "user",
+ "content": message,
+ }
+ ],
+ stream=stream,
+ tools=tools,
+ )
+ if stream:
+ # accumulate the streamed content
+ content = ""
+ response_id = None
+ for chunk in response:
+ if response_id is None:
+ response_id = chunk.response.id
+ if not tools:
+ if chunk.type == "response.completed":
+ response_id = chunk.response.id
+ content = chunk.response.output[0].content[0].text
+ else:
+ response_id = response.id
+ if not tools:
+ content = response.output[0].content[0].text
+
+    # listing responses is not yet available in the OpenAI SDK, so call the endpoint directly
+ url = urljoin(str(client.base_url), "responses")
+ response = requests.get(url, headers={"Authorization": f"Bearer {client.api_key}"})
+ assert response.status_code == 200
+ data = response.json()["data"]
+ assert response_id in [r["id"] for r in data]
+
+ # test retrieve response
+ retrieved_response = client.responses.retrieve(response_id)
+ assert retrieved_response.id == response_id
+ assert retrieved_response.model == text_model_id
+ if tools:
+ assert retrieved_response.output[0].type == "function_call"
+ else:
+ assert retrieved_response.output[0].content[0].text == content
diff --git a/tests/unit/providers/agent/test_meta_reference_agent.py b/tests/unit/providers/agent/test_meta_reference_agent.py
index bef24e123..9549f6df6 100644
--- a/tests/unit/providers/agent/test_meta_reference_agent.py
+++ b/tests/unit/providers/agent/test_meta_reference_agent.py
@@ -43,6 +43,10 @@ def config(tmp_path):
"type": "sqlite",
"db_path": str(tmp_path / "test.db"),
},
+ responses_store={
+ "type": "sqlite",
+ "db_path": str(tmp_path / "test.db"),
+ },
)
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index 0a8d59306..bf36d7b64 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from unittest.mock import AsyncMock, patch
+from unittest.mock import AsyncMock
import pytest
from openai.types.chat.chat_completion_chunk import (
@@ -16,12 +16,11 @@ from openai.types.chat.chat_completion_chunk import (
)
from llama_stack.apis.agents.openai_responses import (
- OpenAIResponseInputItemList,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputToolFunction,
OpenAIResponseInputToolWebSearch,
OpenAIResponseMessage,
- OpenAIResponseObject,
+ OpenAIResponseObjectWithInput,
OpenAIResponseOutputMessageContentOutputText,
OpenAIResponseOutputMessageWebSearchToolCall,
)
@@ -33,19 +32,12 @@ from llama_stack.apis.inference.inference import (
)
from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
- OpenAIResponsePreviousResponseWithInputItems,
OpenAIResponsesImpl,
)
-from llama_stack.providers.utils.kvstore import KVStore
+from llama_stack.providers.utils.responses.responses_store import ResponsesStore
from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture
-@pytest.fixture
-def mock_kvstore():
- kvstore = AsyncMock(spec=KVStore)
- return kvstore
-
-
@pytest.fixture
def mock_inference_api():
inference_api = AsyncMock()
@@ -65,12 +57,18 @@ def mock_tool_runtime_api():
@pytest.fixture
-def openai_responses_impl(mock_kvstore, mock_inference_api, mock_tool_groups_api, mock_tool_runtime_api):
+def mock_responses_store():
+ responses_store = AsyncMock(spec=ResponsesStore)
+ return responses_store
+
+
+@pytest.fixture
+def openai_responses_impl(mock_inference_api, mock_tool_groups_api, mock_tool_runtime_api, mock_responses_store):
return OpenAIResponsesImpl(
- persistence_store=mock_kvstore,
inference_api=mock_inference_api,
tool_groups_api=mock_tool_groups_api,
tool_runtime_api=mock_tool_runtime_api,
+ responses_store=mock_responses_store,
)
@@ -100,7 +98,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
stream=False,
temperature=0.1,
)
- openai_responses_impl.persistence_store.set.assert_called_once()
+ openai_responses_impl.responses_store.store_response_object.assert_called_once()
assert result.model == model
assert len(result.output) == 1
assert isinstance(result.output[0], OpenAIResponseMessage)
@@ -167,7 +165,7 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
kwargs={"query": "What is the capital of Ireland?"},
)
- openai_responses_impl.persistence_store.set.assert_called_once()
+ openai_responses_impl.responses_store.store_response_object.assert_called_once()
# Check that we got the content from our mocked tool execution result
assert len(result.output) >= 1
@@ -292,8 +290,7 @@ async def test_prepend_previous_response_none(openai_responses_impl):
@pytest.mark.asyncio
-@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
-async def test_prepend_previous_response_basic(get_previous_response_with_input, openai_responses_impl):
+async def test_prepend_previous_response_basic(openai_responses_impl, mock_responses_store):
"""Test prepending a basic previous response to a new response."""
input_item_message = OpenAIResponseMessage(
@@ -301,25 +298,21 @@ async def test_prepend_previous_response_basic(get_previous_response_with_input,
content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
role="user",
)
- input_items = OpenAIResponseInputItemList(data=[input_item_message])
response_output_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseOutputMessageContentOutputText(text="fake_response")],
status="completed",
role="assistant",
)
- response = OpenAIResponseObject(
+ previous_response = OpenAIResponseObjectWithInput(
created_at=1,
id="resp_123",
model="fake_model",
output=[response_output_message],
status="completed",
+ input=[input_item_message],
)
- previous_response = OpenAIResponsePreviousResponseWithInputItems(
- input_items=input_items,
- response=response,
- )
- get_previous_response_with_input.return_value = previous_response
+ mock_responses_store.get_response_object.return_value = previous_response
input = await openai_responses_impl._prepend_previous_response("fake_input", "resp_123")
@@ -336,16 +329,13 @@ async def test_prepend_previous_response_basic(get_previous_response_with_input,
@pytest.mark.asyncio
-@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
-async def test_prepend_previous_response_web_search(get_previous_response_with_input, openai_responses_impl):
+async def test_prepend_previous_response_web_search(openai_responses_impl, mock_responses_store):
"""Test prepending a web search previous response to a new response."""
-
input_item_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
role="user",
)
- input_items = OpenAIResponseInputItemList(data=[input_item_message])
output_web_search = OpenAIResponseOutputMessageWebSearchToolCall(
id="ws_123",
status="completed",
@@ -356,18 +346,15 @@ async def test_prepend_previous_response_web_search(get_previous_response_with_i
status="completed",
role="assistant",
)
- response = OpenAIResponseObject(
+ response = OpenAIResponseObjectWithInput(
created_at=1,
id="resp_123",
model="fake_model",
output=[output_web_search, output_message],
status="completed",
+ input=[input_item_message],
)
- previous_response = OpenAIResponsePreviousResponseWithInputItems(
- input_items=input_items,
- response=response,
- )
- get_previous_response_with_input.return_value = previous_response
+ mock_responses_store.get_response_object.return_value = response
input_messages = [OpenAIResponseMessage(content="fake_input", role="user")]
input = await openai_responses_impl._prepend_previous_response(input_messages, "resp_123")
@@ -464,9 +451,8 @@ async def test_create_openai_response_with_instructions_and_multiple_messages(
@pytest.mark.asyncio
-@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
async def test_create_openai_response_with_instructions_and_previous_response(
- get_previous_response_with_input, openai_responses_impl, mock_inference_api
+ openai_responses_impl, mock_responses_store, mock_inference_api
):
"""Test prepending both instructions and previous response."""
@@ -475,25 +461,21 @@ async def test_create_openai_response_with_instructions_and_previous_response(
content="Name some towns in Ireland",
role="user",
)
- input_items = OpenAIResponseInputItemList(data=[input_item_message])
response_output_message = OpenAIResponseMessage(
id="123",
content="Galway, Longford, Sligo",
status="completed",
role="assistant",
)
- response = OpenAIResponseObject(
+ response = OpenAIResponseObjectWithInput(
created_at=1,
id="resp_123",
model="fake_model",
output=[response_output_message],
status="completed",
+ input=[input_item_message],
)
- previous_response = OpenAIResponsePreviousResponseWithInputItems(
- input_items=input_items,
- response=response,
- )
- get_previous_response_with_input.return_value = previous_response
+ mock_responses_store.get_response_object.return_value = response
model = "meta-llama/Llama-3.1-8B-Instruct"
instructions = "You are a geography expert. Provide concise answers."
@@ -511,7 +493,7 @@ async def test_create_openai_response_with_instructions_and_previous_response(
sent_messages = call_args.kwargs["messages"]
# Check that instructions were prepended as a system message
- assert len(sent_messages) == 4
+ assert len(sent_messages) == 4, sent_messages
assert sent_messages[0].role == "system"
assert sent_messages[0].content == instructions
diff --git a/tests/verifications/openai-api-verification-run.yaml b/tests/verifications/openai-api-verification-run.yaml
index 4c322af28..d6d8cd07d 100644
--- a/tests/verifications/openai-api-verification-run.yaml
+++ b/tests/verifications/openai-api-verification-run.yaml
@@ -63,6 +63,9 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/agents_store.db
+ responses_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/responses_store.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search