feat: add list responses API (#2233)

# What does this PR do?
This is not part of the official OpenAI API, but we'll use this for the
logs UI.
In order to support more filtering options, I'm adopting the newly
introduced sql store in in place of the kv store.

## Test Plan
Added integration/unit tests.
This commit is contained in:
ehhuang 2025-05-23 13:16:48 -07:00 committed by GitHub
parent 6463ee7633
commit 5844c2da68
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
47 changed files with 704 additions and 77 deletions

View file

@ -20,9 +20,11 @@ from llama_stack.apis.agents import (
AgentTurnCreateRequest,
AgentTurnResumeRequest,
Document,
ListOpenAIResponseObject,
OpenAIResponseInput,
OpenAIResponseInputTool,
OpenAIResponseObject,
Order,
Session,
Turn,
)
@ -39,6 +41,7 @@ from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
from llama_stack.providers.utils.pagination import paginate_records
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
from .agent_instance import ChatAgent
from .config import MetaReferenceAgentsImplConfig
@ -66,15 +69,17 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_groups_api = tool_groups_api
self.in_memory_store = InmemoryKVStoreImpl()
self.openai_responses_impl = None
self.openai_responses_impl: OpenAIResponsesImpl | None = None
async def initialize(self) -> None:
self.persistence_store = await kvstore_impl(self.config.persistence_store)
self.responses_store = ResponsesStore(self.config.responses_store)
await self.responses_store.initialize()
self.openai_responses_impl = OpenAIResponsesImpl(
self.persistence_store,
inference_api=self.inference_api,
tool_groups_api=self.tool_groups_api,
tool_runtime_api=self.tool_runtime_api,
responses_store=self.responses_store,
)
async def create_agent(
@ -323,3 +328,12 @@ class MetaReferenceAgentsImpl(Agents):
return await self.openai_responses_impl.create_openai_response(
input, model, instructions, previous_response_id, store, stream, temperature, tools
)
async def list_openai_responses(
self,
after: str | None = None,
limit: int | None = 50,
model: str | None = None,
order: Order | None = Order.desc,
) -> ListOpenAIResponseObject:
return await self.openai_responses_impl.list_openai_responses(after, limit, model, order)

View file

@ -10,10 +10,12 @@ from pydantic import BaseModel
from llama_stack.providers.utils.kvstore import KVStoreConfig
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
class MetaReferenceAgentsImplConfig(BaseModel):
persistence_store: KVStoreConfig
responses_store: SqlStoreConfig
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
@ -21,5 +23,9 @@ class MetaReferenceAgentsImplConfig(BaseModel):
"persistence_store": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="agents_store.db",
)
),
"responses_store": SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="responses_store.db",
),
}

View file

@ -12,7 +12,9 @@ from typing import Any, cast
from openai.types.chat import ChatCompletionToolParam
from pydantic import BaseModel
from llama_stack.apis.agents import Order
from llama_stack.apis.agents.openai_responses import (
ListOpenAIResponseObject,
OpenAIResponseInput,
OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputItemList,
@ -53,7 +55,7 @@ from llama_stack.apis.tools.tools import ToolGroups, ToolInvocationResult, ToolR
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
from llama_stack.providers.utils.kvstore import KVStore
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
logger = get_logger(name=__name__, category="openai_responses")
@ -169,34 +171,27 @@ class OpenAIResponsePreviousResponseWithInputItems(BaseModel):
class OpenAIResponsesImpl:
def __init__(
self,
persistence_store: KVStore,
inference_api: Inference,
tool_groups_api: ToolGroups,
tool_runtime_api: ToolRuntime,
responses_store: ResponsesStore,
):
self.persistence_store = persistence_store
self.inference_api = inference_api
self.tool_groups_api = tool_groups_api
self.tool_runtime_api = tool_runtime_api
async def _get_previous_response_with_input(self, id: str) -> OpenAIResponsePreviousResponseWithInputItems:
key = f"{OPENAI_RESPONSES_PREFIX}{id}"
response_json = await self.persistence_store.get(key=key)
if response_json is None:
raise ValueError(f"OpenAI response with id '{id}' not found")
return OpenAIResponsePreviousResponseWithInputItems.model_validate_json(response_json)
self.responses_store = responses_store
async def _prepend_previous_response(
self, input: str | list[OpenAIResponseInput], previous_response_id: str | None = None
):
if previous_response_id:
previous_response_with_input = await self._get_previous_response_with_input(previous_response_id)
previous_response_with_input = await self.responses_store.get_response_object(previous_response_id)
# previous response input items
new_input_items = previous_response_with_input.input_items.data
new_input_items = previous_response_with_input.input
# previous response output items
new_input_items.extend(previous_response_with_input.response.output)
new_input_items.extend(previous_response_with_input.output)
# new input items from the current request
if isinstance(input, str):
@ -216,8 +211,17 @@ class OpenAIResponsesImpl:
self,
response_id: str,
) -> OpenAIResponseObject:
response_with_input = await self._get_previous_response_with_input(response_id)
return response_with_input.response
response_with_input = await self.responses_store.get_response_object(response_id)
return OpenAIResponseObject(**{k: v for k, v in response_with_input.model_dump().items() if k != "input"})
async def list_openai_responses(
self,
after: str | None = None,
limit: int | None = 50,
model: str | None = None,
order: Order | None = Order.desc,
) -> ListOpenAIResponseObject:
return await self.responses_store.list_responses(after, limit, model, order)
async def create_openai_response(
self,
@ -360,15 +364,9 @@ class OpenAIResponsesImpl:
else:
input_items_data.append(input_item)
input_items = OpenAIResponseInputItemList(data=input_items_data)
prev_response = OpenAIResponsePreviousResponseWithInputItems(
input_items=input_items,
response=response,
)
key = f"{OPENAI_RESPONSES_PREFIX}{response.id}"
await self.persistence_store.set(
key=key,
value=prev_response.model_dump_json(),
await self.responses_store.store_response_object(
response_object=response,
input=input_items_data,
)
if stream:

View file

@ -0,0 +1,98 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.agents import (
Order,
)
from llama_stack.apis.agents.openai_responses import (
ListOpenAIResponseObject,
OpenAIResponseInput,
OpenAIResponseObject,
OpenAIResponseObjectWithInput,
)
from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
from ..sqlstore.api import ColumnDefinition, ColumnType
from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl
class ResponsesStore:
def __init__(self, sql_store_config: SqlStoreConfig):
if not sql_store_config:
sql_store_config = SqliteSqlStoreConfig(
db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(),
)
self.sql_store = sqlstore_impl(sql_store_config)
async def initialize(self):
"""Create the necessary tables if they don't exist."""
await self.sql_store.create_table(
"openai_responses",
{
"id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
"created_at": ColumnType.INTEGER,
"response_object": ColumnType.JSON,
"model": ColumnType.STRING,
},
)
async def store_response_object(
self, response_object: OpenAIResponseObject, input: list[OpenAIResponseInput]
) -> None:
data = response_object.model_dump()
data["input"] = [input_item.model_dump() for input_item in input]
await self.sql_store.insert(
"openai_responses",
{
"id": data["id"],
"created_at": data["created_at"],
"model": data["model"],
"response_object": data,
},
)
async def list_responses(
self,
after: str | None = None,
limit: int | None = 50,
model: str | None = None,
order: Order | None = Order.desc,
) -> ListOpenAIResponseObject:
"""
List responses from the database.
:param after: The ID of the last response to return.
:param limit: The maximum number of responses to return.
:param model: The model to filter by.
:param order: The order to sort the responses by.
"""
# TODO: support after
if after:
raise NotImplementedError("After is not supported for SQLite")
if not order:
order = Order.desc
rows = await self.sql_store.fetch_all(
"openai_responses",
where={"model": model} if model else None,
order_by=[("created_at", order.value)],
limit=limit,
)
data = [OpenAIResponseObjectWithInput(**row["response_object"]) for row in rows]
return ListOpenAIResponseObject(
data=data,
# TODO: implement has_more
has_more=False,
first_id=data[0].id if data else "",
last_id=data[-1].id if data else "",
)
async def get_response_object(self, response_id: str) -> OpenAIResponseObjectWithInput:
row = await self.sql_store.fetch_one("openai_responses", where={"id": response_id})
if not row:
raise ValueError(f"Response with id {response_id} not found") from None
return OpenAIResponseObjectWithInput(**row["response_object"])