feat: add list responses API (#2233)

# What does this PR do?
This is not part of the official OpenAI API, but we'll use this for the
logs UI.
In order to support more filtering options, I'm adopting the newly
introduced sql store in in place of the kv store.

## Test Plan
Added integration/unit tests.
This commit is contained in:
ehhuang 2025-05-23 13:16:48 -07:00 committed by GitHub
parent 6463ee7633
commit 5844c2da68
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
47 changed files with 704 additions and 77 deletions

View file

@ -20,9 +20,11 @@ from llama_stack.apis.agents import (
AgentTurnCreateRequest,
AgentTurnResumeRequest,
Document,
ListOpenAIResponseObject,
OpenAIResponseInput,
OpenAIResponseInputTool,
OpenAIResponseObject,
Order,
Session,
Turn,
)
@ -39,6 +41,7 @@ from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
from llama_stack.providers.utils.pagination import paginate_records
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
from .agent_instance import ChatAgent
from .config import MetaReferenceAgentsImplConfig
@ -66,15 +69,17 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_groups_api = tool_groups_api
self.in_memory_store = InmemoryKVStoreImpl()
self.openai_responses_impl = None
self.openai_responses_impl: OpenAIResponsesImpl | None = None
async def initialize(self) -> None:
self.persistence_store = await kvstore_impl(self.config.persistence_store)
self.responses_store = ResponsesStore(self.config.responses_store)
await self.responses_store.initialize()
self.openai_responses_impl = OpenAIResponsesImpl(
self.persistence_store,
inference_api=self.inference_api,
tool_groups_api=self.tool_groups_api,
tool_runtime_api=self.tool_runtime_api,
responses_store=self.responses_store,
)
async def create_agent(
@ -323,3 +328,12 @@ class MetaReferenceAgentsImpl(Agents):
return await self.openai_responses_impl.create_openai_response(
input, model, instructions, previous_response_id, store, stream, temperature, tools
)
async def list_openai_responses(
self,
after: str | None = None,
limit: int | None = 50,
model: str | None = None,
order: Order | None = Order.desc,
) -> ListOpenAIResponseObject:
return await self.openai_responses_impl.list_openai_responses(after, limit, model, order)

View file

@ -10,10 +10,12 @@ from pydantic import BaseModel
from llama_stack.providers.utils.kvstore import KVStoreConfig
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
class MetaReferenceAgentsImplConfig(BaseModel):
persistence_store: KVStoreConfig
responses_store: SqlStoreConfig
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
@ -21,5 +23,9 @@ class MetaReferenceAgentsImplConfig(BaseModel):
"persistence_store": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="agents_store.db",
)
),
"responses_store": SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="responses_store.db",
),
}

View file

@ -12,7 +12,9 @@ from typing import Any, cast
from openai.types.chat import ChatCompletionToolParam
from pydantic import BaseModel
from llama_stack.apis.agents import Order
from llama_stack.apis.agents.openai_responses import (
ListOpenAIResponseObject,
OpenAIResponseInput,
OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputItemList,
@ -53,7 +55,7 @@ from llama_stack.apis.tools.tools import ToolGroups, ToolInvocationResult, ToolR
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
from llama_stack.providers.utils.kvstore import KVStore
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
logger = get_logger(name=__name__, category="openai_responses")
@ -169,34 +171,27 @@ class OpenAIResponsePreviousResponseWithInputItems(BaseModel):
class OpenAIResponsesImpl:
def __init__(
self,
persistence_store: KVStore,
inference_api: Inference,
tool_groups_api: ToolGroups,
tool_runtime_api: ToolRuntime,
responses_store: ResponsesStore,
):
self.persistence_store = persistence_store
self.inference_api = inference_api
self.tool_groups_api = tool_groups_api
self.tool_runtime_api = tool_runtime_api
async def _get_previous_response_with_input(self, id: str) -> OpenAIResponsePreviousResponseWithInputItems:
key = f"{OPENAI_RESPONSES_PREFIX}{id}"
response_json = await self.persistence_store.get(key=key)
if response_json is None:
raise ValueError(f"OpenAI response with id '{id}' not found")
return OpenAIResponsePreviousResponseWithInputItems.model_validate_json(response_json)
self.responses_store = responses_store
async def _prepend_previous_response(
self, input: str | list[OpenAIResponseInput], previous_response_id: str | None = None
):
if previous_response_id:
previous_response_with_input = await self._get_previous_response_with_input(previous_response_id)
previous_response_with_input = await self.responses_store.get_response_object(previous_response_id)
# previous response input items
new_input_items = previous_response_with_input.input_items.data
new_input_items = previous_response_with_input.input
# previous response output items
new_input_items.extend(previous_response_with_input.response.output)
new_input_items.extend(previous_response_with_input.output)
# new input items from the current request
if isinstance(input, str):
@ -216,8 +211,17 @@ class OpenAIResponsesImpl:
self,
response_id: str,
) -> OpenAIResponseObject:
response_with_input = await self._get_previous_response_with_input(response_id)
return response_with_input.response
response_with_input = await self.responses_store.get_response_object(response_id)
return OpenAIResponseObject(**{k: v for k, v in response_with_input.model_dump().items() if k != "input"})
async def list_openai_responses(
self,
after: str | None = None,
limit: int | None = 50,
model: str | None = None,
order: Order | None = Order.desc,
) -> ListOpenAIResponseObject:
return await self.responses_store.list_responses(after, limit, model, order)
async def create_openai_response(
self,
@ -360,15 +364,9 @@ class OpenAIResponsesImpl:
else:
input_items_data.append(input_item)
input_items = OpenAIResponseInputItemList(data=input_items_data)
prev_response = OpenAIResponsePreviousResponseWithInputItems(
input_items=input_items,
response=response,
)
key = f"{OPENAI_RESPONSES_PREFIX}{response.id}"
await self.persistence_store.set(
key=key,
value=prev_response.model_dump_json(),
await self.responses_store.store_response_object(
response_object=response,
input=input_items_data,
)
if stream: