feat: add list responses API (#2233)

# What does this PR do?
This is not part of the official OpenAI API, but we'll use this for the
logs UI.
In order to support more filtering options, I'm adopting the newly
introduced sql store in in place of the kv store.

## Test Plan
Added integration/unit tests.
This commit is contained in:
ehhuang 2025-05-23 13:16:48 -07:00 committed by GitHub
parent 6463ee7633
commit 5844c2da68
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
47 changed files with 704 additions and 77 deletions

View file

@ -13,7 +13,7 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, ConfigDict, Field
from llama_stack.apis.common.content_types import URL, ContentDelta, InterleavedContent
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.common.responses import Order, PaginatedResponse
from llama_stack.apis.inference import (
CompletionMessage,
ResponseFormat,
@ -31,6 +31,7 @@ from llama_stack.apis.tools import ToolDef
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from .openai_responses import (
ListOpenAIResponseObject,
OpenAIResponseInput,
OpenAIResponseInputTool,
OpenAIResponseObject,
@ -611,3 +612,21 @@ class Agents(Protocol):
:returns: An OpenAIResponseObject.
"""
...
@webmethod(route="/openai/v1/responses", method="GET")
async def list_openai_responses(
self,
after: str | None = None,
limit: int | None = 50,
model: str | None = None,
order: Order | None = Order.desc,
) -> ListOpenAIResponseObject:
"""List all OpenAI responses.
:param after: The ID of the last response to return.
:param limit: The number of responses to return.
:param model: The model to filter responses by.
:param order: The order to sort responses by when sorted by created_at ('asc' or 'desc').
:returns: A ListOpenAIResponseObject.
"""
...

View file

@ -219,3 +219,17 @@ register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
class OpenAIResponseInputItemList(BaseModel):
data: list[OpenAIResponseInput]
object: Literal["list"] = "list"
@json_schema_type
class OpenAIResponseObjectWithInput(OpenAIResponseObject):
input: list[OpenAIResponseInput]
@json_schema_type
class ListOpenAIResponseObject(BaseModel):
data: list[OpenAIResponseObjectWithInput]
has_more: bool
first_id: str
last_id: str
object: Literal["list"] = "list"

View file

@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from enum import Enum
from typing import Any
from pydantic import BaseModel
@ -11,6 +12,11 @@ from pydantic import BaseModel
from llama_stack.schema_utils import json_schema_type
class Order(Enum):
asc = "asc"
desc = "desc"
@json_schema_type
class PaginatedResponse(BaseModel):
"""A generic paginated response that follows a simple format.

View file

@ -19,6 +19,7 @@ from pydantic import BaseModel, Field, field_validator
from typing_extensions import TypedDict
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem
from llama_stack.apis.common.responses import Order
from llama_stack.apis.models import Model
from llama_stack.apis.telemetry.telemetry import MetricResponseMixin
from llama_stack.models.llama.datatypes import (
@ -833,11 +834,6 @@ class ListOpenAIChatCompletionResponse(BaseModel):
object: Literal["list"] = "list"
class Order(Enum):
asc = "asc"
desc = "desc"
@runtime_checkable
@trace_protocol
class InferenceProvider(Protocol):