fold openai responses into the Agents API

2025-12-29 23:09:32 +00:00 · 2025-04-28 10:27:28 -07:00 · 2025-04-28 10:27:28 -07:00 · abd6280cb8
commit abd6280cb8
parent 207224a811
25 changed files with 967 additions and 199 deletions
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@ -38,6 +38,13 @@ from llama_stack.apis.safety import SafetyViolation
 from llama_stack.apis.tools import ToolDef
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

+from .openai_responses import (
+    OpenAIResponseInputMessage,
+    OpenAIResponseInputTool,
+    OpenAIResponseObject,
+    OpenAIResponseObjectStream,
+)
+

 class Attachment(BaseModel):
    """An attachment to an agent turn.
@ -593,3 +600,39 @@ class Agents(Protocol):
        :returns: A ListAgentSessionsResponse.
        """
        ...
+
+    # We situate the OpenAI Responses API in the Agents API, just as we did
+    # for Inference. The Responses API, in its intent, serves the same purpose as
+    # the Agents API above -- it is essentially a lightweight "agentic loop" with
+    # integrated tool calling.
+    #
+    # Both of these APIs are inherently stateful.
+
+    @webmethod(route="/openai/v1/responses/{id}", method="GET")
+    async def get_openai_response(
+        self,
+        id: str,
+    ) -> OpenAIResponseObject:
+        """Retrieve an OpenAI response by its ID.
+
+        :param id: The ID of the OpenAI response to retrieve; it has the same name as the `{id}` placeholder in the route.
+        :returns: An OpenAIResponseObject.
+        """
+        ...
+
+    @webmethod(route="/openai/v1/responses", method="POST")
+    async def create_openai_response(
+        self,
+        input: Union[str, List[OpenAIResponseInputMessage]],
+        model: str,
+        previous_response_id: Optional[str] = None,
+        store: Optional[bool] = True,
+        stream: Optional[bool] = False,
+        tools: Optional[List[OpenAIResponseInputTool]] = None,
+    ) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]:
+        """Create a new OpenAI response.
+
+        :param input: Input message(s) to create the response.
+        :param model: The underlying LLM used for completions.
+        :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
+        :param store: (Optional) defaults to True. NOTE(review): presumably controls whether the response is persisted so it can be retrieved later -- confirm against the provider implementation.
+        :param stream: (Optional) defaults to False. NOTE(review): presumably selects the streaming (AsyncIterator) form of the return value -- confirm against the provider implementation.
+        :param tools: (Optional) list of OpenAIResponseInputTool definitions supplied with the request.
+        :returns: An OpenAIResponseObject, or an AsyncIterator of OpenAIResponseObjectStream.
+        """
--- a/llama_stack/apis/openai_responses/openai_responses.py
+++ b/llama_stack/apis/openai_responses/openai_responses.py
@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import AsyncIterator, List, Literal, Optional, Protocol, Union, runtime_checkable
+from typing import List, Literal, Optional, Union

 from pydantic import BaseModel, Field
 from typing_extensions import Annotated

-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack.schema_utils import json_schema_type, register_schema


@json_schema_type
@ -104,7 +104,7 @@ class OpenAIResponseInputMessageContentText(BaseModel):

@json_schema_type
 class OpenAIResponseInputMessageContentImage(BaseModel):
-    detail: Literal["low", "high", "auto"] = "auto"
+    detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
    type: Literal["input_image"] = "input_image"
    # TODO: handle file_id
    image_url: Optional[str] = None
@ -121,13 +121,13 @@ register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMess
@json_schema_type
 class OpenAIResponseInputMessage(BaseModel):
    content: Union[str, List[OpenAIResponseInputMessageContent]]
-    role: Literal["system", "developer", "user", "assistant"]
+    role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
    type: Optional[Literal["message"]] = "message"


@json_schema_type
 class OpenAIResponseInputToolWebSearch(BaseModel):
-    type: Literal["web_search", "web_search_preview_2025_03_11"] = "web_search"
+    type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
    # TODO: actually use search_context_size somewhere...
    search_context_size: Optional[str] = Field(default="medium", pattern="^low|medium|high$")
    # TODO: add user_location
@ -138,27 +138,3 @@ OpenAIResponseInputTool = Annotated[
    Field(discriminator="type"),
 ]
 register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
-
-
-@runtime_checkable
-class OpenAIResponses(Protocol):
-    """
-    OpenAI Responses API implementation.
-    """
-
-    @webmethod(route="/openai/v1/responses/{id}", method="GET")
-    async def get_openai_response(
-        self,
-        id: str,
-    ) -> OpenAIResponseObject: ...
-
-    @webmethod(route="/openai/v1/responses", method="POST")
-    async def create_openai_response(
-        self,
-        input: Union[str, List[OpenAIResponseInputMessage]],
-        model: str,
-        previous_response_id: Optional[str] = None,
-        store: Optional[bool] = True,
-        stream: Optional[bool] = False,
-        tools: Optional[List[OpenAIResponseInputTool]] = None,
-    ) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]: ...
--- a/llama_stack/apis/datatypes.py
+++ b/llama_stack/apis/datatypes.py
@ -24,7 +24,6 @@ class Api(Enum):
    eval = "eval"
    post_training = "post_training"
    tool_runtime = "tool_runtime"
-    openai_responses = "openai_responses"

    telemetry = "telemetry"

--- a/llama_stack/apis/openai_responses/init.py
+++ b/llama_stack/apis/openai_responses/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .openai_responses import *  # noqa: F401 F403