diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 4c5393947..49c402d37 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -497,6 +497,54 @@
}
}
},
+ "/v1/openai/v1/responses": {
+ "post": {
+ "responses": {
+ "200": {
+          "description": "An OpenAIResponseObject, or (when streaming) a server-sent event stream of OpenAIResponseObjectStream chunks.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/OpenAIResponseObject"
+ }
+ },
+ "text/event-stream": {
+ "schema": {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStream"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Agents"
+ ],
+ "description": "Create a new OpenAI response.",
+ "parameters": [],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/CreateOpenaiResponseRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/v1/files": {
"get": {
"responses": {
@@ -1278,6 +1326,49 @@
]
}
},
+ "/v1/openai/v1/responses/{id}": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "An OpenAIResponseObject.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/OpenAIResponseObject"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Agents"
+ ],
+ "description": "Retrieve an OpenAI response by its ID.",
+ "parameters": [
+ {
+ "name": "id",
+ "in": "path",
+ "description": "The ID of the OpenAI response to retrieve.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/v1/scoring-functions/{scoring_fn_id}": {
"get": {
"responses": {
@@ -6192,6 +6283,427 @@
],
"title": "AgentTurnResponseTurnStartPayload"
},
+ "OpenAIResponseInputMessage": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
+ }
+ }
+ ]
+ },
+ "role": {
+ "oneOf": [
+ {
+ "type": "string",
+ "const": "system"
+ },
+ {
+ "type": "string",
+ "const": "developer"
+ },
+ {
+ "type": "string",
+ "const": "user"
+ },
+ {
+ "type": "string",
+ "const": "assistant"
+ }
+ ]
+ },
+ "type": {
+ "type": "string",
+ "const": "message",
+ "default": "message"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "content",
+ "role"
+ ],
+ "title": "OpenAIResponseInputMessage"
+ },
+ "OpenAIResponseInputMessageContent": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
+ "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+ }
+ }
+ },
+ "OpenAIResponseInputMessageContentImage": {
+ "type": "object",
+ "properties": {
+ "detail": {
+ "oneOf": [
+ {
+ "type": "string",
+ "const": "low"
+ },
+ {
+ "type": "string",
+ "const": "high"
+ },
+ {
+ "type": "string",
+ "const": "auto"
+ }
+ ],
+ "default": "auto"
+ },
+ "type": {
+ "type": "string",
+ "const": "input_image",
+ "default": "input_image"
+ },
+ "image_url": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "detail",
+ "type"
+ ],
+ "title": "OpenAIResponseInputMessageContentImage"
+ },
+ "OpenAIResponseInputMessageContentText": {
+ "type": "object",
+ "properties": {
+ "text": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "input_text",
+ "default": "input_text"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "text",
+ "type"
+ ],
+ "title": "OpenAIResponseInputMessageContentText"
+ },
+ "OpenAIResponseInputTool": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "oneOf": [
+ {
+ "type": "string",
+ "const": "web_search"
+ },
+ {
+ "type": "string",
+ "const": "web_search_preview_2025_03_11"
+ }
+ ],
+ "default": "web_search"
+ },
+ "search_context_size": {
+ "type": "string",
+ "default": "medium"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ],
+ "title": "OpenAIResponseInputToolWebSearch"
+ },
+ "CreateOpenaiResponseRequest": {
+ "type": "object",
+ "properties": {
+ "input": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseInputMessage"
+ }
+ }
+ ],
+ "description": "Input message(s) to create the response."
+ },
+ "model": {
+ "type": "string",
+ "description": "The underlying LLM used for completions."
+ },
+ "previous_response_id": {
+ "type": "string",
+        "description": "(Optional) If specified, the new response will be a continuation of the previous response. This can be used to easily fork off new responses from existing responses."
+ },
+ "store": {
+ "type": "boolean"
+ },
+ "stream": {
+ "type": "boolean"
+ },
+ "tools": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseInputTool"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "input",
+ "model"
+ ],
+ "title": "CreateOpenaiResponseRequest"
+ },
+ "OpenAIResponseError": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string"
+ },
+ "message": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "code",
+ "message"
+ ],
+ "title": "OpenAIResponseError"
+ },
+ "OpenAIResponseObject": {
+ "type": "object",
+ "properties": {
+ "created_at": {
+ "type": "integer"
+ },
+ "error": {
+ "$ref": "#/components/schemas/OpenAIResponseError"
+ },
+ "id": {
+ "type": "string"
+ },
+ "model": {
+ "type": "string"
+ },
+ "object": {
+ "type": "string",
+ "const": "response",
+ "default": "response"
+ },
+ "output": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseOutput"
+ }
+ },
+ "parallel_tool_calls": {
+ "type": "boolean",
+ "default": false
+ },
+ "previous_response_id": {
+ "type": "string"
+ },
+ "status": {
+ "type": "string"
+ },
+ "temperature": {
+ "type": "number"
+ },
+ "top_p": {
+ "type": "number"
+ },
+ "truncation": {
+ "type": "string"
+ },
+ "user": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "created_at",
+ "id",
+ "model",
+ "object",
+ "output",
+ "parallel_tool_calls",
+ "status"
+ ],
+ "title": "OpenAIResponseObject"
+ },
+ "OpenAIResponseOutput": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/OpenAIResponseOutputMessage"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "message": "#/components/schemas/OpenAIResponseOutputMessage",
+ "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
+ }
+ }
+ },
+ "OpenAIResponseOutputMessage": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string"
+ },
+ "content": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseOutputMessageContent"
+ }
+ },
+ "role": {
+ "type": "string",
+ "const": "assistant",
+ "default": "assistant"
+ },
+ "status": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "message",
+ "default": "message"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "content",
+ "role",
+ "status",
+ "type"
+ ],
+ "title": "OpenAIResponseOutputMessage"
+ },
+ "OpenAIResponseOutputMessageContent": {
+ "type": "object",
+ "properties": {
+ "text": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "output_text",
+ "default": "output_text"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "text",
+ "type"
+ ],
+ "title": "OpenAIResponseOutputMessageContentOutputText"
+ },
+ "OpenAIResponseOutputMessageWebSearchToolCall": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string"
+ },
+ "status": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "web_search_call",
+ "default": "web_search_call"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "status",
+ "type"
+ ],
+ "title": "OpenAIResponseOutputMessageWebSearchToolCall"
+ },
+ "OpenAIResponseObjectStream": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
+ "response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
+ }
+ }
+ },
+ "OpenAIResponseObjectStreamResponseCompleted": {
+ "type": "object",
+ "properties": {
+ "response": {
+ "$ref": "#/components/schemas/OpenAIResponseObject"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.completed",
+ "default": "response.completed"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "response",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseCompleted"
+ },
+ "OpenAIResponseObjectStreamResponseCreated": {
+ "type": "object",
+ "properties": {
+ "response": {
+ "$ref": "#/components/schemas/OpenAIResponseObject"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.created",
+ "default": "response.created"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "response",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseCreated"
+ },
"CreateUploadSessionRequest": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index a24f1a9db..e5bfad623 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -330,6 +330,39 @@ paths:
schema:
$ref: '#/components/schemas/CreateAgentTurnRequest'
required: true
+ /v1/openai/v1/responses:
+ post:
+ responses:
+ '200':
+        description: >-
+          An OpenAIResponseObject, or (when streaming) a server-sent event stream
+          of OpenAIResponseObjectStream chunks.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/OpenAIResponseObject'
+ text/event-stream:
+ schema:
+ $ref: '#/components/schemas/OpenAIResponseObjectStream'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Agents
+ description: Create a new OpenAI response.
+ parameters: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/CreateOpenaiResponseRequest'
+ required: true
/v1/files:
get:
responses:
@@ -875,6 +908,36 @@ paths:
required: true
schema:
type: string
+ /v1/openai/v1/responses/{id}:
+ get:
+ responses:
+ '200':
+ description: An OpenAIResponseObject.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/OpenAIResponseObject'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Agents
+ description: Retrieve an OpenAI response by its ID.
+ parameters:
+ - name: id
+ in: path
+ description: >-
+ The ID of the OpenAI response to retrieve.
+ required: true
+ schema:
+ type: string
/v1/scoring-functions/{scoring_fn_id}:
get:
responses:
@@ -4329,6 +4392,293 @@ components:
- event_type
- turn_id
title: AgentTurnResponseTurnStartPayload
+ OpenAIResponseInputMessage:
+ type: object
+ properties:
+ content:
+ oneOf:
+ - type: string
+ - type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseInputMessageContent'
+ role:
+ oneOf:
+ - type: string
+ const: system
+ - type: string
+ const: developer
+ - type: string
+ const: user
+ - type: string
+ const: assistant
+ type:
+ type: string
+ const: message
+ default: message
+ additionalProperties: false
+ required:
+ - content
+ - role
+ title: OpenAIResponseInputMessage
+ OpenAIResponseInputMessageContent:
+ oneOf:
+ - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
+ - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+ discriminator:
+ propertyName: type
+ mapping:
+ input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
+ input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+ OpenAIResponseInputMessageContentImage:
+ type: object
+ properties:
+ detail:
+ oneOf:
+ - type: string
+ const: low
+ - type: string
+ const: high
+ - type: string
+ const: auto
+ default: auto
+ type:
+ type: string
+ const: input_image
+ default: input_image
+ image_url:
+ type: string
+ additionalProperties: false
+ required:
+ - detail
+ - type
+ title: OpenAIResponseInputMessageContentImage
+ OpenAIResponseInputMessageContentText:
+ type: object
+ properties:
+ text:
+ type: string
+ type:
+ type: string
+ const: input_text
+ default: input_text
+ additionalProperties: false
+ required:
+ - text
+ - type
+ title: OpenAIResponseInputMessageContentText
+ OpenAIResponseInputTool:
+ type: object
+ properties:
+ type:
+ oneOf:
+ - type: string
+ const: web_search
+ - type: string
+ const: web_search_preview_2025_03_11
+ default: web_search
+ search_context_size:
+ type: string
+ default: medium
+ additionalProperties: false
+ required:
+ - type
+ title: OpenAIResponseInputToolWebSearch
+ CreateOpenaiResponseRequest:
+ type: object
+ properties:
+ input:
+ oneOf:
+ - type: string
+ - type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseInputMessage'
+ description: Input message(s) to create the response.
+ model:
+ type: string
+ description: The underlying LLM used for completions.
+ previous_response_id:
+ type: string
+ description: >-
+          (Optional) If specified, the new response will be a continuation of the
+          previous response. This can be used to easily fork off new responses from
+          existing responses.
+ store:
+ type: boolean
+ stream:
+ type: boolean
+ tools:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseInputTool'
+ additionalProperties: false
+ required:
+ - input
+ - model
+ title: CreateOpenaiResponseRequest
+ OpenAIResponseError:
+ type: object
+ properties:
+ code:
+ type: string
+ message:
+ type: string
+ additionalProperties: false
+ required:
+ - code
+ - message
+ title: OpenAIResponseError
+ OpenAIResponseObject:
+ type: object
+ properties:
+ created_at:
+ type: integer
+ error:
+ $ref: '#/components/schemas/OpenAIResponseError'
+ id:
+ type: string
+ model:
+ type: string
+ object:
+ type: string
+ const: response
+ default: response
+ output:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseOutput'
+ parallel_tool_calls:
+ type: boolean
+ default: false
+ previous_response_id:
+ type: string
+ status:
+ type: string
+ temperature:
+ type: number
+ top_p:
+ type: number
+ truncation:
+ type: string
+ user:
+ type: string
+ additionalProperties: false
+ required:
+ - created_at
+ - id
+ - model
+ - object
+ - output
+ - parallel_tool_calls
+ - status
+ title: OpenAIResponseObject
+ OpenAIResponseOutput:
+ oneOf:
+ - $ref: '#/components/schemas/OpenAIResponseOutputMessage'
+ - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+ discriminator:
+ propertyName: type
+ mapping:
+ message: '#/components/schemas/OpenAIResponseOutputMessage'
+ web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+ OpenAIResponseOutputMessage:
+ type: object
+ properties:
+ id:
+ type: string
+ content:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseOutputMessageContent'
+ role:
+ type: string
+ const: assistant
+ default: assistant
+ status:
+ type: string
+ type:
+ type: string
+ const: message
+ default: message
+ additionalProperties: false
+ required:
+ - id
+ - content
+ - role
+ - status
+ - type
+ title: OpenAIResponseOutputMessage
+ OpenAIResponseOutputMessageContent:
+ type: object
+ properties:
+ text:
+ type: string
+ type:
+ type: string
+ const: output_text
+ default: output_text
+ additionalProperties: false
+ required:
+ - text
+ - type
+ title: >-
+ OpenAIResponseOutputMessageContentOutputText
+  OpenAIResponseOutputMessageWebSearchToolCall:
+ type: object
+ properties:
+ id:
+ type: string
+ status:
+ type: string
+ type:
+ type: string
+ const: web_search_call
+ default: web_search_call
+ additionalProperties: false
+ required:
+ - id
+ - status
+ - type
+ title: >-
+ OpenAIResponseOutputMessageWebSearchToolCall
+ OpenAIResponseObjectStream:
+ oneOf:
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
+ discriminator:
+ propertyName: type
+ mapping:
+ response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+ response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
+  OpenAIResponseObjectStreamResponseCompleted:
+ type: object
+ properties:
+ response:
+ $ref: '#/components/schemas/OpenAIResponseObject'
+ type:
+ type: string
+ const: response.completed
+ default: response.completed
+ additionalProperties: false
+ required:
+ - response
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseCompleted
+  OpenAIResponseObjectStreamResponseCreated:
+ type: object
+ properties:
+ response:
+ $ref: '#/components/schemas/OpenAIResponseObject'
+ type:
+ type: string
+ const: response.created
+ default: response.created
+ additionalProperties: false
+ required:
+ - response
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseCreated
CreateUploadSessionRequest:
type: object
properties:
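The streaming contract above is deliberately small at this stage: a stream is bracketed by a `response.created` event and a `response.completed` event, each carrying a full `OpenAIResponseObject`, with the `type` field acting as the discriminator. Illustrative payloads (all field values are made up):

```python
# Sketch of the two OpenAIResponseObjectStream variants; ids, timestamps,
# and model name are illustrative only.
created_event = {
    "type": "response.created",
    "response": {
        "id": "resp-1234",
        "object": "response",
        "created_at": 1714000000,
        "model": "meta-llama/Llama-3.3-70B-Instruct",
        "status": "in_progress",
        "parallel_tool_calls": False,
        "output": [],
    },
}
completed_event = {
    "type": "response.completed",
    "response": {
        **created_event["response"],
        "status": "completed",
        "output": [
            {
                "type": "message",
                "id": "msg-1",
                "role": "assistant",
                "status": "completed",
                "content": [{"type": "output_text", "text": "Paris."}],
            }
        ],
    },
}
```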
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 3936bb3c4..6d5e48a46 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -179,7 +179,7 @@ class ContentBuilder:
"Creates the content subtree for a request or response."
def is_iterator_type(t):
- return "StreamChunk" in str(t)
+ return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t)
def get_media_type(t):
if is_generic_list(t):
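This one-line change is what routes the new `OpenAIResponseObjectStream` union to the `text/event-stream` media type in the generated spec. A distilled sketch of the dispatch (simplified; the real `get_media_type` also special-cases generic lists):

```python
# Simplified view of the generator's media-type dispatch: types whose name
# marks them as streaming get SSE; other types serialize as JSON.
def is_iterator_type(t) -> bool:
    return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t)

def media_type_for(t) -> str:
    return "text/event-stream" if is_iterator_type(t) else "application/json"
```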
diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index 74365722d..46df56008 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -18,7 +18,6 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::vllm`, `inline::sentence-transformers` |
-| openai_responses | `inline::openai-responses` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index 5da0ee980..3ebb1f59e 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -19,7 +19,6 @@ The `llamastack/distribution-together` distribution consists of the following pr
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::together`, `inline::sentence-transformers` |
-| openai_responses | `inline::openai-responses` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index dec43280b..4db6e2226 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -38,6 +38,13 @@ from llama_stack.apis.safety import SafetyViolation
from llama_stack.apis.tools import ToolDef
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from .openai_responses import (
+ OpenAIResponseInputMessage,
+ OpenAIResponseInputTool,
+ OpenAIResponseObject,
+ OpenAIResponseObjectStream,
+)
+
class Attachment(BaseModel):
"""An attachment to an agent turn.
@@ -593,3 +600,39 @@ class Agents(Protocol):
:returns: A ListAgentSessionsResponse.
"""
...
+
+    # We situate the OpenAI Responses API in the Agents API, just as we did for
+    # Inference. The Responses API, in intent, serves the same purpose as the
+    # Agents API above -- it is essentially a lightweight "agentic loop" with
+    # integrated tool calling.
+    #
+    # Both of these APIs are inherently stateful.
+
+ @webmethod(route="/openai/v1/responses/{id}", method="GET")
+ async def get_openai_response(
+ self,
+ id: str,
+ ) -> OpenAIResponseObject:
+ """Retrieve an OpenAI response by its ID.
+
+ :param id: The ID of the OpenAI response to retrieve.
+ :returns: An OpenAIResponseObject.
+ """
+ ...
+
+ @webmethod(route="/openai/v1/responses", method="POST")
+ async def create_openai_response(
+ self,
+ input: Union[str, List[OpenAIResponseInputMessage]],
+ model: str,
+ previous_response_id: Optional[str] = None,
+ store: Optional[bool] = True,
+ stream: Optional[bool] = False,
+ tools: Optional[List[OpenAIResponseInputTool]] = None,
+ ) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]:
+ """Create a new OpenAI response.
+
+ :param input: Input message(s) to create the response.
+ :param model: The underlying LLM used for completions.
+        :param previous_response_id: (Optional) If specified, the new response will be a continuation of the previous response. This can be used to easily fork off new responses from existing responses.
+        :param store: (Optional) Whether to persist the response so it can be retrieved later. Defaults to True.
+        :param stream: (Optional) Whether to stream the response as server-sent events instead of returning a single object. Defaults to False.
+        :param tools: (Optional) Tools the model may invoke while generating the response.
+        :returns: An OpenAIResponseObject, or an AsyncIterator of OpenAIResponseObjectStream chunks when streaming.
+        """
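The two webmethods give the Agents API a create/retrieve pair for responses. A usage sketch from inside an async context, where `agents` is any object implementing this protocol and the model ID is a placeholder:

```python
# Hypothetical round trip against the new protocol methods.
response = await agents.create_openai_response(
    input="Tell me a three-sentence story about a llama.",
    model="meta-llama/Llama-3.3-70B-Instruct",
)

# Fork a continuation off the stored response, then fetch it by ID.
follow_up = await agents.create_openai_response(
    input="Now retell it as a haiku.",
    model="meta-llama/Llama-3.3-70B-Instruct",
    previous_response_id=response.id,
)
fetched = await agents.get_openai_response(id=follow_up.id)
assert fetched.id == follow_up.id
```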
diff --git a/llama_stack/apis/openai_responses/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
similarity index 78%
rename from llama_stack/apis/openai_responses/openai_responses.py
rename to llama_stack/apis/agents/openai_responses.py
index 0b21f3f28..72f16e224 100644
--- a/llama_stack/apis/openai_responses/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -4,12 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from typing import AsyncIterator, List, Literal, Optional, Protocol, Union, runtime_checkable
+from typing import List, Literal, Optional, Union
from pydantic import BaseModel, Field
from typing_extensions import Annotated
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack.schema_utils import json_schema_type, register_schema
@json_schema_type
@@ -104,7 +104,7 @@ class OpenAIResponseInputMessageContentText(BaseModel):
@json_schema_type
class OpenAIResponseInputMessageContentImage(BaseModel):
- detail: Literal["low", "high", "auto"] = "auto"
+ detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
type: Literal["input_image"] = "input_image"
# TODO: handle file_id
image_url: Optional[str] = None
@@ -121,13 +121,13 @@ register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMess
@json_schema_type
class OpenAIResponseInputMessage(BaseModel):
content: Union[str, List[OpenAIResponseInputMessageContent]]
- role: Literal["system", "developer", "user", "assistant"]
+ role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
type: Optional[Literal["message"]] = "message"
@json_schema_type
class OpenAIResponseInputToolWebSearch(BaseModel):
- type: Literal["web_search", "web_search_preview_2025_03_11"] = "web_search"
+ type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
# TODO: actually use search_context_size somewhere...
search_context_size: Optional[str] = Field(default="medium", pattern="^low|medium|high$")
# TODO: add user_location
@@ -138,27 +138,3 @@ OpenAIResponseInputTool = Annotated[
Field(discriminator="type"),
]
register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
-
-
-@runtime_checkable
-class OpenAIResponses(Protocol):
- """
- OpenAI Responses API implementation.
- """
-
- @webmethod(route="/openai/v1/responses/{id}", method="GET")
- async def get_openai_response(
- self,
- id: str,
- ) -> OpenAIResponseObject: ...
-
- @webmethod(route="/openai/v1/responses", method="POST")
- async def create_openai_response(
- self,
- input: Union[str, List[OpenAIResponseInputMessage]],
- model: str,
- previous_response_id: Optional[str] = None,
- store: Optional[bool] = True,
- stream: Optional[bool] = False,
- tools: Optional[List[OpenAIResponseInputTool]] = None,
- ) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]: ...
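With the models now living under `llama_stack.apis.agents`, the discriminated unions can be exercised directly; the `type` field picks the content class during validation. A small sketch (the image URL is a placeholder):

```python
from llama_stack.apis.agents.openai_responses import OpenAIResponseInputMessage

msg = OpenAIResponseInputMessage.model_validate(
    {
        "role": "user",
        "content": [
            {"type": "input_text", "text": "What is in this image?"},
            {"type": "input_image", "image_url": "https://example.com/llama.png"},
        ],
    }
)
# The discriminator routed each part to its class; `detail` defaulted to "auto".
print(type(msg.content[0]).__name__)  # OpenAIResponseInputMessageContentText
print(msg.content[1].detail)          # auto
```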
diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py
index 85c0ecc6b..25f3ab1ab 100644
--- a/llama_stack/apis/datatypes.py
+++ b/llama_stack/apis/datatypes.py
@@ -24,7 +24,6 @@ class Api(Enum):
eval = "eval"
post_training = "post_training"
tool_runtime = "tool_runtime"
- openai_responses = "openai_responses"
telemetry = "telemetry"
diff --git a/llama_stack/apis/openai_responses/__init__.py b/llama_stack/apis/openai_responses/__init__.py
deleted file mode 100644
index a3b32ff71..000000000
--- a/llama_stack/apis/openai_responses/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .openai_responses import * # noqa: F401 F403
diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py
index 25c91fca1..e9a594eba 100644
--- a/llama_stack/distribution/resolver.py
+++ b/llama_stack/distribution/resolver.py
@@ -16,7 +16,6 @@ from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.inspect import Inspect
from llama_stack.apis.models import Models
-from llama_stack.apis.openai_responses.openai_responses import OpenAIResponses
from llama_stack.apis.post_training import PostTraining
from llama_stack.apis.providers import Providers as ProvidersAPI
from llama_stack.apis.safety import Safety
@@ -81,7 +80,6 @@ def api_protocol_map() -> Dict[Api, Any]:
Api.tool_groups: ToolGroups,
Api.tool_runtime: ToolRuntime,
Api.files: Files,
- Api.openai_responses: OpenAIResponses,
}
diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py
index 50416f338..18b0c891f 100644
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@@ -149,8 +149,6 @@ class CommonRoutingTableImpl(RoutingTable):
p.benchmark_store = self
elif api == Api.tool_runtime:
p.tool_store = self
- elif api == Api.openai_responses:
- p.model_store = self
async def shutdown(self) -> None:
for p in self.impls_by_provider_id.values():
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py
index 656178773..38aa6fd97 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -23,6 +23,9 @@ from llama_stack.apis.agents import (
Document,
ListAgentSessionsResponse,
ListAgentsResponse,
+ OpenAIResponseInputMessage,
+ OpenAIResponseInputTool,
+ OpenAIResponseObject,
Session,
Turn,
)
@@ -40,6 +43,7 @@ from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_imp
from .agent_instance import ChatAgent
from .config import MetaReferenceAgentsImplConfig
+from .openai_responses import OpenAIResponsesImpl
logger = logging.getLogger()
logger.setLevel(logging.INFO)
@@ -63,9 +67,16 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_groups_api = tool_groups_api
self.in_memory_store = InmemoryKVStoreImpl()
+ self.openai_responses_impl = None
async def initialize(self) -> None:
self.persistence_store = await kvstore_impl(self.config.persistence_store)
+ self.openai_responses_impl = OpenAIResponsesImpl(
+ self.persistence_store,
+ inference_api=self.inference_api,
+ tool_groups_api=self.tool_groups_api,
+ tool_runtime_api=self.tool_runtime_api,
+ )
# check if "bwrap" is available
if not shutil.which("bwrap"):
@@ -244,3 +255,23 @@ class MetaReferenceAgentsImpl(Agents):
agent_id: str,
) -> ListAgentSessionsResponse:
pass
+
+ # OpenAI responses
+ async def get_openai_response(
+ self,
+ id: str,
+ ) -> OpenAIResponseObject:
+ return await self.openai_responses_impl.get_openai_response(id)
+
+ async def create_openai_response(
+ self,
+ input: Union[str, List[OpenAIResponseInputMessage]],
+ model: str,
+ previous_response_id: Optional[str] = None,
+ store: Optional[bool] = True,
+ stream: Optional[bool] = False,
+ tools: Optional[List[OpenAIResponseInputTool]] = None,
+ ) -> OpenAIResponseObject:
+ return await self.openai_responses_impl.create_openai_response(
+ input, model, previous_response_id, store, stream, tools
+ )
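Because the kvstore only exists after the async `initialize()`, the responses implementation is built there rather than in the constructor, and it shares the agents provider's persistence store. The resulting call path, sketched with `agents_impl` standing in for an initialized `MetaReferenceAgentsImpl`:

```python
# `agents_impl` is a placeholder for an initialized MetaReferenceAgentsImpl.
resp = await agents_impl.create_openai_response(
    input="hi", model="meta-llama/Llama-3.3-70B-Instruct"
)
# The call forwarded to OpenAIResponsesImpl, which persisted the response,
# so it can be read back through the same provider:
same = await agents_impl.get_openai_response(resp.id)
assert same.id == resp.id
```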
diff --git a/llama_stack/providers/inline/openai_responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
similarity index 91%
rename from llama_stack/providers/inline/openai_responses/openai_responses.py
rename to llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index c7d767f73..db1e32f8b 100644
--- a/llama_stack/providers/inline/openai_responses/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -10,6 +10,20 @@ from typing import AsyncIterator, List, Optional, Union, cast
from openai.types.chat import ChatCompletionToolParam
+from llama_stack.apis.agents.openai_responses import (
+ OpenAIResponseInputMessage,
+ OpenAIResponseInputMessageContentImage,
+ OpenAIResponseInputMessageContentText,
+ OpenAIResponseInputTool,
+ OpenAIResponseObject,
+ OpenAIResponseObjectStream,
+ OpenAIResponseObjectStreamResponseCompleted,
+ OpenAIResponseObjectStreamResponseCreated,
+ OpenAIResponseOutput,
+ OpenAIResponseOutputMessage,
+ OpenAIResponseOutputMessageContentOutputText,
+ OpenAIResponseOutputMessageWebSearchToolCall,
+)
from llama_stack.apis.inference.inference import (
Inference,
OpenAIAssistantMessageParam,
@@ -24,29 +38,11 @@ from llama_stack.apis.inference.inference import (
OpenAIToolMessageParam,
OpenAIUserMessageParam,
)
-from llama_stack.apis.models.models import Models, ModelType
-from llama_stack.apis.openai_responses import OpenAIResponses
-from llama_stack.apis.openai_responses.openai_responses import (
- OpenAIResponseInputMessage,
- OpenAIResponseInputMessageContentImage,
- OpenAIResponseInputMessageContentText,
- OpenAIResponseInputTool,
- OpenAIResponseObject,
- OpenAIResponseObjectStream,
- OpenAIResponseObjectStreamResponseCompleted,
- OpenAIResponseObjectStreamResponseCreated,
- OpenAIResponseOutput,
- OpenAIResponseOutputMessage,
- OpenAIResponseOutputMessageContentOutputText,
- OpenAIResponseOutputMessageWebSearchToolCall,
-)
from llama_stack.apis.tools.tools import ToolGroups, ToolInvocationResult, ToolRuntime
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
-from llama_stack.providers.utils.kvstore import kvstore_impl
-
-from .config import OpenAIResponsesImplConfig
+from llama_stack.providers.utils.kvstore import KVStore
logger = get_logger(name=__name__, category="openai_responses")
@@ -80,34 +76,25 @@ async def _openai_choices_to_output_messages(choices: List[OpenAIChoice]) -> Lis
return output_messages
-class OpenAIResponsesImpl(OpenAIResponses):
+class OpenAIResponsesImpl:
def __init__(
self,
- config: OpenAIResponsesImplConfig,
- models_api: Models,
+ persistence_store: KVStore,
inference_api: Inference,
tool_groups_api: ToolGroups,
tool_runtime_api: ToolRuntime,
):
- self.config = config
- self.models_api = models_api
+ self.persistence_store = persistence_store
self.inference_api = inference_api
self.tool_groups_api = tool_groups_api
self.tool_runtime_api = tool_runtime_api
- async def initialize(self) -> None:
- self.kvstore = await kvstore_impl(self.config.kvstore)
-
- async def shutdown(self) -> None:
- logger.debug("OpenAIResponsesImpl.shutdown")
- pass
-
async def get_openai_response(
self,
id: str,
) -> OpenAIResponseObject:
key = f"{OPENAI_RESPONSES_PREFIX}{id}"
- response_json = await self.kvstore.get(key=key)
+ response_json = await self.persistence_store.get(key=key)
if response_json is None:
raise ValueError(f"OpenAI response with id '{id}' not found")
return OpenAIResponseObject.model_validate_json(response_json)
@@ -122,11 +109,6 @@ class OpenAIResponsesImpl(OpenAIResponses):
tools: Optional[List[OpenAIResponseInputTool]] = None,
):
stream = False if stream is None else stream
- model_obj = await self.models_api.get_model(model)
- if model_obj is None:
- raise ValueError(f"Model '{model}' not found")
- if model_obj.model_type == ModelType.embedding:
- raise ValueError(f"Model '{model}' is an embedding model and does not support chat completions")
messages: List[OpenAIMessageParam] = []
if previous_response_id:
@@ -155,7 +137,7 @@ class OpenAIResponsesImpl(OpenAIResponses):
chat_tools = await self._convert_response_tools_to_chat_tools(tools) if tools else None
chat_response = await self.inference_api.openai_chat_completion(
- model=model_obj.identifier,
+ model=model,
messages=messages,
tools=chat_tools,
stream=stream,
@@ -198,14 +180,14 @@ class OpenAIResponsesImpl(OpenAIResponses):
output_messages: List[OpenAIResponseOutput] = []
if chat_response.choices[0].finish_reason == "tool_calls":
output_messages.extend(
- await self._execute_tool_and_return_final_output(model_obj.identifier, stream, chat_response, messages)
+ await self._execute_tool_and_return_final_output(model, stream, chat_response, messages)
)
else:
output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices))
response = OpenAIResponseObject(
created_at=chat_response.created,
id=f"resp-{uuid.uuid4()}",
- model=model_obj.identifier,
+ model=model,
object="response",
status="completed",
output=output_messages,
@@ -214,7 +196,7 @@ class OpenAIResponsesImpl(OpenAIResponses):
if store:
# Store in kvstore
key = f"{OPENAI_RESPONSES_PREFIX}{response.id}"
- await self.kvstore.set(
+ await self.persistence_store.set(
key=key,
value=response.model_dump_json(),
)
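The storage swap is mechanical: every `self.kvstore` reference becomes the injected `self.persistence_store`, with the key scheme unchanged. A sketch of the round trip, assuming the module's `OPENAI_RESPONSES_PREFIX` constant and a pydantic `response` object:

```python
# Hypothetical round trip through the injected persistence store.
key = f"{OPENAI_RESPONSES_PREFIX}{response.id}"
await persistence_store.set(key=key, value=response.model_dump_json())

raw = await persistence_store.get(key=key)
if raw is None:
    raise ValueError(f"OpenAI response with id '{response.id}' not found")
restored = OpenAIResponseObject.model_validate_json(raw)
assert restored.id == response.id
```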
diff --git a/llama_stack/providers/inline/openai_responses/__init__.py b/llama_stack/providers/inline/openai_responses/__init__.py
deleted file mode 100644
index 76f15d478..000000000
--- a/llama_stack/providers/inline/openai_responses/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Any, Dict
-
-from llama_stack.apis.datatypes import Api
-
-from .config import OpenAIResponsesImplConfig
-
-
-async def get_provider_impl(config: OpenAIResponsesImplConfig, deps: Dict[Api, Any]):
- from .openai_responses import OpenAIResponsesImpl
-
- impl = OpenAIResponsesImpl(
- config, deps[Api.models], deps[Api.inference], deps[Api.tool_groups], deps[Api.tool_runtime]
- )
- await impl.initialize()
- return impl
diff --git a/llama_stack/providers/inline/openai_responses/config.py b/llama_stack/providers/inline/openai_responses/config.py
deleted file mode 100644
index f97b2fe68..000000000
--- a/llama_stack/providers/inline/openai_responses/config.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Any, Dict
-
-from pydantic import BaseModel
-
-from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
-
-
-class OpenAIResponsesImplConfig(BaseModel):
- kvstore: KVStoreConfig
-
- @classmethod
- def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]:
- return {
- "kvstore": SqliteKVStoreConfig.sample_run_config(
- __distro_dir__=__distro_dir__,
- db_name="openai_responses.db",
- )
- }
diff --git a/llama_stack/providers/registry/openai_responses.py b/llama_stack/providers/registry/openai_responses.py
deleted file mode 100644
index b7f8d17a0..000000000
--- a/llama_stack/providers/registry/openai_responses.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import List
-
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
-
-
-def available_providers() -> List[ProviderSpec]:
- return [
- InlineProviderSpec(
- api=Api.openai_responses,
- provider_type="inline::openai-responses",
- pip_packages=[],
- module="llama_stack.providers.inline.openai_responses",
- config_class="llama_stack.providers.inline.openai_responses.config.OpenAIResponsesImplConfig",
- api_dependencies=[
- Api.models,
- Api.inference,
- Api.tool_groups,
- Api.tool_runtime,
- ],
- ),
- ]
diff --git a/llama_stack/strong_typing/schema.py b/llama_stack/strong_typing/schema.py
index 0f5121906..e755b4c12 100644
--- a/llama_stack/strong_typing/schema.py
+++ b/llama_stack/strong_typing/schema.py
@@ -478,6 +478,8 @@ class JsonSchemaGenerator:
}
return ret
elif origin_type is Literal:
+            # The generator can only emit a "const" schema for a single-valued
+            # Literal; multi-valued Literals are expected to be spelled as unions
+            # of single-valued Literals (hence the API model changes above).
+            if len(typing.get_args(typ)) != 1:
+                print(f"Literal type {typ} has {len(typing.get_args(typ))} arguments")
(literal_value,) = typing.get_args(typ) # unpack value of literal type
schema = self.type_to_schema(type(literal_value))
schema["const"] = literal_value
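This guard is also why the API models earlier in the diff spell roles as `Literal["system"] | Literal["developer"] | ...`: the generator emits a `const` schema for a single-valued Literal and a `oneOf` for a union of them, while a multi-valued Literal would trip the single-value unpack below. A sketch of the two spellings:

```python
from typing import Literal, Union, get_args

RoleMulti = Literal["system", "developer", "user", "assistant"]  # one Literal, four args
RoleUnion = Union[
    Literal["system"], Literal["developer"], Literal["user"], Literal["assistant"]
]  # a union of four single-argument Literals

print(get_args(RoleMulti))  # ('system', 'developer', 'user', 'assistant')
print(get_args(RoleUnion))  # (Literal['system'], Literal['developer'], ...)
# Each member of RoleUnion is a Literal with exactly one argument -- the shape
# that `(literal_value,) = typing.get_args(typ)` expects.
```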
diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml
index b344f5e5a..b2bbf853a 100644
--- a/llama_stack/templates/remote-vllm/build.yaml
+++ b/llama_stack/templates/remote-vllm/build.yaml
@@ -24,8 +24,6 @@ distribution_spec:
- inline::braintrust
telemetry:
- inline::meta-reference
- openai_responses:
- - inline::openai-responses
tool_runtime:
- remote::brave-search
- remote::tavily-search
diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index a58417714..bb69496aa 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -5,7 +5,6 @@ apis:
- datasetio
- eval
- inference
-- openai_responses
- safety
- scoring
- telemetry
@@ -92,14 +91,6 @@ providers:
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
- openai_responses:
- - provider_id: openai-responses
- provider_type: inline::openai-responses
- config:
- kvstore:
- type: sqlite
- namespace: null
- db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/openai_responses.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index 58087bba3..14f2da37e 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -5,7 +5,6 @@ apis:
- datasetio
- eval
- inference
-- openai_responses
- safety
- scoring
- telemetry
@@ -85,14 +84,6 @@ providers:
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
- openai_responses:
- - provider_id: openai-responses
- provider_type: inline::openai-responses
- config:
- kvstore:
- type: sqlite
- namespace: null
- db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/openai_responses.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index 12515d1ad..0f6c7659e 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -31,7 +31,6 @@ def get_distribution_template() -> DistributionTemplate:
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"telemetry": ["inline::meta-reference"],
- "openai_responses": ["inline::openai-responses"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml
index 81a47c5cd..834a3ecaf 100644
--- a/llama_stack/templates/together/build.yaml
+++ b/llama_stack/templates/together/build.yaml
@@ -24,8 +24,6 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
- openai_responses:
- - inline::openai-responses
tool_runtime:
- remote::brave-search
- remote::tavily-search
diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml
index fbeafce19..105ce896d 100644
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@@ -5,7 +5,6 @@ apis:
- datasetio
- eval
- inference
-- openai_responses
- safety
- scoring
- telemetry
@@ -88,14 +87,6 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
- openai_responses:
- - provider_id: openai-responses
- provider_type: inline::openai-responses
- config:
- kvstore:
- type: sqlite
- namespace: null
- db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/openai_responses.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml
index 0c5d82c13..1f1613655 100644
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@@ -5,7 +5,6 @@ apis:
- datasetio
- eval
- inference
-- openai_responses
- safety
- scoring
- telemetry
@@ -83,14 +82,6 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
- openai_responses:
- - provider_id: openai-responses
- provider_type: inline::openai-responses
- config:
- kvstore:
- type: sqlite
- namespace: null
- db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/openai_responses.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py
index 85b7645b3..a2bd87c97 100644
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@@ -36,7 +36,6 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
- "openai_responses": ["inline::openai-responses"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",