feat: OpenAI Responses API (#1989)

# What does this PR do?

This provides an initial [OpenAI Responses
API](https://platform.openai.com/docs/api-reference/responses)
implementation. The API is not yet complete, and this is more a
proof-of-concept to show how we can store responses in our key-value
stores and use them to support the Responses API concepts like
`previous_response_id`.

## Test Plan

I've added a new
`tests/integration/openai_responses/test_openai_responses.py` as part of
a test-driven development for this new API. I'm only testing this
locally with the remote-vllm provider for now, but it should work with
any of our inference providers since the only API it requires out of the
inference provider is the `openai_chat_completion` endpoint.

```
VLLM_URL="http://localhost:8000/v1" \
INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" \
llama stack build --template remote-vllm --image-type venv --run
```

```
LLAMA_STACK_CONFIG="http://localhost:8321" \
python -m pytest -v \
  tests/integration/openai_responses/test_openai_responses.py \
  --text-model "meta-llama/Llama-3.2-3B-Instruct"
 ```

---------

Signed-off-by: Ben Browning <bbrownin@redhat.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
Ben Browning 2025-04-28 17:06:00 -04:00 committed by GitHub
parent 79851d93aa
commit 8dfce2f596
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 1766 additions and 59 deletions

View file

@ -497,6 +497,54 @@
}
}
},
"/v1/openai/v1/responses": {
"post": {
"responses": {
"200": {
"description": "Runtime representation of an annotated type.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObject"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObjectStream"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Create a new OpenAI response.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateOpenaiResponseRequest"
}
}
},
"required": true
}
}
},
"/v1/files": {
"get": {
"responses": {
@ -1278,6 +1326,49 @@
]
}
},
"/v1/openai/v1/responses/{id}": {
"get": {
"responses": {
"200": {
"description": "An OpenAIResponseObject.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObject"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Retrieve an OpenAI response by its ID.",
"parameters": [
{
"name": "id",
"in": "path",
"description": "The ID of the OpenAI response to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/scoring-functions/{scoring_fn_id}": {
"get": {
"responses": {
@ -6192,6 +6283,427 @@
],
"title": "AgentTurnResponseTurnStartPayload"
},
"OpenAIResponseInputMessage": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
}
}
]
},
"role": {
"oneOf": [
{
"type": "string",
"const": "system"
},
{
"type": "string",
"const": "developer"
},
{
"type": "string",
"const": "user"
},
{
"type": "string",
"const": "assistant"
}
]
},
"type": {
"type": "string",
"const": "message",
"default": "message"
}
},
"additionalProperties": false,
"required": [
"content",
"role"
],
"title": "OpenAIResponseInputMessage"
},
"OpenAIResponseInputMessageContent": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentText"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
}
}
},
"OpenAIResponseInputMessageContentImage": {
"type": "object",
"properties": {
"detail": {
"oneOf": [
{
"type": "string",
"const": "low"
},
{
"type": "string",
"const": "high"
},
{
"type": "string",
"const": "auto"
}
],
"default": "auto"
},
"type": {
"type": "string",
"const": "input_image",
"default": "input_image"
},
"image_url": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"detail",
"type"
],
"title": "OpenAIResponseInputMessageContentImage"
},
"OpenAIResponseInputMessageContentText": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"type": {
"type": "string",
"const": "input_text",
"default": "input_text"
}
},
"additionalProperties": false,
"required": [
"text",
"type"
],
"title": "OpenAIResponseInputMessageContentText"
},
"OpenAIResponseInputTool": {
"type": "object",
"properties": {
"type": {
"oneOf": [
{
"type": "string",
"const": "web_search"
},
{
"type": "string",
"const": "web_search_preview_2025_03_11"
}
],
"default": "web_search"
},
"search_context_size": {
"type": "string",
"default": "medium"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "OpenAIResponseInputToolWebSearch"
},
"CreateOpenaiResponseRequest": {
"type": "object",
"properties": {
"input": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessage"
}
}
],
"description": "Input message(s) to create the response."
},
"model": {
"type": "string",
"description": "The underlying LLM used for completions."
},
"previous_response_id": {
"type": "string",
"description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
},
"store": {
"type": "boolean"
},
"stream": {
"type": "boolean"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputTool"
}
}
},
"additionalProperties": false,
"required": [
"input",
"model"
],
"title": "CreateOpenaiResponseRequest"
},
"OpenAIResponseError": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"message": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"code",
"message"
],
"title": "OpenAIResponseError"
},
"OpenAIResponseObject": {
"type": "object",
"properties": {
"created_at": {
"type": "integer"
},
"error": {
"$ref": "#/components/schemas/OpenAIResponseError"
},
"id": {
"type": "string"
},
"model": {
"type": "string"
},
"object": {
"type": "string",
"const": "response",
"default": "response"
},
"output": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutput"
}
},
"parallel_tool_calls": {
"type": "boolean",
"default": false
},
"previous_response_id": {
"type": "string"
},
"status": {
"type": "string"
},
"temperature": {
"type": "number"
},
"top_p": {
"type": "number"
},
"truncation": {
"type": "string"
},
"user": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"created_at",
"id",
"model",
"object",
"output",
"parallel_tool_calls",
"status"
],
"title": "OpenAIResponseObject"
},
"OpenAIResponseOutput": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessage"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"message": "#/components/schemas/OpenAIResponseOutputMessage",
"web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
}
}
},
"OpenAIResponseOutputMessage": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"content": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutputMessageContent"
}
},
"role": {
"type": "string",
"const": "assistant",
"default": "assistant"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "message",
"default": "message"
}
},
"additionalProperties": false,
"required": [
"id",
"content",
"role",
"status",
"type"
],
"title": "OpenAIResponseOutputMessage"
},
"OpenAIResponseOutputMessageContent": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"type": {
"type": "string",
"const": "output_text",
"default": "output_text"
}
},
"additionalProperties": false,
"required": [
"text",
"type"
],
"title": "OpenAIResponseOutputMessageContentOutputText"
},
"OpenAIResponseOutputMessageWebSearchToolCall": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "web_search_call",
"default": "web_search_call"
}
},
"additionalProperties": false,
"required": [
"id",
"status",
"type"
],
"title": "OpenAIResponseOutputMessageWebSearchToolCall"
},
"OpenAIResponseObjectStream": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
},
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
"response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
}
},
"OpenAIResponseObjectStreamResponseCompleted": {
"type": "object",
"properties": {
"response": {
"$ref": "#/components/schemas/OpenAIResponseObject"
},
"type": {
"type": "string",
"const": "response.completed",
"default": "response.completed"
}
},
"additionalProperties": false,
"required": [
"response",
"type"
],
"title": "OpenAIResponseObjectStreamResponseCompleted"
},
"OpenAIResponseObjectStreamResponseCreated": {
"type": "object",
"properties": {
"response": {
"$ref": "#/components/schemas/OpenAIResponseObject"
},
"type": {
"type": "string",
"const": "response.created",
"default": "response.created"
}
},
"additionalProperties": false,
"required": [
"response",
"type"
],
"title": "OpenAIResponseObjectStreamResponseCreated"
},
"CreateUploadSessionRequest": {
"type": "object",
"properties": {

View file

@ -330,6 +330,39 @@ paths:
schema:
$ref: '#/components/schemas/CreateAgentTurnRequest'
required: true
/v1/openai/v1/responses:
post:
responses:
'200':
description: >-
Runtime representation of an annotated type.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIResponseObject'
text/event-stream:
schema:
$ref: '#/components/schemas/OpenAIResponseObjectStream'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Create a new OpenAI response.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
required: true
/v1/files:
get:
responses:
@ -875,6 +908,36 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/responses/{id}:
get:
responses:
'200':
description: An OpenAIResponseObject.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIResponseObject'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Retrieve an OpenAI response by its ID.
parameters:
- name: id
in: path
description: >-
The ID of the OpenAI response to retrieve.
required: true
schema:
type: string
/v1/scoring-functions/{scoring_fn_id}:
get:
responses:
@ -4329,6 +4392,293 @@ components:
- event_type
- turn_id
title: AgentTurnResponseTurnStartPayload
OpenAIResponseInputMessage:
type: object
properties:
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
role:
oneOf:
- type: string
const: system
- type: string
const: developer
- type: string
const: user
- type: string
const: assistant
type:
type: string
const: message
default: message
additionalProperties: false
required:
- content
- role
title: OpenAIResponseInputMessage
OpenAIResponseInputMessageContent:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
discriminator:
propertyName: type
mapping:
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
OpenAIResponseInputMessageContentImage:
type: object
properties:
detail:
oneOf:
- type: string
const: low
- type: string
const: high
- type: string
const: auto
default: auto
type:
type: string
const: input_image
default: input_image
image_url:
type: string
additionalProperties: false
required:
- detail
- type
title: OpenAIResponseInputMessageContentImage
OpenAIResponseInputMessageContentText:
type: object
properties:
text:
type: string
type:
type: string
const: input_text
default: input_text
additionalProperties: false
required:
- text
- type
title: OpenAIResponseInputMessageContentText
OpenAIResponseInputTool:
type: object
properties:
type:
oneOf:
- type: string
const: web_search
- type: string
const: web_search_preview_2025_03_11
default: web_search
search_context_size:
type: string
default: medium
additionalProperties: false
required:
- type
title: OpenAIResponseInputToolWebSearch
CreateOpenaiResponseRequest:
type: object
properties:
input:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessage'
description: Input message(s) to create the response.
model:
type: string
description: The underlying LLM used for completions.
previous_response_id:
type: string
description: >-
(Optional) if specified, the new response will be a continuation of the
previous response. This can be used to easily fork-off new responses from
existing responses.
store:
type: boolean
stream:
type: boolean
tools:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputTool'
additionalProperties: false
required:
- input
- model
title: CreateOpenaiResponseRequest
OpenAIResponseError:
type: object
properties:
code:
type: string
message:
type: string
additionalProperties: false
required:
- code
- message
title: OpenAIResponseError
OpenAIResponseObject:
type: object
properties:
created_at:
type: integer
error:
$ref: '#/components/schemas/OpenAIResponseError'
id:
type: string
model:
type: string
object:
type: string
const: response
default: response
output:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutput'
parallel_tool_calls:
type: boolean
default: false
previous_response_id:
type: string
status:
type: string
temperature:
type: number
top_p:
type: number
truncation:
type: string
user:
type: string
additionalProperties: false
required:
- created_at
- id
- model
- object
- output
- parallel_tool_calls
- status
title: OpenAIResponseObject
OpenAIResponseOutput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessage'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
discriminator:
propertyName: type
mapping:
message: '#/components/schemas/OpenAIResponseOutputMessage'
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
OpenAIResponseOutputMessage:
type: object
properties:
id:
type: string
content:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutputMessageContent'
role:
type: string
const: assistant
default: assistant
status:
type: string
type:
type: string
const: message
default: message
additionalProperties: false
required:
- id
- content
- role
- status
- type
title: OpenAIResponseOutputMessage
OpenAIResponseOutputMessageContent:
type: object
properties:
text:
type: string
type:
type: string
const: output_text
default: output_text
additionalProperties: false
required:
- text
- type
title: >-
OpenAIResponseOutputMessageContentOutputText
"OpenAIResponseOutputMessageWebSearchToolCall":
type: object
properties:
id:
type: string
status:
type: string
type:
type: string
const: web_search_call
default: web_search_call
additionalProperties: false
required:
- id
- status
- type
title: >-
OpenAIResponseOutputMessageWebSearchToolCall
OpenAIResponseObjectStream:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
discriminator:
propertyName: type
mapping:
response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
"OpenAIResponseObjectStreamResponseCompleted":
type: object
properties:
response:
$ref: '#/components/schemas/OpenAIResponseObject'
type:
type: string
const: response.completed
default: response.completed
additionalProperties: false
required:
- response
- type
title: >-
OpenAIResponseObjectStreamResponseCompleted
"OpenAIResponseObjectStreamResponseCreated":
type: object
properties:
response:
$ref: '#/components/schemas/OpenAIResponseObject'
type:
type: string
const: response.created
default: response.created
additionalProperties: false
required:
- response
- type
title: >-
OpenAIResponseObjectStreamResponseCreated
CreateUploadSessionRequest:
type: object
properties: