diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 4c5393947..49c402d37 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -497,6 +497,54 @@
}
}
},
+ "/v1/openai/v1/responses": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "Runtime representation of an annotated type.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/OpenAIResponseObject"
+ }
+ },
+ "text/event-stream": {
+ "schema": {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStream"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Agents"
+ ],
+ "description": "Create a new OpenAI response.",
+ "parameters": [],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/CreateOpenaiResponseRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/v1/files": {
"get": {
"responses": {
@@ -1278,6 +1326,49 @@
]
}
},
+ "/v1/openai/v1/responses/{id}": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "An OpenAIResponseObject.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/OpenAIResponseObject"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Agents"
+ ],
+ "description": "Retrieve an OpenAI response by its ID.",
+ "parameters": [
+ {
+ "name": "id",
+ "in": "path",
+ "description": "The ID of the OpenAI response to retrieve.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/v1/scoring-functions/{scoring_fn_id}": {
"get": {
"responses": {
@@ -6192,6 +6283,427 @@
],
"title": "AgentTurnResponseTurnStartPayload"
},
+ "OpenAIResponseInputMessage": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
+ }
+ }
+ ]
+ },
+ "role": {
+ "oneOf": [
+ {
+ "type": "string",
+ "const": "system"
+ },
+ {
+ "type": "string",
+ "const": "developer"
+ },
+ {
+ "type": "string",
+ "const": "user"
+ },
+ {
+ "type": "string",
+ "const": "assistant"
+ }
+ ]
+ },
+ "type": {
+ "type": "string",
+ "const": "message",
+ "default": "message"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "content",
+ "role"
+ ],
+ "title": "OpenAIResponseInputMessage"
+ },
+ "OpenAIResponseInputMessageContent": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
+ "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+ }
+ }
+ },
+ "OpenAIResponseInputMessageContentImage": {
+ "type": "object",
+ "properties": {
+ "detail": {
+ "oneOf": [
+ {
+ "type": "string",
+ "const": "low"
+ },
+ {
+ "type": "string",
+ "const": "high"
+ },
+ {
+ "type": "string",
+ "const": "auto"
+ }
+ ],
+ "default": "auto"
+ },
+ "type": {
+ "type": "string",
+ "const": "input_image",
+ "default": "input_image"
+ },
+ "image_url": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "detail",
+ "type"
+ ],
+ "title": "OpenAIResponseInputMessageContentImage"
+ },
+ "OpenAIResponseInputMessageContentText": {
+ "type": "object",
+ "properties": {
+ "text": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "input_text",
+ "default": "input_text"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "text",
+ "type"
+ ],
+ "title": "OpenAIResponseInputMessageContentText"
+ },
+ "OpenAIResponseInputTool": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "oneOf": [
+ {
+ "type": "string",
+ "const": "web_search"
+ },
+ {
+ "type": "string",
+ "const": "web_search_preview_2025_03_11"
+ }
+ ],
+ "default": "web_search"
+ },
+ "search_context_size": {
+ "type": "string",
+ "default": "medium"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ],
+ "title": "OpenAIResponseInputToolWebSearch"
+ },
+ "CreateOpenaiResponseRequest": {
+ "type": "object",
+ "properties": {
+ "input": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseInputMessage"
+ }
+ }
+ ],
+ "description": "Input message(s) to create the response."
+ },
+ "model": {
+ "type": "string",
+ "description": "The underlying LLM used for completions."
+ },
+ "previous_response_id": {
+ "type": "string",
+ "description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
+ },
+ "store": {
+ "type": "boolean"
+ },
+ "stream": {
+ "type": "boolean"
+ },
+ "tools": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseInputTool"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "input",
+ "model"
+ ],
+ "title": "CreateOpenaiResponseRequest"
+ },
+ "OpenAIResponseError": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "string"
+ },
+ "message": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "code",
+ "message"
+ ],
+ "title": "OpenAIResponseError"
+ },
+ "OpenAIResponseObject": {
+ "type": "object",
+ "properties": {
+ "created_at": {
+ "type": "integer"
+ },
+ "error": {
+ "$ref": "#/components/schemas/OpenAIResponseError"
+ },
+ "id": {
+ "type": "string"
+ },
+ "model": {
+ "type": "string"
+ },
+ "object": {
+ "type": "string",
+ "const": "response",
+ "default": "response"
+ },
+ "output": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseOutput"
+ }
+ },
+ "parallel_tool_calls": {
+ "type": "boolean",
+ "default": false
+ },
+ "previous_response_id": {
+ "type": "string"
+ },
+ "status": {
+ "type": "string"
+ },
+ "temperature": {
+ "type": "number"
+ },
+ "top_p": {
+ "type": "number"
+ },
+ "truncation": {
+ "type": "string"
+ },
+ "user": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "created_at",
+ "id",
+ "model",
+ "object",
+ "output",
+ "parallel_tool_calls",
+ "status"
+ ],
+ "title": "OpenAIResponseObject"
+ },
+ "OpenAIResponseOutput": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/OpenAIResponseOutputMessage"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "message": "#/components/schemas/OpenAIResponseOutputMessage",
+ "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
+ }
+ }
+ },
+ "OpenAIResponseOutputMessage": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string"
+ },
+ "content": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIResponseOutputMessageContent"
+ }
+ },
+ "role": {
+ "type": "string",
+ "const": "assistant",
+ "default": "assistant"
+ },
+ "status": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "message",
+ "default": "message"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "content",
+ "role",
+ "status",
+ "type"
+ ],
+ "title": "OpenAIResponseOutputMessage"
+ },
+ "OpenAIResponseOutputMessageContent": {
+ "type": "object",
+ "properties": {
+ "text": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "output_text",
+ "default": "output_text"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "text",
+ "type"
+ ],
+ "title": "OpenAIResponseOutputMessageContentOutputText"
+ },
+ "OpenAIResponseOutputMessageWebSearchToolCall": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string"
+ },
+ "status": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "web_search_call",
+ "default": "web_search_call"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "status",
+ "type"
+ ],
+ "title": "OpenAIResponseOutputMessageWebSearchToolCall"
+ },
+ "OpenAIResponseObjectStream": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
+ "response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
+ }
+ }
+ },
+ "OpenAIResponseObjectStreamResponseCompleted": {
+ "type": "object",
+ "properties": {
+ "response": {
+ "$ref": "#/components/schemas/OpenAIResponseObject"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.completed",
+ "default": "response.completed"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "response",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseCompleted"
+ },
+ "OpenAIResponseObjectStreamResponseCreated": {
+ "type": "object",
+ "properties": {
+ "response": {
+ "$ref": "#/components/schemas/OpenAIResponseObject"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.created",
+ "default": "response.created"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "response",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseCreated"
+ },
"CreateUploadSessionRequest": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index a24f1a9db..e5bfad623 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -330,6 +330,39 @@ paths:
schema:
$ref: '#/components/schemas/CreateAgentTurnRequest'
required: true
+ /v1/openai/v1/responses:
+ post:
+ responses:
+ '200':
+ description: >-
+ An OpenAIResponseObject, or an async iterator of OpenAIResponseObjectStream chunks when streaming.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/OpenAIResponseObject'
+ text/event-stream:
+ schema:
+ $ref: '#/components/schemas/OpenAIResponseObjectStream'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Agents
+ description: Create a new OpenAI response.
+ parameters: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/CreateOpenaiResponseRequest'
+ required: true
/v1/files:
get:
responses:
@@ -875,6 +908,36 @@ paths:
required: true
schema:
type: string
+ /v1/openai/v1/responses/{id}:
+ get:
+ responses:
+ '200':
+ description: An OpenAIResponseObject.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/OpenAIResponseObject'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Agents
+ description: Retrieve an OpenAI response by its ID.
+ parameters:
+ - name: id
+ in: path
+ description: >-
+ The ID of the OpenAI response to retrieve.
+ required: true
+ schema:
+ type: string
/v1/scoring-functions/{scoring_fn_id}:
get:
responses:
@@ -4329,6 +4392,293 @@ components:
- event_type
- turn_id
title: AgentTurnResponseTurnStartPayload
+ OpenAIResponseInputMessage:
+ type: object
+ properties:
+ content:
+ oneOf:
+ - type: string
+ - type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseInputMessageContent'
+ role:
+ oneOf:
+ - type: string
+ const: system
+ - type: string
+ const: developer
+ - type: string
+ const: user
+ - type: string
+ const: assistant
+ type:
+ type: string
+ const: message
+ default: message
+ additionalProperties: false
+ required:
+ - content
+ - role
+ title: OpenAIResponseInputMessage
+ OpenAIResponseInputMessageContent:
+ oneOf:
+ - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
+ - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+ discriminator:
+ propertyName: type
+ mapping:
+ input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
+ input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+ OpenAIResponseInputMessageContentImage:
+ type: object
+ properties:
+ detail:
+ oneOf:
+ - type: string
+ const: low
+ - type: string
+ const: high
+ - type: string
+ const: auto
+ default: auto
+ type:
+ type: string
+ const: input_image
+ default: input_image
+ image_url:
+ type: string
+ additionalProperties: false
+ required:
+ - detail
+ - type
+ title: OpenAIResponseInputMessageContentImage
+ OpenAIResponseInputMessageContentText:
+ type: object
+ properties:
+ text:
+ type: string
+ type:
+ type: string
+ const: input_text
+ default: input_text
+ additionalProperties: false
+ required:
+ - text
+ - type
+ title: OpenAIResponseInputMessageContentText
+ OpenAIResponseInputTool:
+ type: object
+ properties:
+ type:
+ oneOf:
+ - type: string
+ const: web_search
+ - type: string
+ const: web_search_preview_2025_03_11
+ default: web_search
+ search_context_size:
+ type: string
+ default: medium
+ additionalProperties: false
+ required:
+ - type
+ title: OpenAIResponseInputToolWebSearch
+ CreateOpenaiResponseRequest:
+ type: object
+ properties:
+ input:
+ oneOf:
+ - type: string
+ - type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseInputMessage'
+ description: Input message(s) to create the response.
+ model:
+ type: string
+ description: The underlying LLM used for completions.
+ previous_response_id:
+ type: string
+ description: >-
+ (Optional) if specified, the new response will be a continuation of the
+ previous response. This can be used to easily fork off new responses from
+ existing responses.
+ store:
+ type: boolean
+ stream:
+ type: boolean
+ tools:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseInputTool'
+ additionalProperties: false
+ required:
+ - input
+ - model
+ title: CreateOpenaiResponseRequest
+ OpenAIResponseError:
+ type: object
+ properties:
+ code:
+ type: string
+ message:
+ type: string
+ additionalProperties: false
+ required:
+ - code
+ - message
+ title: OpenAIResponseError
+ OpenAIResponseObject:
+ type: object
+ properties:
+ created_at:
+ type: integer
+ error:
+ $ref: '#/components/schemas/OpenAIResponseError'
+ id:
+ type: string
+ model:
+ type: string
+ object:
+ type: string
+ const: response
+ default: response
+ output:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseOutput'
+ parallel_tool_calls:
+ type: boolean
+ default: false
+ previous_response_id:
+ type: string
+ status:
+ type: string
+ temperature:
+ type: number
+ top_p:
+ type: number
+ truncation:
+ type: string
+ user:
+ type: string
+ additionalProperties: false
+ required:
+ - created_at
+ - id
+ - model
+ - object
+ - output
+ - parallel_tool_calls
+ - status
+ title: OpenAIResponseObject
+ OpenAIResponseOutput:
+ oneOf:
+ - $ref: '#/components/schemas/OpenAIResponseOutputMessage'
+ - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+ discriminator:
+ propertyName: type
+ mapping:
+ message: '#/components/schemas/OpenAIResponseOutputMessage'
+ web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+ OpenAIResponseOutputMessage:
+ type: object
+ properties:
+ id:
+ type: string
+ content:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIResponseOutputMessageContent'
+ role:
+ type: string
+ const: assistant
+ default: assistant
+ status:
+ type: string
+ type:
+ type: string
+ const: message
+ default: message
+ additionalProperties: false
+ required:
+ - id
+ - content
+ - role
+ - status
+ - type
+ title: OpenAIResponseOutputMessage
+ OpenAIResponseOutputMessageContent:
+ type: object
+ properties:
+ text:
+ type: string
+ type:
+ type: string
+ const: output_text
+ default: output_text
+ additionalProperties: false
+ required:
+ - text
+ - type
+ title: >-
+ OpenAIResponseOutputMessageContentOutputText
+ "OpenAIResponseOutputMessageWebSearchToolCall":
+ type: object
+ properties:
+ id:
+ type: string
+ status:
+ type: string
+ type:
+ type: string
+ const: web_search_call
+ default: web_search_call
+ additionalProperties: false
+ required:
+ - id
+ - status
+ - type
+ title: >-
+ OpenAIResponseOutputMessageWebSearchToolCall
+ OpenAIResponseObjectStream:
+ oneOf:
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
+ discriminator:
+ propertyName: type
+ mapping:
+ response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+ response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
+ "OpenAIResponseObjectStreamResponseCompleted":
+ type: object
+ properties:
+ response:
+ $ref: '#/components/schemas/OpenAIResponseObject'
+ type:
+ type: string
+ const: response.completed
+ default: response.completed
+ additionalProperties: false
+ required:
+ - response
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseCompleted
+ "OpenAIResponseObjectStreamResponseCreated":
+ type: object
+ properties:
+ response:
+ $ref: '#/components/schemas/OpenAIResponseObject'
+ type:
+ type: string
+ const: response.created
+ default: response.created
+ additionalProperties: false
+ required:
+ - response
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseCreated
CreateUploadSessionRequest:
type: object
properties:
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 3936bb3c4..6d5e48a46 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -179,7 +179,7 @@ class ContentBuilder:
"Creates the content subtree for a request or response."
def is_iterator_type(t):
- return "StreamChunk" in str(t)
+ return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t)
def get_media_type(t):
if is_generic_list(t):
diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index dec43280b..4db6e2226 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -38,6 +38,13 @@ from llama_stack.apis.safety import SafetyViolation
from llama_stack.apis.tools import ToolDef
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from .openai_responses import (
+ OpenAIResponseInputMessage,
+ OpenAIResponseInputTool,
+ OpenAIResponseObject,
+ OpenAIResponseObjectStream,
+)
+
class Attachment(BaseModel):
"""An attachment to an agent turn.
@@ -593,3 +600,39 @@ class Agents(Protocol):
:returns: A ListAgentSessionsResponse.
"""
...
+
+ # We situate the OpenAI Responses API within the Agents API, just as we did
+ # for Inference. The Responses API, in its intent, serves the same purpose as
+ # the Agents API above -- it is essentially a lightweight "agentic loop" with
+ # integrated tool calling.
+ #
+ # Both of these APIs are inherently stateful.
+
+ @webmethod(route="/openai/v1/responses/{id}", method="GET")
+ async def get_openai_response(
+ self,
+ id: str,
+ ) -> OpenAIResponseObject:
+ """Retrieve an OpenAI response by its ID.
+
+ :param id: The ID of the OpenAI response to retrieve.
+ :returns: An OpenAIResponseObject.
+ """
+ ...
+
+ @webmethod(route="/openai/v1/responses", method="POST")
+ async def create_openai_response(
+ self,
+ input: Union[str, List[OpenAIResponseInputMessage]],
+ model: str,
+ previous_response_id: Optional[str] = None,
+ store: Optional[bool] = True,
+ stream: Optional[bool] = False,
+ tools: Optional[List[OpenAIResponseInputTool]] = None,
+ ) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]:
+ """Create a new OpenAI response.
+
+ :param input: Input message(s) to create the response.
+ :param model: The underlying LLM used for completions.
+ :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork off new responses from existing responses.
+ :param store: (Optional) whether to store the response so it can be retrieved later. Defaults to True.
+ :param stream: (Optional) whether to stream the response as server-sent events. Defaults to False.
+ :param tools: (Optional) a list of tools the model may invoke while generating the response.
+ :returns: An OpenAIResponseObject, or an AsyncIterator of OpenAIResponseObjectStream chunks when streaming.
+ """
+ ...
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
new file mode 100644
index 000000000..72f16e224
--- /dev/null
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -0,0 +1,140 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import List, Literal, Optional, Union
+
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated
+
+from llama_stack.schema_utils import json_schema_type, register_schema
+
+
+@json_schema_type
+class OpenAIResponseError(BaseModel):
+ code: str
+ message: str
+
+
+@json_schema_type
+class OpenAIResponseOutputMessageContentOutputText(BaseModel):
+ text: str
+ type: Literal["output_text"] = "output_text"
+
+
+OpenAIResponseOutputMessageContent = Annotated[
+ Union[OpenAIResponseOutputMessageContentOutputText,],
+ Field(discriminator="type"),
+]
+register_schema(OpenAIResponseOutputMessageContent, name="OpenAIResponseOutputMessageContent")
+
+
+@json_schema_type
+class OpenAIResponseOutputMessage(BaseModel):
+ id: str
+ content: List[OpenAIResponseOutputMessageContent]
+ role: Literal["assistant"] = "assistant"
+ status: str
+ type: Literal["message"] = "message"
+
+
+@json_schema_type
+class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
+ id: str
+ status: str
+ type: Literal["web_search_call"] = "web_search_call"
+
+
+OpenAIResponseOutput = Annotated[
+ Union[
+ OpenAIResponseOutputMessage,
+ OpenAIResponseOutputMessageWebSearchToolCall,
+ ],
+ Field(discriminator="type"),
+]
+register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
+
+
+@json_schema_type
+class OpenAIResponseObject(BaseModel):
+ created_at: int
+ error: Optional[OpenAIResponseError] = None
+ id: str
+ model: str
+ object: Literal["response"] = "response"
+ output: List[OpenAIResponseOutput]
+ parallel_tool_calls: bool = False
+ previous_response_id: Optional[str] = None
+ status: str
+ temperature: Optional[float] = None
+ top_p: Optional[float] = None
+ truncation: Optional[str] = None
+ user: Optional[str] = None
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseCreated(BaseModel):
+ response: OpenAIResponseObject
+ type: Literal["response.created"] = "response.created"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
+ response: OpenAIResponseObject
+ type: Literal["response.completed"] = "response.completed"
+
+
+OpenAIResponseObjectStream = Annotated[
+ Union[
+ OpenAIResponseObjectStreamResponseCreated,
+ OpenAIResponseObjectStreamResponseCompleted,
+ ],
+ Field(discriminator="type"),
+]
+register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
+
+
+@json_schema_type
+class OpenAIResponseInputMessageContentText(BaseModel):
+ text: str
+ type: Literal["input_text"] = "input_text"
+
+
+@json_schema_type
+class OpenAIResponseInputMessageContentImage(BaseModel):
+ detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
+ type: Literal["input_image"] = "input_image"
+ # TODO: handle file_id
+ image_url: Optional[str] = None
+
+
+# TODO: handle file content types
+OpenAIResponseInputMessageContent = Annotated[
+ Union[OpenAIResponseInputMessageContentText, OpenAIResponseInputMessageContentImage],
+ Field(discriminator="type"),
+]
+register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
+
+
+@json_schema_type
+class OpenAIResponseInputMessage(BaseModel):
+ content: Union[str, List[OpenAIResponseInputMessageContent]]
+ role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
+ type: Optional[Literal["message"]] = "message"
+
+
+@json_schema_type
+class OpenAIResponseInputToolWebSearch(BaseModel):
+ type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
+ # TODO: actually use search_context_size somewhere...
+ search_context_size: Optional[str] = Field(default="medium", pattern="^(low|medium|high)$")
+ # TODO: add user_location
+
+
+OpenAIResponseInputTool = Annotated[
+ Union[OpenAIResponseInputToolWebSearch,],
+ Field(discriminator="type"),
+]
+register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
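The input/output types above lean on pydantic discriminated unions keyed on the `type` field. A small sketch of how they compose (the image URL is illustrative only):

```python
from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInputMessage,
    OpenAIResponseInputMessageContentImage,
    OpenAIResponseInputMessageContentText,
)

# Build a user message whose content mixes text and image parts.
message = OpenAIResponseInputMessage(
    role="user",
    content=[
        OpenAIResponseInputMessageContentText(text="Identify the animal in this image."),
        OpenAIResponseInputMessageContentImage(image_url="https://example.com/llama.jpg"),
    ],
)

# Round-tripping through JSON restores the concrete content classes because the
# union is discriminated on "type" ("input_text" vs "input_image").
restored = OpenAIResponseInputMessage.model_validate_json(message.model_dump_json())
assert isinstance(restored.content[1], OpenAIResponseInputMessageContentImage)
assert restored.content[1].detail == "auto"  # default declared above
```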
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py
index 656178773..38aa6fd97 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -23,6 +23,9 @@ from llama_stack.apis.agents import (
Document,
ListAgentSessionsResponse,
ListAgentsResponse,
+ OpenAIResponseInputMessage,
+ OpenAIResponseInputTool,
+ OpenAIResponseObject,
Session,
Turn,
)
@@ -40,6 +43,7 @@ from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_imp
from .agent_instance import ChatAgent
from .config import MetaReferenceAgentsImplConfig
+from .openai_responses import OpenAIResponsesImpl
logger = logging.getLogger()
logger.setLevel(logging.INFO)
@@ -63,9 +67,16 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_groups_api = tool_groups_api
self.in_memory_store = InmemoryKVStoreImpl()
+ self.openai_responses_impl = None
async def initialize(self) -> None:
self.persistence_store = await kvstore_impl(self.config.persistence_store)
+ self.openai_responses_impl = OpenAIResponsesImpl(
+ self.persistence_store,
+ inference_api=self.inference_api,
+ tool_groups_api=self.tool_groups_api,
+ tool_runtime_api=self.tool_runtime_api,
+ )
# check if "bwrap" is available
if not shutil.which("bwrap"):
@@ -244,3 +255,23 @@ class MetaReferenceAgentsImpl(Agents):
agent_id: str,
) -> ListAgentSessionsResponse:
pass
+
+ # OpenAI responses
+ async def get_openai_response(
+ self,
+ id: str,
+ ) -> OpenAIResponseObject:
+ return await self.openai_responses_impl.get_openai_response(id)
+
+ async def create_openai_response(
+ self,
+ input: Union[str, List[OpenAIResponseInputMessage]],
+ model: str,
+ previous_response_id: Optional[str] = None,
+ store: Optional[bool] = True,
+ stream: Optional[bool] = False,
+ tools: Optional[List[OpenAIResponseInputTool]] = None,
+ ) -> OpenAIResponseObject:
+ return await self.openai_responses_impl.create_openai_response(
+ input, model, previous_response_id, store, stream, tools
+ )
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
new file mode 100644
index 000000000..0690a15fe
--- /dev/null
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -0,0 +1,319 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+import uuid
+from typing import AsyncIterator, List, Optional, Union, cast
+
+from openai.types.chat import ChatCompletionToolParam
+
+from llama_stack.apis.agents.openai_responses import (
+ OpenAIResponseInputMessage,
+ OpenAIResponseInputMessageContentImage,
+ OpenAIResponseInputMessageContentText,
+ OpenAIResponseInputTool,
+ OpenAIResponseObject,
+ OpenAIResponseObjectStream,
+ OpenAIResponseObjectStreamResponseCompleted,
+ OpenAIResponseObjectStreamResponseCreated,
+ OpenAIResponseOutput,
+ OpenAIResponseOutputMessage,
+ OpenAIResponseOutputMessageContentOutputText,
+ OpenAIResponseOutputMessageWebSearchToolCall,
+)
+from llama_stack.apis.inference.inference import (
+ Inference,
+ OpenAIAssistantMessageParam,
+ OpenAIChatCompletion,
+ OpenAIChatCompletionContentPartImageParam,
+ OpenAIChatCompletionContentPartParam,
+ OpenAIChatCompletionContentPartTextParam,
+ OpenAIChatCompletionToolCallFunction,
+ OpenAIChoice,
+ OpenAIImageURL,
+ OpenAIMessageParam,
+ OpenAIToolMessageParam,
+ OpenAIUserMessageParam,
+)
+from llama_stack.apis.tools.tools import ToolGroups, ToolInvocationResult, ToolRuntime
+from llama_stack.log import get_logger
+from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
+from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
+from llama_stack.providers.utils.kvstore import KVStore
+
+logger = get_logger(name=__name__, category="openai_responses")
+
+OPENAI_RESPONSES_PREFIX = "openai_responses:"
+
+
+async def _previous_response_to_messages(previous_response: OpenAIResponseObject) -> List[OpenAIMessageParam]:
+ messages: List[OpenAIMessageParam] = []
+ for output_message in previous_response.output:
+ if isinstance(output_message, OpenAIResponseOutputMessage):
+ messages.append(OpenAIAssistantMessageParam(content=output_message.content[0].text))
+ return messages
+
+
+async def _openai_choices_to_output_messages(choices: List[OpenAIChoice]) -> List[OpenAIResponseOutputMessage]:
+ output_messages = []
+ for choice in choices:
+ output_content = ""
+ if isinstance(choice.message.content, str):
+ output_content = choice.message.content
+ elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam):
+ output_content = choice.message.content.text
+ # TODO: handle image content
+ output_messages.append(
+ OpenAIResponseOutputMessage(
+ id=f"msg_{uuid.uuid4()}",
+ content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
+ status="completed",
+ )
+ )
+ return output_messages
+
+
+class OpenAIResponsesImpl:
+ def __init__(
+ self,
+ persistence_store: KVStore,
+ inference_api: Inference,
+ tool_groups_api: ToolGroups,
+ tool_runtime_api: ToolRuntime,
+ ):
+ self.persistence_store = persistence_store
+ self.inference_api = inference_api
+ self.tool_groups_api = tool_groups_api
+ self.tool_runtime_api = tool_runtime_api
+
+ async def get_openai_response(
+ self,
+ id: str,
+ ) -> OpenAIResponseObject:
+ key = f"{OPENAI_RESPONSES_PREFIX}{id}"
+ response_json = await self.persistence_store.get(key=key)
+ if response_json is None:
+ raise ValueError(f"OpenAI response with id '{id}' not found")
+ return OpenAIResponseObject.model_validate_json(response_json)
+
+ async def create_openai_response(
+ self,
+ input: Union[str, List[OpenAIResponseInputMessage]],
+ model: str,
+ previous_response_id: Optional[str] = None,
+ store: Optional[bool] = True,
+ stream: Optional[bool] = False,
+ tools: Optional[List[OpenAIResponseInputTool]] = None,
+ ):
+ stream = False if stream is None else stream
+
+ messages: List[OpenAIMessageParam] = []
+ if previous_response_id:
+ previous_response = await self.get_openai_response(previous_response_id)
+ messages.extend(await _previous_response_to_messages(previous_response))
+ # TODO: refactor this user_content parsing out into a separate method
+ user_content: Union[str, List[OpenAIChatCompletionContentPartParam]] = ""
+ if isinstance(input, list):
+ user_content = []
+ for user_input in input:
+ if isinstance(user_input.content, list):
+ for user_input_content in user_input.content:
+ if isinstance(user_input_content, OpenAIResponseInputMessageContentText):
+ user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input_content.text))
+ elif isinstance(user_input_content, OpenAIResponseInputMessageContentImage):
+ if user_input_content.image_url:
+ image_url = OpenAIImageURL(
+ url=user_input_content.image_url, detail=user_input_content.detail
+ )
+ user_content.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
+ else:
+ user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input.content))
+ else:
+ user_content = input
+ messages.append(OpenAIUserMessageParam(content=user_content))
+
+ chat_tools = await self._convert_response_tools_to_chat_tools(tools) if tools else None
+ chat_response = await self.inference_api.openai_chat_completion(
+ model=model,
+ messages=messages,
+ tools=chat_tools,
+ stream=stream,
+ )
+
+ if stream:
+ # TODO: refactor this into a separate method that handles streaming
+ chat_response_id = ""
+ chat_response_content = []
+ # TODO: these chunk_ fields are hacky and only take the last chunk into account
+ chunk_created = 0
+ chunk_model = ""
+ chunk_finish_reason = ""
+ async for chunk in chat_response:
+ chat_response_id = chunk.id
+ chunk_created = chunk.created
+ chunk_model = chunk.model
+ for chunk_choice in chunk.choices:
+ # TODO: this only works for text content
+ chat_response_content.append(chunk_choice.delta.content or "")
+ if chunk_choice.finish_reason:
+ chunk_finish_reason = chunk_choice.finish_reason
+ assistant_message = OpenAIAssistantMessageParam(content="".join(chat_response_content))
+ chat_response = OpenAIChatCompletion(
+ id=chat_response_id,
+ choices=[
+ OpenAIChoice(
+ message=assistant_message,
+ finish_reason=chunk_finish_reason,
+ index=0,
+ )
+ ],
+ created=chunk_created,
+ model=chunk_model,
+ )
+ else:
+ # dump and reload to map to our pydantic types
+ chat_response = OpenAIChatCompletion(**chat_response.model_dump())
+
+ output_messages: List[OpenAIResponseOutput] = []
+ if chat_response.choices[0].message.tool_calls:
+ output_messages.extend(
+ await self._execute_tool_and_return_final_output(model, stream, chat_response, messages)
+ )
+ else:
+ output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices))
+ response = OpenAIResponseObject(
+ created_at=chat_response.created,
+ id=f"resp-{uuid.uuid4()}",
+ model=model,
+ object="response",
+ status="completed",
+ output=output_messages,
+ )
+
+ if store:
+ # Store in kvstore
+ key = f"{OPENAI_RESPONSES_PREFIX}{response.id}"
+ await self.persistence_store.set(
+ key=key,
+ value=response.model_dump_json(),
+ )
+
+ if stream:
+
+ async def async_response() -> AsyncIterator[OpenAIResponseObjectStream]:
+ # TODO: response created should actually get emitted much earlier in the process
+ yield OpenAIResponseObjectStreamResponseCreated(response=response)
+ yield OpenAIResponseObjectStreamResponseCompleted(response=response)
+
+ return async_response()
+
+ return response
+
+ async def _convert_response_tools_to_chat_tools(
+ self, tools: List[OpenAIResponseInputTool]
+ ) -> List[ChatCompletionToolParam]:
+ chat_tools: List[ChatCompletionToolParam] = []
+ for input_tool in tools:
+ # TODO: Handle other tool types
+ if input_tool.type == "web_search":
+ tool_name = "web_search"
+ tool = await self.tool_groups_api.get_tool(tool_name)
+ tool_def = ToolDefinition(
+ tool_name=tool_name,
+ description=tool.description,
+ parameters={
+ param.name: ToolParamDefinition(
+ param_type=param.parameter_type,
+ description=param.description,
+ required=param.required,
+ default=param.default,
+ )
+ for param in tool.parameters
+ },
+ )
+ chat_tool = convert_tooldef_to_openai_tool(tool_def)
+ chat_tools.append(chat_tool)
+ else:
+ raise ValueError(f"Llama Stack OpenAI Responses does not yet support tool type: {input_tool.type}")
+ return chat_tools
+
+ async def _execute_tool_and_return_final_output(
+ self, model_id: str, stream: bool, chat_response: OpenAIChatCompletion, messages: List[OpenAIMessageParam]
+ ) -> List[OpenAIResponseOutput]:
+ output_messages: List[OpenAIResponseOutput] = []
+ choice = chat_response.choices[0]
+
+ # If the choice is not an assistant message, we don't need to execute any tools
+ if not isinstance(choice.message, OpenAIAssistantMessageParam):
+ return output_messages
+
+ # If the assistant message doesn't have any tool calls, we don't need to execute any tools
+ if not choice.message.tool_calls:
+ return output_messages
+
+ # Add the assistant message with tool_calls response to the messages list
+ messages.append(choice.message)
+
+ for tool_call in choice.message.tool_calls:
+ tool_call_id = tool_call.id
+ function = tool_call.function
+
+ # If for some reason the tool call doesn't have a function or id, we can't execute it
+ if not function or not tool_call_id:
+ continue
+
+ # TODO: telemetry spans for tool calls
+ result = await self._execute_tool_call(function)
+
+ # Handle tool call failure
+ if not result:
+ output_messages.append(
+ OpenAIResponseOutputMessageWebSearchToolCall(
+ id=tool_call_id,
+ status="failed",
+ )
+ )
+ continue
+
+ output_messages.append(
+ OpenAIResponseOutputMessageWebSearchToolCall(
+ id=tool_call_id,
+ status="completed",
+ ),
+ )
+
+ result_content = ""
+ # TODO: handle other result content types and lists
+ if isinstance(result.content, str):
+ result_content = result.content
+ messages.append(OpenAIToolMessageParam(content=result_content, tool_call_id=tool_call_id))
+
+ tool_results_chat_response = await self.inference_api.openai_chat_completion(
+ model=model_id,
+ messages=messages,
+ stream=stream,
+ )
+ # type cast to appease mypy
+ tool_results_chat_response = cast(OpenAIChatCompletion, tool_results_chat_response)
+ tool_final_outputs = await _openai_choices_to_output_messages(tool_results_chat_response.choices)
+ # TODO: Wire in annotations with URLs, titles, etc to these output messages
+ output_messages.extend(tool_final_outputs)
+ return output_messages
+
+ async def _execute_tool_call(
+ self,
+ function: OpenAIChatCompletionToolCallFunction,
+ ) -> Optional[ToolInvocationResult]:
+ if not function.name:
+ return None
+ function_args = json.loads(function.arguments) if function.arguments else {}
+ logger.info(f"executing tool call: {function.name} with args: {function_args}")
+ result = await self.tool_runtime_api.invoke_tool(
+ tool_name=function.name,
+ kwargs=function_args,
+ )
+ logger.debug(f"tool call {function.name} completed with result: {result}")
+ return result
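When `stream=True`, the implementation above currently buffers the underlying chat completion and then emits exactly two chunks: a `response.created` event followed by a `response.completed` event, each carrying the full response object (the TODO notes that `response.created` should eventually be emitted earlier). A sketch of consuming that stream from a client, with the same placeholder server URL and model id as before:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="fake")

stream = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # hypothetical model id
    input="What's the name of the Sun in latin?",
    stream=True,
)
for chunk in stream:
    # Only the final chunk carries the completed response.
    if chunk.type == "response.completed":
        print(chunk.response.id, chunk.response.output_text)
```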
diff --git a/llama_stack/strong_typing/schema.py b/llama_stack/strong_typing/schema.py
index 0f5121906..1427c22e6 100644
--- a/llama_stack/strong_typing/schema.py
+++ b/llama_stack/strong_typing/schema.py
@@ -478,6 +478,8 @@ class JsonSchemaGenerator:
}
return ret
elif origin_type is Literal:
+ if len(typing.get_args(typ)) != 1:
+ raise ValueError(f"Literal type {typ} has {len(typing.get_args(typ))} arguments")
(literal_value,) = typing.get_args(typ) # unpack value of literal type
schema = self.type_to_schema(type(literal_value))
schema["const"] = literal_value
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 1878c9e88..809a00897 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -14,6 +14,7 @@ from pathlib import Path
import pytest
import yaml
from llama_stack_client import LlamaStackClient
+from openai import OpenAI
from llama_stack import LlamaStackAsLibraryClient
from llama_stack.apis.datatypes import Api
@@ -207,3 +208,9 @@ def llama_stack_client(request, provider_data, text_model_id):
raise RuntimeError("Initialization failed")
return client
+
+
+@pytest.fixture(scope="session")
+def openai_client(client_with_models):
+ base_url = f"{client_with_models.base_url}/v1/openai/v1"
+ return OpenAI(base_url=base_url, api_key="fake")
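A hypothetical integration test built on this fixture might look like the following (the test name and the `text_model_id` fixture are assumptions; only `openai_client` is defined in this diff):

```python
def test_openai_responses_smoke(openai_client, text_model_id):
    # Create a response through the stack's OpenAI-compatible endpoint.
    response = openai_client.responses.create(
        model=text_model_id,
        input="Which planet do humans live on?",
    )
    assert "earth" in response.output_text.lower()
```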
diff --git a/tests/integration/test_cases/openai/responses.json b/tests/integration/test_cases/openai/responses.json
new file mode 100644
index 000000000..d17d0cd4f
--- /dev/null
+++ b/tests/integration/test_cases/openai/responses.json
@@ -0,0 +1,37 @@
+{
+ "non_streaming_01": {
+ "data": {
+ "question": "Which planet do humans live on?",
+ "expected": "Earth"
+ }
+ },
+ "non_streaming_02": {
+ "data": {
+ "question": "Which planet has rings around it with a name starting with letter S?",
+ "expected": "Saturn"
+ }
+ },
+ "streaming_01": {
+ "data": {
+ "question": "What's the name of the Sun in latin?",
+ "expected": "Sol"
+ }
+ },
+ "streaming_02": {
+ "data": {
+ "question": "What is the name of the US captial?",
+ "expected": "Washington"
+ }
+ },
+ "tools_web_search_01": {
+ "data": {
+ "input": "How many experts does the Llama 4 Maverick model have?",
+ "tools": [
+ {
+ "type": "web_search"
+ }
+ ],
+ "expected": "128"
+ }
+ }
+}
diff --git a/tests/integration/test_cases/test_case.py b/tests/integration/test_cases/test_case.py
index 8514f3046..2a3c73310 100644
--- a/tests/integration/test_cases/test_case.py
+++ b/tests/integration/test_cases/test_case.py
@@ -12,6 +12,7 @@ class TestCase:
_apis = [
"inference/chat_completion",
"inference/completion",
+ "openai/responses",
]
_jsonblob = {}
diff --git a/tests/verifications/conf/fireworks-llama-stack.yaml b/tests/verifications/conf/fireworks-llama-stack.yaml
index fc78a1377..dffd7c739 100644
--- a/tests/verifications/conf/fireworks-llama-stack.yaml
+++ b/tests/verifications/conf/fireworks-llama-stack.yaml
@@ -13,3 +13,5 @@ test_exclusions:
- test_chat_non_streaming_image
- test_chat_streaming_image
- test_chat_multi_turn_multiple_images
+ - test_response_non_streaming_image
+ - test_response_non_streaming_multi_turn_image
diff --git a/tests/verifications/conf/groq-llama-stack.yaml b/tests/verifications/conf/groq-llama-stack.yaml
index 6958bafc5..786b79c24 100644
--- a/tests/verifications/conf/groq-llama-stack.yaml
+++ b/tests/verifications/conf/groq-llama-stack.yaml
@@ -13,3 +13,5 @@ test_exclusions:
- test_chat_non_streaming_image
- test_chat_streaming_image
- test_chat_multi_turn_multiple_images
+ - test_response_non_streaming_image
+ - test_response_non_streaming_multi_turn_image
diff --git a/tests/verifications/conf/together-llama-stack.yaml b/tests/verifications/conf/together-llama-stack.yaml
index 719e2d776..58cbcfa93 100644
--- a/tests/verifications/conf/together-llama-stack.yaml
+++ b/tests/verifications/conf/together-llama-stack.yaml
@@ -13,3 +13,5 @@ test_exclusions:
- test_chat_non_streaming_image
- test_chat_streaming_image
- test_chat_multi_turn_multiple_images
+ - test_response_non_streaming_image
+ - test_response_non_streaming_multi_turn_image
diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py
index f0894bfce..bdaea3ebf 100755
--- a/tests/verifications/generate_report.py
+++ b/tests/verifications/generate_report.py
@@ -16,7 +16,7 @@ Description:
Configuration:
- - Provider details (models, display names) are loaded from `tests/verifications/config.yaml`.
+ - Provider details (models, display names) are loaded from `tests/verifications/conf/*.yaml`.
- Test cases are defined in YAML files within `tests/verifications/openai_api/fixtures/test_cases/`.
- Test results are stored in `tests/verifications/test_results/`.
diff --git a/tests/verifications/openai-api-verification-run.yaml b/tests/verifications/openai-api-verification-run.yaml
index 71885d058..04675577d 100644
--- a/tests/verifications/openai-api-verification-run.yaml
+++ b/tests/verifications/openai-api-verification-run.yaml
@@ -1,10 +1,15 @@
+# This is a temporary run file because model names used by the verification tests
+# are not quite consistent with various pre-existing distributions.
+#
version: '2'
image_name: openai-api-verification
apis:
+- agents
- inference
- telemetry
- tool_runtime
- vector_io
+- safety
providers:
inference:
- provider_id: together
@@ -16,12 +21,12 @@ providers:
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
- api_key: ${env.FIREWORKS_API_KEY}
+ api_key: ${env.FIREWORKS_API_KEY:}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
- api_key: ${env.GROQ_API_KEY}
+ api_key: ${env.GROQ_API_KEY:}
- provider_id: openai
provider_type: remote::openai
config:
@@ -45,6 +50,19 @@ providers:
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/openai/trace_store.db}
+ safety:
+ - provider_id: llama-guard
+ provider_type: inline::llama-guard
+ config:
+ excluded_categories: []
+ agents:
+ - provider_id: meta-reference
+ provider_type: inline::meta-reference
+ config:
+ persistence_store:
+ type: sqlite
+ namespace: null
+ db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/agents_store.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
diff --git a/tests/verifications/openai_api/conftest.py b/tests/verifications/openai_api/conftest.py
new file mode 100644
index 000000000..7b4c92f1c
--- /dev/null
+++ b/tests/verifications/openai_api/conftest.py
@@ -0,0 +1,35 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs
+
+
+def pytest_generate_tests(metafunc):
+ """Dynamically parametrize tests based on the selected provider and config."""
+ if "model" in metafunc.fixturenames:
+ provider = metafunc.config.getoption("provider")
+ if not provider:
+ print("Warning: --provider not specified. Skipping model parametrization.")
+ metafunc.parametrize("model", [])
+ return
+
+ try:
+ config_data = _load_all_verification_configs()
+ except (FileNotFoundError, IOError) as e:
+ print(f"ERROR loading verification configs: {e}")
+ config_data = {"providers": {}}
+
+ provider_config = config_data.get("providers", {}).get(provider)
+ if provider_config:
+ models = provider_config.get("models", [])
+ if models:
+ metafunc.parametrize("model", models)
+ else:
+ print(f"Warning: No models found for provider '{provider}' in config.")
+ metafunc.parametrize("model", []) # Parametrize empty if no models found
+ else:
+ print(f"Warning: Provider '{provider}' not found in config. No models parametrized.")
+ metafunc.parametrize("model", []) # Parametrize empty if provider not found
diff --git a/tests/verifications/openai_api/fixtures/fixtures.py b/tests/verifications/openai_api/fixtures/fixtures.py
index 940b99b2a..2ea73cf26 100644
--- a/tests/verifications/openai_api/fixtures/fixtures.py
+++ b/tests/verifications/openai_api/fixtures/fixtures.py
@@ -5,14 +5,16 @@
# the root directory of this source tree.
import os
+import re
from pathlib import Path
import pytest
import yaml
from openai import OpenAI
+# --- Helper Functions ---
+
-# --- Helper Function to Load Config ---
def _load_all_verification_configs():
"""Load and aggregate verification configs from the conf/ directory."""
# Note: Path is relative to *this* file (fixtures.py)
@@ -44,7 +46,30 @@ def _load_all_verification_configs():
return {"providers": all_provider_configs}
-# --- End Helper Function ---
+def case_id_generator(case):
+ """Generate a test ID from the case's 'case_id' field, or use a default."""
+ case_id = case.get("case_id")
+ if isinstance(case_id, (str, int)):
+ return re.sub(r"\\W|^(?=\\d)", "_", str(case_id))
+ return None
+
+
+def should_skip_test(verification_config, provider, model, test_name_base):
+ """Check if a test should be skipped based on config exclusions."""
+ provider_config = verification_config.get("providers", {}).get(provider)
+ if not provider_config:
+ return False # No config for provider, don't skip
+
+ exclusions = provider_config.get("test_exclusions", {}).get(model, [])
+ return test_name_base in exclusions
+
+
+# Helper to get the base test name from the request object
+def get_base_test_name(request):
+ return request.node.originalname
+
+
+# --- End Helper Functions ---
@pytest.fixture(scope="session")
diff --git a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
new file mode 100644
index 000000000..f235b2ea8
--- /dev/null
+++ b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
@@ -0,0 +1,65 @@
+test_response_basic:
+ test_name: test_response_basic
+ test_params:
+ case:
+ - case_id: "earth"
+ input: "Which planet do humans live on?"
+ output: "earth"
+ - case_id: "saturn"
+ input: "Which planet has rings around it with a name starting with letter S?"
+ output: "saturn"
+
+test_response_multi_turn:
+ test_name: test_response_multi_turn
+ test_params:
+ case:
+ - case_id: "earth"
+ turns:
+ - input: "Which planet do humans live on?"
+ output: "earth"
+ - input: "What is the name of the planet from your previous response?"
+ output: "earth"
+
+test_response_web_search:
+ test_name: test_response_web_search
+ test_params:
+ case:
+ - case_id: "llama_experts"
+ input: "How many experts does the Llama 4 Maverick model have?"
+ tools:
+ - type: web_search
+ search_context_size: "low"
+ output: "128"
+
+test_response_image:
+ test_name: test_response_image
+ test_params:
+ case:
+ - case_id: "llama_image"
+ input:
+ - role: user
+ content:
+ - type: input_text
+ text: "Identify the type of animal in this image."
+ - type: input_image
+ image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
+ output: "llama"
+
+test_response_multi_turn_image:
+ test_name: test_response_multi_turn_image
+ test_params:
+ case:
+ - case_id: "llama_image_search"
+ turns:
+ - input:
+ - role: user
+ content:
+ - type: input_text
+ text: "What type of animal is in this image? Please respond with a single word that starts with the letter 'L'."
+ - type: input_image
+ image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
+ output: "llama"
+ - input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick' and 'scout'."
+ tools:
+ - type: web_search
+ output: "model"
diff --git a/tests/verifications/openai_api/test_chat_completion.py b/tests/verifications/openai_api/test_chat_completion.py
index 277eaafa3..64e49d352 100644
--- a/tests/verifications/openai_api/test_chat_completion.py
+++ b/tests/verifications/openai_api/test_chat_completion.py
@@ -7,7 +7,6 @@
import base64
import copy
import json
-import re
from pathlib import Path
from typing import Any
@@ -16,7 +15,9 @@ from openai import APIError
from pydantic import BaseModel
from tests.verifications.openai_api.fixtures.fixtures import (
- _load_all_verification_configs,
+ case_id_generator,
+ get_base_test_name,
+ should_skip_test,
)
from tests.verifications.openai_api.fixtures.load import load_test_cases
@@ -25,57 +26,6 @@ chat_completion_test_cases = load_test_cases("chat_completion")
THIS_DIR = Path(__file__).parent
-def case_id_generator(case):
- """Generate a test ID from the case's 'case_id' field, or use a default."""
- case_id = case.get("case_id")
- if isinstance(case_id, (str, int)):
- return re.sub(r"\\W|^(?=\\d)", "_", str(case_id))
- return None
-
-
-def pytest_generate_tests(metafunc):
- """Dynamically parametrize tests based on the selected provider and config."""
- if "model" in metafunc.fixturenames:
- provider = metafunc.config.getoption("provider")
- if not provider:
- print("Warning: --provider not specified. Skipping model parametrization.")
- metafunc.parametrize("model", [])
- return
-
- try:
- config_data = _load_all_verification_configs()
- except (FileNotFoundError, IOError) as e:
- print(f"ERROR loading verification configs: {e}")
- config_data = {"providers": {}}
-
- provider_config = config_data.get("providers", {}).get(provider)
- if provider_config:
- models = provider_config.get("models", [])
- if models:
- metafunc.parametrize("model", models)
- else:
- print(f"Warning: No models found for provider '{provider}' in config.")
- metafunc.parametrize("model", []) # Parametrize empty if no models found
- else:
- print(f"Warning: Provider '{provider}' not found in config. No models parametrized.")
- metafunc.parametrize("model", []) # Parametrize empty if provider not found
-
-
-def should_skip_test(verification_config, provider, model, test_name_base):
- """Check if a test should be skipped based on config exclusions."""
- provider_config = verification_config.get("providers", {}).get(provider)
- if not provider_config:
- return False # No config for provider, don't skip
-
- exclusions = provider_config.get("test_exclusions", {}).get(model, [])
- return test_name_base in exclusions
-
-
-# Helper to get the base test name from the request object
-def get_base_test_name(request):
- return request.node.originalname
-
-
@pytest.fixture
def multi_image_data():
files = [
diff --git a/tests/verifications/openai_api/test_responses.py b/tests/verifications/openai_api/test_responses.py
new file mode 100644
index 000000000..cc7ec320c
--- /dev/null
+++ b/tests/verifications/openai_api/test_responses.py
@@ -0,0 +1,166 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+import pytest
+
+from tests.verifications.openai_api.fixtures.fixtures import (
+ case_id_generator,
+ get_base_test_name,
+ should_skip_test,
+)
+from tests.verifications.openai_api.fixtures.load import load_test_cases
+
+responses_test_cases = load_test_cases("responses")
+
+
+@pytest.mark.parametrize(
+ "case",
+ responses_test_cases["test_response_basic"]["test_params"]["case"],
+ ids=case_id_generator,
+)
+def test_response_non_streaming_basic(request, openai_client, model, provider, verification_config, case):
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ response = openai_client.responses.create(
+ model=model,
+ input=case["input"],
+ stream=False,
+ )
+ output_text = response.output_text.lower().strip()
+ assert len(output_text) > 0
+ assert case["output"].lower() in output_text
+
+ retrieved_response = openai_client.responses.retrieve(response_id=response.id)
+ assert retrieved_response.output_text == response.output_text
+
+ next_response = openai_client.responses.create(
+ model=model, input="Repeat your previous response in all caps.", previous_response_id=response.id
+ )
+ next_output_text = next_response.output_text.strip()
+ assert case["output"].upper() in next_output_text
+
+
+@pytest.mark.parametrize(
+ "case",
+ responses_test_cases["test_response_basic"]["test_params"]["case"],
+ ids=case_id_generator,
+)
+def test_response_streaming_basic(request, openai_client, model, provider, verification_config, case):
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ response = openai_client.responses.create(
+ model=model,
+ input=case["input"],
+ stream=True,
+ )
+ streamed_content = []
+ response_id = ""
+ for chunk in response:
+ if chunk.type == "response.completed":
+ response_id = chunk.response.id
+ streamed_content.append(chunk.response.output_text.strip())
+
+ assert len(streamed_content) > 0
+ assert case["output"].lower() in "".join(streamed_content).lower()
+
+ retrieved_response = openai_client.responses.retrieve(response_id=response_id)
+ assert retrieved_response.output_text == "".join(streamed_content)
+
+
+@pytest.mark.parametrize(
+ "case",
+ responses_test_cases["test_response_multi_turn"]["test_params"]["case"],
+ ids=case_id_generator,
+)
+def test_response_non_streaming_multi_turn(request, openai_client, model, provider, verification_config, case):
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ previous_response_id = None
+ for turn in case["turns"]:
+ response = openai_client.responses.create(
+ model=model,
+ input=turn["input"],
+ previous_response_id=previous_response_id,
+ tools=turn["tools"] if "tools" in turn else None,
+ )
+ previous_response_id = response.id
+ output_text = response.output_text.lower()
+ assert turn["output"].lower() in output_text
+
+
+@pytest.mark.parametrize(
+ "case",
+ responses_test_cases["test_response_web_search"]["test_params"]["case"],
+ ids=case_id_generator,
+)
+def test_response_non_streaming_web_search(request, openai_client, model, provider, verification_config, case):
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ response = openai_client.responses.create(
+ model=model,
+ input=case["input"],
+ tools=case["tools"],
+ stream=False,
+ )
+ assert len(response.output) > 1
+ assert response.output[0].type == "web_search_call"
+ assert response.output[0].status == "completed"
+ assert response.output[1].type == "message"
+ assert response.output[1].status == "completed"
+ assert response.output[1].role == "assistant"
+ assert len(response.output[1].content) > 0
+ assert case["output"].lower() in response.output_text.lower().strip()
+
+
+@pytest.mark.parametrize(
+ "case",
+ responses_test_cases["test_response_image"]["test_params"]["case"],
+ ids=case_id_generator,
+)
+def test_response_non_streaming_image(request, openai_client, model, provider, verification_config, case):
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ response = openai_client.responses.create(
+ model=model,
+ input=case["input"],
+ stream=False,
+ )
+ output_text = response.output_text.lower()
+ assert case["output"].lower() in output_text
+
+
+@pytest.mark.parametrize(
+ "case",
+ responses_test_cases["test_response_multi_turn_image"]["test_params"]["case"],
+ ids=case_id_generator,
+)
+def test_response_non_streaming_multi_turn_image(request, openai_client, model, provider, verification_config, case):
+ test_name_base = get_base_test_name(request)
+ if should_skip_test(verification_config, provider, model, test_name_base):
+ pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+ previous_response_id = None
+ for turn in case["turns"]:
+ response = openai_client.responses.create(
+ model=model,
+ input=turn["input"],
+ previous_response_id=previous_response_id,
+ tools=turn["tools"] if "tools" in turn else None,
+ )
+ previous_response_id = response.id
+ output_text = response.output_text.lower()
+ assert turn["output"].lower() in output_text