feat: OpenAI Responses API (#1989)

# What does this PR do?

This provides an initial [OpenAI Responses
API](https://platform.openai.com/docs/api-reference/responses)
implementation. The API is not yet complete, and this is more a
proof-of-concept to show how we can store responses in our key-value
stores and use them to support the Responses API concepts like
`previous_response_id`.

## Test Plan

I've added a new
`tests/integration/openai_responses/test_openai_responses.py` as part of
a test-driven development for this new API. I'm only testing this
locally with the remote-vllm provider for now, but it should work with
any of our inference providers since the only API it requires out of the
inference provider is the `openai_chat_completion` endpoint.

```
VLLM_URL="http://localhost:8000/v1" \
INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" \
llama stack build --template remote-vllm --image-type venv --run
```

```
LLAMA_STACK_CONFIG="http://localhost:8321" \
python -m pytest -v \
  tests/integration/openai_responses/test_openai_responses.py \
  --text-model "meta-llama/Llama-3.2-3B-Instruct"
 ```

---------

Signed-off-by: Ben Browning <bbrownin@redhat.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
Ben Browning 2025-04-28 17:06:00 -04:00 committed by GitHub
parent 79851d93aa
commit 8dfce2f596
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 1766 additions and 59 deletions

View file

@ -497,6 +497,54 @@
}
}
},
"/v1/openai/v1/responses": {
"post": {
"responses": {
"200": {
"description": "Runtime representation of an annotated type.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObject"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObjectStream"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Create a new OpenAI response.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateOpenaiResponseRequest"
}
}
},
"required": true
}
}
},
"/v1/files": {
"get": {
"responses": {
@ -1278,6 +1326,49 @@
]
}
},
"/v1/openai/v1/responses/{id}": {
"get": {
"responses": {
"200": {
"description": "An OpenAIResponseObject.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObject"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Retrieve an OpenAI response by its ID.",
"parameters": [
{
"name": "id",
"in": "path",
"description": "The ID of the OpenAI response to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/scoring-functions/{scoring_fn_id}": {
"get": {
"responses": {
@ -6192,6 +6283,427 @@
],
"title": "AgentTurnResponseTurnStartPayload"
},
"OpenAIResponseInputMessage": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
}
}
]
},
"role": {
"oneOf": [
{
"type": "string",
"const": "system"
},
{
"type": "string",
"const": "developer"
},
{
"type": "string",
"const": "user"
},
{
"type": "string",
"const": "assistant"
}
]
},
"type": {
"type": "string",
"const": "message",
"default": "message"
}
},
"additionalProperties": false,
"required": [
"content",
"role"
],
"title": "OpenAIResponseInputMessage"
},
"OpenAIResponseInputMessageContent": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentText"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
}
}
},
"OpenAIResponseInputMessageContentImage": {
"type": "object",
"properties": {
"detail": {
"oneOf": [
{
"type": "string",
"const": "low"
},
{
"type": "string",
"const": "high"
},
{
"type": "string",
"const": "auto"
}
],
"default": "auto"
},
"type": {
"type": "string",
"const": "input_image",
"default": "input_image"
},
"image_url": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"detail",
"type"
],
"title": "OpenAIResponseInputMessageContentImage"
},
"OpenAIResponseInputMessageContentText": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"type": {
"type": "string",
"const": "input_text",
"default": "input_text"
}
},
"additionalProperties": false,
"required": [
"text",
"type"
],
"title": "OpenAIResponseInputMessageContentText"
},
"OpenAIResponseInputTool": {
"type": "object",
"properties": {
"type": {
"oneOf": [
{
"type": "string",
"const": "web_search"
},
{
"type": "string",
"const": "web_search_preview_2025_03_11"
}
],
"default": "web_search"
},
"search_context_size": {
"type": "string",
"default": "medium"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "OpenAIResponseInputToolWebSearch"
},
"CreateOpenaiResponseRequest": {
"type": "object",
"properties": {
"input": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessage"
}
}
],
"description": "Input message(s) to create the response."
},
"model": {
"type": "string",
"description": "The underlying LLM used for completions."
},
"previous_response_id": {
"type": "string",
"description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
},
"store": {
"type": "boolean"
},
"stream": {
"type": "boolean"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputTool"
}
}
},
"additionalProperties": false,
"required": [
"input",
"model"
],
"title": "CreateOpenaiResponseRequest"
},
"OpenAIResponseError": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"message": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"code",
"message"
],
"title": "OpenAIResponseError"
},
"OpenAIResponseObject": {
"type": "object",
"properties": {
"created_at": {
"type": "integer"
},
"error": {
"$ref": "#/components/schemas/OpenAIResponseError"
},
"id": {
"type": "string"
},
"model": {
"type": "string"
},
"object": {
"type": "string",
"const": "response",
"default": "response"
},
"output": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutput"
}
},
"parallel_tool_calls": {
"type": "boolean",
"default": false
},
"previous_response_id": {
"type": "string"
},
"status": {
"type": "string"
},
"temperature": {
"type": "number"
},
"top_p": {
"type": "number"
},
"truncation": {
"type": "string"
},
"user": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"created_at",
"id",
"model",
"object",
"output",
"parallel_tool_calls",
"status"
],
"title": "OpenAIResponseObject"
},
"OpenAIResponseOutput": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessage"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"message": "#/components/schemas/OpenAIResponseOutputMessage",
"web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
}
}
},
"OpenAIResponseOutputMessage": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"content": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutputMessageContent"
}
},
"role": {
"type": "string",
"const": "assistant",
"default": "assistant"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "message",
"default": "message"
}
},
"additionalProperties": false,
"required": [
"id",
"content",
"role",
"status",
"type"
],
"title": "OpenAIResponseOutputMessage"
},
"OpenAIResponseOutputMessageContent": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"type": {
"type": "string",
"const": "output_text",
"default": "output_text"
}
},
"additionalProperties": false,
"required": [
"text",
"type"
],
"title": "OpenAIResponseOutputMessageContentOutputText"
},
"OpenAIResponseOutputMessageWebSearchToolCall": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "web_search_call",
"default": "web_search_call"
}
},
"additionalProperties": false,
"required": [
"id",
"status",
"type"
],
"title": "OpenAIResponseOutputMessageWebSearchToolCall"
},
"OpenAIResponseObjectStream": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
},
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
"response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
}
},
"OpenAIResponseObjectStreamResponseCompleted": {
"type": "object",
"properties": {
"response": {
"$ref": "#/components/schemas/OpenAIResponseObject"
},
"type": {
"type": "string",
"const": "response.completed",
"default": "response.completed"
}
},
"additionalProperties": false,
"required": [
"response",
"type"
],
"title": "OpenAIResponseObjectStreamResponseCompleted"
},
"OpenAIResponseObjectStreamResponseCreated": {
"type": "object",
"properties": {
"response": {
"$ref": "#/components/schemas/OpenAIResponseObject"
},
"type": {
"type": "string",
"const": "response.created",
"default": "response.created"
}
},
"additionalProperties": false,
"required": [
"response",
"type"
],
"title": "OpenAIResponseObjectStreamResponseCreated"
},
"CreateUploadSessionRequest": {
"type": "object",
"properties": {

View file

@ -330,6 +330,39 @@ paths:
schema:
$ref: '#/components/schemas/CreateAgentTurnRequest'
required: true
/v1/openai/v1/responses:
post:
responses:
'200':
description: >-
Runtime representation of an annotated type.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIResponseObject'
text/event-stream:
schema:
$ref: '#/components/schemas/OpenAIResponseObjectStream'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Create a new OpenAI response.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
required: true
/v1/files:
get:
responses:
@ -875,6 +908,36 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/responses/{id}:
get:
responses:
'200':
description: An OpenAIResponseObject.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIResponseObject'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Retrieve an OpenAI response by its ID.
parameters:
- name: id
in: path
description: >-
The ID of the OpenAI response to retrieve.
required: true
schema:
type: string
/v1/scoring-functions/{scoring_fn_id}:
get:
responses:
@ -4329,6 +4392,293 @@ components:
- event_type
- turn_id
title: AgentTurnResponseTurnStartPayload
OpenAIResponseInputMessage:
type: object
properties:
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
role:
oneOf:
- type: string
const: system
- type: string
const: developer
- type: string
const: user
- type: string
const: assistant
type:
type: string
const: message
default: message
additionalProperties: false
required:
- content
- role
title: OpenAIResponseInputMessage
OpenAIResponseInputMessageContent:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
discriminator:
propertyName: type
mapping:
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
OpenAIResponseInputMessageContentImage:
type: object
properties:
detail:
oneOf:
- type: string
const: low
- type: string
const: high
- type: string
const: auto
default: auto
type:
type: string
const: input_image
default: input_image
image_url:
type: string
additionalProperties: false
required:
- detail
- type
title: OpenAIResponseInputMessageContentImage
OpenAIResponseInputMessageContentText:
type: object
properties:
text:
type: string
type:
type: string
const: input_text
default: input_text
additionalProperties: false
required:
- text
- type
title: OpenAIResponseInputMessageContentText
OpenAIResponseInputTool:
type: object
properties:
type:
oneOf:
- type: string
const: web_search
- type: string
const: web_search_preview_2025_03_11
default: web_search
search_context_size:
type: string
default: medium
additionalProperties: false
required:
- type
title: OpenAIResponseInputToolWebSearch
CreateOpenaiResponseRequest:
type: object
properties:
input:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessage'
description: Input message(s) to create the response.
model:
type: string
description: The underlying LLM used for completions.
previous_response_id:
type: string
description: >-
(Optional) if specified, the new response will be a continuation of the
previous response. This can be used to easily fork-off new responses from
existing responses.
store:
type: boolean
stream:
type: boolean
tools:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputTool'
additionalProperties: false
required:
- input
- model
title: CreateOpenaiResponseRequest
OpenAIResponseError:
type: object
properties:
code:
type: string
message:
type: string
additionalProperties: false
required:
- code
- message
title: OpenAIResponseError
OpenAIResponseObject:
type: object
properties:
created_at:
type: integer
error:
$ref: '#/components/schemas/OpenAIResponseError'
id:
type: string
model:
type: string
object:
type: string
const: response
default: response
output:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutput'
parallel_tool_calls:
type: boolean
default: false
previous_response_id:
type: string
status:
type: string
temperature:
type: number
top_p:
type: number
truncation:
type: string
user:
type: string
additionalProperties: false
required:
- created_at
- id
- model
- object
- output
- parallel_tool_calls
- status
title: OpenAIResponseObject
OpenAIResponseOutput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessage'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
discriminator:
propertyName: type
mapping:
message: '#/components/schemas/OpenAIResponseOutputMessage'
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
OpenAIResponseOutputMessage:
type: object
properties:
id:
type: string
content:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutputMessageContent'
role:
type: string
const: assistant
default: assistant
status:
type: string
type:
type: string
const: message
default: message
additionalProperties: false
required:
- id
- content
- role
- status
- type
title: OpenAIResponseOutputMessage
OpenAIResponseOutputMessageContent:
type: object
properties:
text:
type: string
type:
type: string
const: output_text
default: output_text
additionalProperties: false
required:
- text
- type
title: >-
OpenAIResponseOutputMessageContentOutputText
"OpenAIResponseOutputMessageWebSearchToolCall":
type: object
properties:
id:
type: string
status:
type: string
type:
type: string
const: web_search_call
default: web_search_call
additionalProperties: false
required:
- id
- status
- type
title: >-
OpenAIResponseOutputMessageWebSearchToolCall
OpenAIResponseObjectStream:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
discriminator:
propertyName: type
mapping:
response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
"OpenAIResponseObjectStreamResponseCompleted":
type: object
properties:
response:
$ref: '#/components/schemas/OpenAIResponseObject'
type:
type: string
const: response.completed
default: response.completed
additionalProperties: false
required:
- response
- type
title: >-
OpenAIResponseObjectStreamResponseCompleted
"OpenAIResponseObjectStreamResponseCreated":
type: object
properties:
response:
$ref: '#/components/schemas/OpenAIResponseObject'
type:
type: string
const: response.created
default: response.created
additionalProperties: false
required:
- response
- type
title: >-
OpenAIResponseObjectStreamResponseCreated
CreateUploadSessionRequest:
type: object
properties:

View file

@ -179,7 +179,7 @@ class ContentBuilder:
"Creates the content subtree for a request or response."
def is_iterator_type(t):
return "StreamChunk" in str(t)
return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t)
def get_media_type(t):
if is_generic_list(t):

View file

@ -38,6 +38,13 @@ from llama_stack.apis.safety import SafetyViolation
from llama_stack.apis.tools import ToolDef
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from .openai_responses import (
OpenAIResponseInputMessage,
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponseObjectStream,
)
class Attachment(BaseModel):
"""An attachment to an agent turn.
@ -593,3 +600,39 @@ class Agents(Protocol):
:returns: A ListAgentSessionsResponse.
"""
...
# We situate the OpenAI Responses API in the Agents API just like we did things
# for Inference. The Responses API, in its intent, serves the same purpose as
# the Agents API above -- it is essentially a lightweight "agentic loop" with
# integrated tool calling.
#
# Both of these APIs are inherently stateful.
@webmethod(route="/openai/v1/responses/{id}", method="GET")
async def get_openai_response(
self,
id: str,
) -> OpenAIResponseObject:
"""Retrieve an OpenAI response by its ID.
:param id: The ID of the OpenAI response to retrieve.
:returns: An OpenAIResponseObject.
"""
...
@webmethod(route="/openai/v1/responses", method="POST")
async def create_openai_response(
self,
input: Union[str, List[OpenAIResponseInputMessage]],
model: str,
previous_response_id: Optional[str] = None,
store: Optional[bool] = True,
stream: Optional[bool] = False,
tools: Optional[List[OpenAIResponseInputTool]] = None,
) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]:
"""Create a new OpenAI response.
:param input: Input message(s) to create the response.
:param model: The underlying LLM used for completions.
:param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
"""

View file

@ -0,0 +1,140 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import List, Literal, Optional, Union
from pydantic import BaseModel, Field
from typing_extensions import Annotated
from llama_stack.schema_utils import json_schema_type, register_schema
@json_schema_type
class OpenAIResponseError(BaseModel):
code: str
message: str
@json_schema_type
class OpenAIResponseOutputMessageContentOutputText(BaseModel):
text: str
type: Literal["output_text"] = "output_text"
OpenAIResponseOutputMessageContent = Annotated[
Union[OpenAIResponseOutputMessageContentOutputText,],
Field(discriminator="type"),
]
register_schema(OpenAIResponseOutputMessageContent, name="OpenAIResponseOutputMessageContent")
@json_schema_type
class OpenAIResponseOutputMessage(BaseModel):
id: str
content: List[OpenAIResponseOutputMessageContent]
role: Literal["assistant"] = "assistant"
status: str
type: Literal["message"] = "message"
@json_schema_type
class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
id: str
status: str
type: Literal["web_search_call"] = "web_search_call"
OpenAIResponseOutput = Annotated[
Union[
OpenAIResponseOutputMessage,
OpenAIResponseOutputMessageWebSearchToolCall,
],
Field(discriminator="type"),
]
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
@json_schema_type
class OpenAIResponseObject(BaseModel):
created_at: int
error: Optional[OpenAIResponseError] = None
id: str
model: str
object: Literal["response"] = "response"
output: List[OpenAIResponseOutput]
parallel_tool_calls: bool = False
previous_response_id: Optional[str] = None
status: str
temperature: Optional[float] = None
top_p: Optional[float] = None
truncation: Optional[str] = None
user: Optional[str] = None
@json_schema_type
class OpenAIResponseObjectStreamResponseCreated(BaseModel):
response: OpenAIResponseObject
type: Literal["response.created"] = "response.created"
@json_schema_type
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
response: OpenAIResponseObject
type: Literal["response.completed"] = "response.completed"
OpenAIResponseObjectStream = Annotated[
Union[
OpenAIResponseObjectStreamResponseCreated,
OpenAIResponseObjectStreamResponseCompleted,
],
Field(discriminator="type"),
]
register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
@json_schema_type
class OpenAIResponseInputMessageContentText(BaseModel):
text: str
type: Literal["input_text"] = "input_text"
@json_schema_type
class OpenAIResponseInputMessageContentImage(BaseModel):
detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
type: Literal["input_image"] = "input_image"
# TODO: handle file_id
image_url: Optional[str] = None
# TODO: handle file content types
OpenAIResponseInputMessageContent = Annotated[
Union[OpenAIResponseInputMessageContentText, OpenAIResponseInputMessageContentImage],
Field(discriminator="type"),
]
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
@json_schema_type
class OpenAIResponseInputMessage(BaseModel):
content: Union[str, List[OpenAIResponseInputMessageContent]]
role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
type: Optional[Literal["message"]] = "message"
@json_schema_type
class OpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
# TODO: actually use search_context_size somewhere...
search_context_size: Optional[str] = Field(default="medium", pattern="^low|medium|high$")
# TODO: add user_location
OpenAIResponseInputTool = Annotated[
Union[OpenAIResponseInputToolWebSearch,],
Field(discriminator="type"),
]
register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")

View file

@ -23,6 +23,9 @@ from llama_stack.apis.agents import (
Document,
ListAgentSessionsResponse,
ListAgentsResponse,
OpenAIResponseInputMessage,
OpenAIResponseInputTool,
OpenAIResponseObject,
Session,
Turn,
)
@ -40,6 +43,7 @@ from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_imp
from .agent_instance import ChatAgent
from .config import MetaReferenceAgentsImplConfig
from .openai_responses import OpenAIResponsesImpl
logger = logging.getLogger()
logger.setLevel(logging.INFO)
@ -63,9 +67,16 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_groups_api = tool_groups_api
self.in_memory_store = InmemoryKVStoreImpl()
self.openai_responses_impl = None
async def initialize(self) -> None:
self.persistence_store = await kvstore_impl(self.config.persistence_store)
self.openai_responses_impl = OpenAIResponsesImpl(
self.persistence_store,
inference_api=self.inference_api,
tool_groups_api=self.tool_groups_api,
tool_runtime_api=self.tool_runtime_api,
)
# check if "bwrap" is available
if not shutil.which("bwrap"):
@ -244,3 +255,23 @@ class MetaReferenceAgentsImpl(Agents):
agent_id: str,
) -> ListAgentSessionsResponse:
pass
# OpenAI responses
async def get_openai_response(
self,
id: str,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.get_openai_response(id)
async def create_openai_response(
self,
input: Union[str, List[OpenAIResponseInputMessage]],
model: str,
previous_response_id: Optional[str] = None,
store: Optional[bool] = True,
stream: Optional[bool] = False,
tools: Optional[List[OpenAIResponseInputTool]] = None,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.create_openai_response(
input, model, previous_response_id, store, stream, tools
)

View file

@ -0,0 +1,319 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
import uuid
from typing import AsyncIterator, List, Optional, Union, cast
from openai.types.chat import ChatCompletionToolParam
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputMessage,
OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponseObjectStream,
OpenAIResponseObjectStreamResponseCompleted,
OpenAIResponseObjectStreamResponseCreated,
OpenAIResponseOutput,
OpenAIResponseOutputMessage,
OpenAIResponseOutputMessageContentOutputText,
OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.inference.inference import (
Inference,
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartParam,
OpenAIChatCompletionContentPartTextParam,
OpenAIChatCompletionToolCallFunction,
OpenAIChoice,
OpenAIImageURL,
OpenAIMessageParam,
OpenAIToolMessageParam,
OpenAIUserMessageParam,
)
from llama_stack.apis.tools.tools import ToolGroups, ToolInvocationResult, ToolRuntime
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
from llama_stack.providers.utils.kvstore import KVStore
logger = get_logger(name=__name__, category="openai_responses")
OPENAI_RESPONSES_PREFIX = "openai_responses:"
async def _previous_response_to_messages(previous_response: OpenAIResponseObject) -> List[OpenAIMessageParam]:
messages: List[OpenAIMessageParam] = []
for output_message in previous_response.output:
if isinstance(output_message, OpenAIResponseOutputMessage):
messages.append(OpenAIAssistantMessageParam(content=output_message.content[0].text))
return messages
async def _openai_choices_to_output_messages(choices: List[OpenAIChoice]) -> List[OpenAIResponseOutputMessage]:
output_messages = []
for choice in choices:
output_content = ""
if isinstance(choice.message.content, str):
output_content = choice.message.content
elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam):
output_content = choice.message.content.text
# TODO: handle image content
output_messages.append(
OpenAIResponseOutputMessage(
id=f"msg_{uuid.uuid4()}",
content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
status="completed",
)
)
return output_messages
class OpenAIResponsesImpl:
def __init__(
self,
persistence_store: KVStore,
inference_api: Inference,
tool_groups_api: ToolGroups,
tool_runtime_api: ToolRuntime,
):
self.persistence_store = persistence_store
self.inference_api = inference_api
self.tool_groups_api = tool_groups_api
self.tool_runtime_api = tool_runtime_api
async def get_openai_response(
self,
id: str,
) -> OpenAIResponseObject:
key = f"{OPENAI_RESPONSES_PREFIX}{id}"
response_json = await self.persistence_store.get(key=key)
if response_json is None:
raise ValueError(f"OpenAI response with id '{id}' not found")
return OpenAIResponseObject.model_validate_json(response_json)
async def create_openai_response(
self,
input: Union[str, List[OpenAIResponseInputMessage]],
model: str,
previous_response_id: Optional[str] = None,
store: Optional[bool] = True,
stream: Optional[bool] = False,
tools: Optional[List[OpenAIResponseInputTool]] = None,
):
stream = False if stream is None else stream
messages: List[OpenAIMessageParam] = []
if previous_response_id:
previous_response = await self.get_openai_response(previous_response_id)
messages.extend(await _previous_response_to_messages(previous_response))
# TODO: refactor this user_content parsing out into a separate method
user_content: Union[str, List[OpenAIChatCompletionContentPartParam]] = ""
if isinstance(input, list):
user_content = []
for user_input in input:
if isinstance(user_input.content, list):
for user_input_content in user_input.content:
if isinstance(user_input_content, OpenAIResponseInputMessageContentText):
user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input_content.text))
elif isinstance(user_input_content, OpenAIResponseInputMessageContentImage):
if user_input_content.image_url:
image_url = OpenAIImageURL(
url=user_input_content.image_url, detail=user_input_content.detail
)
user_content.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
else:
user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input.content))
else:
user_content = input
messages.append(OpenAIUserMessageParam(content=user_content))
chat_tools = await self._convert_response_tools_to_chat_tools(tools) if tools else None
chat_response = await self.inference_api.openai_chat_completion(
model=model,
messages=messages,
tools=chat_tools,
stream=stream,
)
if stream:
# TODO: refactor this into a separate method that handles streaming
chat_response_id = ""
chat_response_content = []
# TODO: these chunk_ fields are hacky and only take the last chunk into account
chunk_created = 0
chunk_model = ""
chunk_finish_reason = ""
async for chunk in chat_response:
chat_response_id = chunk.id
chunk_created = chunk.created
chunk_model = chunk.model
for chunk_choice in chunk.choices:
# TODO: this only works for text content
chat_response_content.append(chunk_choice.delta.content or "")
if chunk_choice.finish_reason:
chunk_finish_reason = chunk_choice.finish_reason
assistant_message = OpenAIAssistantMessageParam(content="".join(chat_response_content))
chat_response = OpenAIChatCompletion(
id=chat_response_id,
choices=[
OpenAIChoice(
message=assistant_message,
finish_reason=chunk_finish_reason,
index=0,
)
],
created=chunk_created,
model=chunk_model,
)
else:
# dump and reload to map to our pydantic types
chat_response = OpenAIChatCompletion(**chat_response.model_dump())
output_messages: List[OpenAIResponseOutput] = []
if chat_response.choices[0].message.tool_calls:
output_messages.extend(
await self._execute_tool_and_return_final_output(model, stream, chat_response, messages)
)
else:
output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices))
response = OpenAIResponseObject(
created_at=chat_response.created,
id=f"resp-{uuid.uuid4()}",
model=model,
object="response",
status="completed",
output=output_messages,
)
if store:
# Store in kvstore
key = f"{OPENAI_RESPONSES_PREFIX}{response.id}"
await self.persistence_store.set(
key=key,
value=response.model_dump_json(),
)
if stream:
async def async_response() -> AsyncIterator[OpenAIResponseObjectStream]:
# TODO: response created should actually get emitted much earlier in the process
yield OpenAIResponseObjectStreamResponseCreated(response=response)
yield OpenAIResponseObjectStreamResponseCompleted(response=response)
return async_response()
return response
async def _convert_response_tools_to_chat_tools(
self, tools: List[OpenAIResponseInputTool]
) -> List[ChatCompletionToolParam]:
chat_tools: List[ChatCompletionToolParam] = []
for input_tool in tools:
# TODO: Handle other tool types
if input_tool.type == "web_search":
tool_name = "web_search"
tool = await self.tool_groups_api.get_tool(tool_name)
tool_def = ToolDefinition(
tool_name=tool_name,
description=tool.description,
parameters={
param.name: ToolParamDefinition(
param_type=param.parameter_type,
description=param.description,
required=param.required,
default=param.default,
)
for param in tool.parameters
},
)
chat_tool = convert_tooldef_to_openai_tool(tool_def)
chat_tools.append(chat_tool)
else:
raise ValueError(f"Llama Stack OpenAI Responses does not yet support tool type: {input_tool.type}")
return chat_tools
async def _execute_tool_and_return_final_output(
self, model_id: str, stream: bool, chat_response: OpenAIChatCompletion, messages: List[OpenAIMessageParam]
) -> List[OpenAIResponseOutput]:
output_messages: List[OpenAIResponseOutput] = []
choice = chat_response.choices[0]
# If the choice is not an assistant message, we don't need to execute any tools
if not isinstance(choice.message, OpenAIAssistantMessageParam):
return output_messages
# If the assistant message doesn't have any tool calls, we don't need to execute any tools
if not choice.message.tool_calls:
return output_messages
# Add the assistant message with tool_calls response to the messages list
messages.append(choice.message)
for tool_call in choice.message.tool_calls:
tool_call_id = tool_call.id
function = tool_call.function
# If for some reason the tool call doesn't have a function or id, we can't execute it
if not function or not tool_call_id:
continue
# TODO: telemetry spans for tool calls
result = await self._execute_tool_call(function)
# Handle tool call failure
if not result:
output_messages.append(
OpenAIResponseOutputMessageWebSearchToolCall(
id=tool_call_id,
status="failed",
)
)
continue
output_messages.append(
OpenAIResponseOutputMessageWebSearchToolCall(
id=tool_call_id,
status="completed",
),
)
result_content = ""
# TODO: handle other result content types and lists
if isinstance(result.content, str):
result_content = result.content
messages.append(OpenAIToolMessageParam(content=result_content, tool_call_id=tool_call_id))
tool_results_chat_response = await self.inference_api.openai_chat_completion(
model=model_id,
messages=messages,
stream=stream,
)
# type cast to appease mypy
tool_results_chat_response = cast(OpenAIChatCompletion, tool_results_chat_response)
tool_final_outputs = await _openai_choices_to_output_messages(tool_results_chat_response.choices)
# TODO: Wire in annotations with URLs, titles, etc to these output messages
output_messages.extend(tool_final_outputs)
return output_messages
async def _execute_tool_call(
self,
function: OpenAIChatCompletionToolCallFunction,
) -> Optional[ToolInvocationResult]:
if not function.name:
return None
function_args = json.loads(function.arguments) if function.arguments else {}
logger.info(f"executing tool call: {function.name} with args: {function_args}")
result = await self.tool_runtime_api.invoke_tool(
tool_name=function.name,
kwargs=function_args,
)
logger.debug(f"tool call {function.name} completed with result: {result}")
return result

View file

@ -478,6 +478,8 @@ class JsonSchemaGenerator:
}
return ret
elif origin_type is Literal:
if len(typing.get_args(typ)) != 1:
raise ValueError(f"Literal type {typ} has {len(typing.get_args(typ))} arguments")
(literal_value,) = typing.get_args(typ) # unpack value of literal type
schema = self.type_to_schema(type(literal_value))
schema["const"] = literal_value

View file

@ -14,6 +14,7 @@ from pathlib import Path
import pytest
import yaml
from llama_stack_client import LlamaStackClient
from openai import OpenAI
from llama_stack import LlamaStackAsLibraryClient
from llama_stack.apis.datatypes import Api
@ -207,3 +208,9 @@ def llama_stack_client(request, provider_data, text_model_id):
raise RuntimeError("Initialization failed")
return client
@pytest.fixture(scope="session")
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="fake")

View file

@ -0,0 +1,37 @@
{
"non_streaming_01": {
"data": {
"question": "Which planet do humans live on?",
"expected": "Earth"
}
},
"non_streaming_02": {
"data": {
"question": "Which planet has rings around it with a name starting with letter S?",
"expected": "Saturn"
}
},
"streaming_01": {
"data": {
"question": "What's the name of the Sun in latin?",
"expected": "Sol"
}
},
"streaming_02": {
"data": {
"question": "What is the name of the US captial?",
"expected": "Washington"
}
},
"tools_web_search_01": {
"data": {
"input": "How many experts does the Llama 4 Maverick model have?",
"tools": [
{
"type": "web_search"
}
],
"expected": "128"
}
}
}

View file

@ -12,6 +12,7 @@ class TestCase:
_apis = [
"inference/chat_completion",
"inference/completion",
"openai/responses",
]
_jsonblob = {}

View file

@ -13,3 +13,5 @@ test_exclusions:
- test_chat_non_streaming_image
- test_chat_streaming_image
- test_chat_multi_turn_multiple_images
- test_response_non_streaming_image
- test_response_non_streaming_multi_turn_image

View file

@ -13,3 +13,5 @@ test_exclusions:
- test_chat_non_streaming_image
- test_chat_streaming_image
- test_chat_multi_turn_multiple_images
- test_response_non_streaming_image
- test_response_non_streaming_multi_turn_image

View file

@ -13,3 +13,5 @@ test_exclusions:
- test_chat_non_streaming_image
- test_chat_streaming_image
- test_chat_multi_turn_multiple_images
- test_response_non_streaming_image
- test_response_non_streaming_multi_turn_image

View file

@ -16,7 +16,7 @@ Description:
Configuration:
- Provider details (models, display names) are loaded from `tests/verifications/config.yaml`.
- Provider details (models, display names) are loaded from `tests/verifications/conf/*.yaml`.
- Test cases are defined in YAML files within `tests/verifications/openai_api/fixtures/test_cases/`.
- Test results are stored in `tests/verifications/test_results/`.

View file

@ -1,10 +1,15 @@
# This is a temporary run file because model names used by the verification tests
# are not quite consistent with various pre-existing distributions.
#
version: '2'
image_name: openai-api-verification
apis:
- agents
- inference
- telemetry
- tool_runtime
- vector_io
- safety
providers:
inference:
- provider_id: together
@ -16,12 +21,12 @@ providers:
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
api_key: ${env.FIREWORKS_API_KEY:}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY}
api_key: ${env.GROQ_API_KEY:}
- provider_id: openai
provider_type: remote::openai
config:
@ -45,6 +50,19 @@ providers:
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/openai/trace_store.db}
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/agents_store.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search

View file

@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs
def pytest_generate_tests(metafunc):
"""Dynamically parametrize tests based on the selected provider and config."""
if "model" in metafunc.fixturenames:
provider = metafunc.config.getoption("provider")
if not provider:
print("Warning: --provider not specified. Skipping model parametrization.")
metafunc.parametrize("model", [])
return
try:
config_data = _load_all_verification_configs()
except (FileNotFoundError, IOError) as e:
print(f"ERROR loading verification configs: {e}")
config_data = {"providers": {}}
provider_config = config_data.get("providers", {}).get(provider)
if provider_config:
models = provider_config.get("models", [])
if models:
metafunc.parametrize("model", models)
else:
print(f"Warning: No models found for provider '{provider}' in config.")
metafunc.parametrize("model", []) # Parametrize empty if no models found
else:
print(f"Warning: Provider '{provider}' not found in config. No models parametrized.")
metafunc.parametrize("model", []) # Parametrize empty if provider not found

View file

@ -5,14 +5,16 @@
# the root directory of this source tree.
import os
import re
from pathlib import Path
import pytest
import yaml
from openai import OpenAI
# --- Helper Functions ---
# --- Helper Function to Load Config ---
def _load_all_verification_configs():
"""Load and aggregate verification configs from the conf/ directory."""
# Note: Path is relative to *this* file (fixtures.py)
@ -44,7 +46,30 @@ def _load_all_verification_configs():
return {"providers": all_provider_configs}
# --- End Helper Function ---
def case_id_generator(case):
"""Generate a test ID from the case's 'case_id' field, or use a default."""
case_id = case.get("case_id")
if isinstance(case_id, (str, int)):
return re.sub(r"\\W|^(?=\\d)", "_", str(case_id))
return None
def should_skip_test(verification_config, provider, model, test_name_base):
"""Check if a test should be skipped based on config exclusions."""
provider_config = verification_config.get("providers", {}).get(provider)
if not provider_config:
return False # No config for provider, don't skip
exclusions = provider_config.get("test_exclusions", {}).get(model, [])
return test_name_base in exclusions
# Helper to get the base test name from the request object
def get_base_test_name(request):
return request.node.originalname
# --- End Helper Functions ---
@pytest.fixture(scope="session")

View file

@ -0,0 +1,65 @@
test_response_basic:
test_name: test_response_basic
test_params:
case:
- case_id: "earth"
input: "Which planet do humans live on?"
output: "earth"
- case_id: "saturn"
input: "Which planet has rings around it with a name starting with letter S?"
output: "saturn"
test_response_multi_turn:
test_name: test_response_multi_turn
test_params:
case:
- case_id: "earth"
turns:
- input: "Which planet do humans live on?"
output: "earth"
- input: "What is the name of the planet from your previous response?"
output: "earth"
test_response_web_search:
test_name: test_response_web_search
test_params:
case:
- case_id: "llama_experts"
input: "How many experts does the Llama 4 Maverick model have?"
tools:
- type: web_search
search_context_size: "low"
output: "128"
test_response_image:
test_name: test_response_image
test_params:
case:
- case_id: "llama_image"
input:
- role: user
content:
- type: input_text
text: "Identify the type of animal in this image."
- type: input_image
image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
output: "llama"
test_response_multi_turn_image:
test_name: test_response_multi_turn_image
test_params:
case:
- case_id: "llama_image_search"
turns:
- input:
- role: user
content:
- type: input_text
text: "What type of animal is in this image? Please respond with a single word that starts with the letter 'L'."
- type: input_image
image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
output: "llama"
- input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick' and 'scout'."
tools:
- type: web_search
output: "model"

View file

@ -7,7 +7,6 @@
import base64
import copy
import json
import re
from pathlib import Path
from typing import Any
@ -16,7 +15,9 @@ from openai import APIError
from pydantic import BaseModel
from tests.verifications.openai_api.fixtures.fixtures import (
_load_all_verification_configs,
case_id_generator,
get_base_test_name,
should_skip_test,
)
from tests.verifications.openai_api.fixtures.load import load_test_cases
@ -25,57 +26,6 @@ chat_completion_test_cases = load_test_cases("chat_completion")
THIS_DIR = Path(__file__).parent
def case_id_generator(case):
"""Generate a test ID from the case's 'case_id' field, or use a default."""
case_id = case.get("case_id")
if isinstance(case_id, (str, int)):
return re.sub(r"\\W|^(?=\\d)", "_", str(case_id))
return None
def pytest_generate_tests(metafunc):
"""Dynamically parametrize tests based on the selected provider and config."""
if "model" in metafunc.fixturenames:
provider = metafunc.config.getoption("provider")
if not provider:
print("Warning: --provider not specified. Skipping model parametrization.")
metafunc.parametrize("model", [])
return
try:
config_data = _load_all_verification_configs()
except (FileNotFoundError, IOError) as e:
print(f"ERROR loading verification configs: {e}")
config_data = {"providers": {}}
provider_config = config_data.get("providers", {}).get(provider)
if provider_config:
models = provider_config.get("models", [])
if models:
metafunc.parametrize("model", models)
else:
print(f"Warning: No models found for provider '{provider}' in config.")
metafunc.parametrize("model", []) # Parametrize empty if no models found
else:
print(f"Warning: Provider '{provider}' not found in config. No models parametrized.")
metafunc.parametrize("model", []) # Parametrize empty if provider not found
def should_skip_test(verification_config, provider, model, test_name_base):
"""Check if a test should be skipped based on config exclusions."""
provider_config = verification_config.get("providers", {}).get(provider)
if not provider_config:
return False # No config for provider, don't skip
exclusions = provider_config.get("test_exclusions", {}).get(model, [])
return test_name_base in exclusions
# Helper to get the base test name from the request object
def get_base_test_name(request):
return request.node.originalname
@pytest.fixture
def multi_image_data():
files = [

View file

@ -0,0 +1,166 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
from tests.verifications.openai_api.fixtures.fixtures import (
case_id_generator,
get_base_test_name,
should_skip_test,
)
from tests.verifications.openai_api.fixtures.load import load_test_cases
responses_test_cases = load_test_cases("responses")
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_basic"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_basic(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
stream=False,
)
output_text = response.output_text.lower().strip()
assert len(output_text) > 0
assert case["output"].lower() in output_text
retrieved_response = openai_client.responses.retrieve(response_id=response.id)
assert retrieved_response.output_text == response.output_text
next_response = openai_client.responses.create(
model=model, input="Repeat your previous response in all caps.", previous_response_id=response.id
)
next_output_text = next_response.output_text.strip()
assert case["output"].upper() in next_output_text
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_basic"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_streaming_basic(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
stream=True,
)
streamed_content = []
response_id = ""
for chunk in response:
if chunk.type == "response.completed":
response_id = chunk.response.id
streamed_content.append(chunk.response.output_text.strip())
assert len(streamed_content) > 0
assert case["output"].lower() in "".join(streamed_content).lower()
retrieved_response = openai_client.responses.retrieve(response_id=response_id)
assert retrieved_response.output_text == "".join(streamed_content)
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_multi_turn"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_multi_turn(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
previous_response_id = None
for turn in case["turns"]:
response = openai_client.responses.create(
model=model,
input=turn["input"],
previous_response_id=previous_response_id,
tools=turn["tools"] if "tools" in turn else None,
)
previous_response_id = response.id
output_text = response.output_text.lower()
assert turn["output"].lower() in output_text
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_web_search"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_web_search(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
tools=case["tools"],
stream=False,
)
assert len(response.output) > 1
assert response.output[0].type == "web_search_call"
assert response.output[0].status == "completed"
assert response.output[1].type == "message"
assert response.output[1].status == "completed"
assert response.output[1].role == "assistant"
assert len(response.output[1].content) > 0
assert case["output"].lower() in response.output_text.lower().strip()
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_image"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_image(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
stream=False,
)
output_text = response.output_text.lower()
assert case["output"].lower() in output_text
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_multi_turn_image"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_multi_turn_image(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
previous_response_id = None
for turn in case["turns"]:
response = openai_client.responses.create(
model=model,
input=turn["input"],
previous_response_id=previous_response_id,
tools=turn["tools"] if "tools" in turn else None,
)
previous_response_id = response.id
output_text = response.output_text.lower()
assert turn["output"].lower() in output_text