feat: OpenAI Responses API (#1989)

# What does this PR do?

This provides an initial [OpenAI Responses
API](https://platform.openai.com/docs/api-reference/responses)
implementation. The API is not yet complete; this is more of a
proof of concept showing how we can store responses in our key-value
stores and use them to support Responses API concepts like
`previous_response_id`.

## Test Plan

I've added a new
`tests/integration/openai_responses/test_openai_responses.py` as part of
test-driven development for this new API. For now I've only tested it
locally with the remote-vllm provider, but it should work with any of
our inference providers, since the only API it requires from the
inference provider is the `openai_chat_completion` endpoint.

```
VLLM_URL="http://localhost:8000/v1" \
INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" \
llama stack build --template remote-vllm --image-type venv --run
```

```
LLAMA_STACK_CONFIG="http://localhost:8321" \
python -m pytest -v \
  tests/integration/openai_responses/test_openai_responses.py \
  --text-model "meta-llama/Llama-3.2-3B-Instruct"
```
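
You can also exercise the new endpoints by hand with the OpenAI Python
client. A minimal sketch, assuming the stack from the build step above is
listening on `http://localhost:8321` (the `/v1/openai/v1` base path and
dummy API key mirror what the test fixtures use):

```
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="fake")

# Create a response; with store=True (the default) it is persisted in the
# stack's key-value store under its response id.
response = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    input="Which planet do humans live on?",
)
print(response.output_text)

# Fork a follow-up off the stored response via previous_response_id.
follow_up = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    input="Repeat your previous response in all caps.",
    previous_response_id=response.id,
)
print(follow_up.output_text)
```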

---------

Signed-off-by: Ben Browning <bbrownin@redhat.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Ben Browning committed on 2025-04-28 17:06:00 -04:00 (via GitHub)
parent 79851d93aa
commit 8dfce2f596
21 changed files with 1766 additions and 59 deletions


@@ -497,6 +497,54 @@
}
}
},
"/v1/openai/v1/responses": {
"post": {
"responses": {
"200": {
"description": "Runtime representation of an annotated type.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObject"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObjectStream"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Create a new OpenAI response.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateOpenaiResponseRequest"
}
}
},
"required": true
}
}
},
"/v1/files": { "/v1/files": {
"get": { "get": {
"responses": { "responses": {
@ -1278,6 +1326,49 @@
] ]
} }
}, },
"/v1/openai/v1/responses/{id}": {
"get": {
"responses": {
"200": {
"description": "An OpenAIResponseObject.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObject"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Retrieve an OpenAI response by its ID.",
"parameters": [
{
"name": "id",
"in": "path",
"description": "The ID of the OpenAI response to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/scoring-functions/{scoring_fn_id}": { "/v1/scoring-functions/{scoring_fn_id}": {
"get": { "get": {
"responses": { "responses": {
@ -6192,6 +6283,427 @@
], ],
"title": "AgentTurnResponseTurnStartPayload" "title": "AgentTurnResponseTurnStartPayload"
}, },
"OpenAIResponseInputMessage": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
}
}
]
},
"role": {
"oneOf": [
{
"type": "string",
"const": "system"
},
{
"type": "string",
"const": "developer"
},
{
"type": "string",
"const": "user"
},
{
"type": "string",
"const": "assistant"
}
]
},
"type": {
"type": "string",
"const": "message",
"default": "message"
}
},
"additionalProperties": false,
"required": [
"content",
"role"
],
"title": "OpenAIResponseInputMessage"
},
"OpenAIResponseInputMessageContent": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentText"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
}
}
},
"OpenAIResponseInputMessageContentImage": {
"type": "object",
"properties": {
"detail": {
"oneOf": [
{
"type": "string",
"const": "low"
},
{
"type": "string",
"const": "high"
},
{
"type": "string",
"const": "auto"
}
],
"default": "auto"
},
"type": {
"type": "string",
"const": "input_image",
"default": "input_image"
},
"image_url": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"detail",
"type"
],
"title": "OpenAIResponseInputMessageContentImage"
},
"OpenAIResponseInputMessageContentText": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"type": {
"type": "string",
"const": "input_text",
"default": "input_text"
}
},
"additionalProperties": false,
"required": [
"text",
"type"
],
"title": "OpenAIResponseInputMessageContentText"
},
"OpenAIResponseInputTool": {
"type": "object",
"properties": {
"type": {
"oneOf": [
{
"type": "string",
"const": "web_search"
},
{
"type": "string",
"const": "web_search_preview_2025_03_11"
}
],
"default": "web_search"
},
"search_context_size": {
"type": "string",
"default": "medium"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "OpenAIResponseInputToolWebSearch"
},
"CreateOpenaiResponseRequest": {
"type": "object",
"properties": {
"input": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessage"
}
}
],
"description": "Input message(s) to create the response."
},
"model": {
"type": "string",
"description": "The underlying LLM used for completions."
},
"previous_response_id": {
"type": "string",
"description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
},
"store": {
"type": "boolean"
},
"stream": {
"type": "boolean"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputTool"
}
}
},
"additionalProperties": false,
"required": [
"input",
"model"
],
"title": "CreateOpenaiResponseRequest"
},
"OpenAIResponseError": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"message": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"code",
"message"
],
"title": "OpenAIResponseError"
},
"OpenAIResponseObject": {
"type": "object",
"properties": {
"created_at": {
"type": "integer"
},
"error": {
"$ref": "#/components/schemas/OpenAIResponseError"
},
"id": {
"type": "string"
},
"model": {
"type": "string"
},
"object": {
"type": "string",
"const": "response",
"default": "response"
},
"output": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutput"
}
},
"parallel_tool_calls": {
"type": "boolean",
"default": false
},
"previous_response_id": {
"type": "string"
},
"status": {
"type": "string"
},
"temperature": {
"type": "number"
},
"top_p": {
"type": "number"
},
"truncation": {
"type": "string"
},
"user": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"created_at",
"id",
"model",
"object",
"output",
"parallel_tool_calls",
"status"
],
"title": "OpenAIResponseObject"
},
"OpenAIResponseOutput": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessage"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"message": "#/components/schemas/OpenAIResponseOutputMessage",
"web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
}
}
},
"OpenAIResponseOutputMessage": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"content": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutputMessageContent"
}
},
"role": {
"type": "string",
"const": "assistant",
"default": "assistant"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "message",
"default": "message"
}
},
"additionalProperties": false,
"required": [
"id",
"content",
"role",
"status",
"type"
],
"title": "OpenAIResponseOutputMessage"
},
"OpenAIResponseOutputMessageContent": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"type": {
"type": "string",
"const": "output_text",
"default": "output_text"
}
},
"additionalProperties": false,
"required": [
"text",
"type"
],
"title": "OpenAIResponseOutputMessageContentOutputText"
},
"OpenAIResponseOutputMessageWebSearchToolCall": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "web_search_call",
"default": "web_search_call"
}
},
"additionalProperties": false,
"required": [
"id",
"status",
"type"
],
"title": "OpenAIResponseOutputMessageWebSearchToolCall"
},
"OpenAIResponseObjectStream": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
},
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
"response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
}
},
"OpenAIResponseObjectStreamResponseCompleted": {
"type": "object",
"properties": {
"response": {
"$ref": "#/components/schemas/OpenAIResponseObject"
},
"type": {
"type": "string",
"const": "response.completed",
"default": "response.completed"
}
},
"additionalProperties": false,
"required": [
"response",
"type"
],
"title": "OpenAIResponseObjectStreamResponseCompleted"
},
"OpenAIResponseObjectStreamResponseCreated": {
"type": "object",
"properties": {
"response": {
"$ref": "#/components/schemas/OpenAIResponseObject"
},
"type": {
"type": "string",
"const": "response.created",
"default": "response.created"
}
},
"additionalProperties": false,
"required": [
"response",
"type"
],
"title": "OpenAIResponseObjectStreamResponseCreated"
},
"CreateUploadSessionRequest": { "CreateUploadSessionRequest": {
"type": "object", "type": "object",
"properties": { "properties": {


@@ -330,6 +330,39 @@ paths:
schema:
$ref: '#/components/schemas/CreateAgentTurnRequest'
required: true
/v1/openai/v1/responses:
post:
responses:
'200':
description: >-
Runtime representation of an annotated type.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIResponseObject'
text/event-stream:
schema:
$ref: '#/components/schemas/OpenAIResponseObjectStream'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Create a new OpenAI response.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
required: true
/v1/files:
get:
responses:
@@ -875,6 +908,36 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/responses/{id}:
get:
responses:
'200':
description: An OpenAIResponseObject.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIResponseObject'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Retrieve an OpenAI response by its ID.
parameters:
- name: id
in: path
description: >-
The ID of the OpenAI response to retrieve.
required: true
schema:
type: string
/v1/scoring-functions/{scoring_fn_id}:
get:
responses:
@@ -4329,6 +4392,293 @@ components:
- event_type
- turn_id
title: AgentTurnResponseTurnStartPayload
OpenAIResponseInputMessage:
type: object
properties:
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
role:
oneOf:
- type: string
const: system
- type: string
const: developer
- type: string
const: user
- type: string
const: assistant
type:
type: string
const: message
default: message
additionalProperties: false
required:
- content
- role
title: OpenAIResponseInputMessage
OpenAIResponseInputMessageContent:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
discriminator:
propertyName: type
mapping:
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
OpenAIResponseInputMessageContentImage:
type: object
properties:
detail:
oneOf:
- type: string
const: low
- type: string
const: high
- type: string
const: auto
default: auto
type:
type: string
const: input_image
default: input_image
image_url:
type: string
additionalProperties: false
required:
- detail
- type
title: OpenAIResponseInputMessageContentImage
OpenAIResponseInputMessageContentText:
type: object
properties:
text:
type: string
type:
type: string
const: input_text
default: input_text
additionalProperties: false
required:
- text
- type
title: OpenAIResponseInputMessageContentText
OpenAIResponseInputTool:
type: object
properties:
type:
oneOf:
- type: string
const: web_search
- type: string
const: web_search_preview_2025_03_11
default: web_search
search_context_size:
type: string
default: medium
additionalProperties: false
required:
- type
title: OpenAIResponseInputToolWebSearch
CreateOpenaiResponseRequest:
type: object
properties:
input:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessage'
description: Input message(s) to create the response.
model:
type: string
description: The underlying LLM used for completions.
previous_response_id:
type: string
description: >-
(Optional) if specified, the new response will be a continuation of the
previous response. This can be used to easily fork off new responses
from existing responses.
store:
type: boolean
stream:
type: boolean
tools:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputTool'
additionalProperties: false
required:
- input
- model
title: CreateOpenaiResponseRequest
OpenAIResponseError:
type: object
properties:
code:
type: string
message:
type: string
additionalProperties: false
required:
- code
- message
title: OpenAIResponseError
OpenAIResponseObject:
type: object
properties:
created_at:
type: integer
error:
$ref: '#/components/schemas/OpenAIResponseError'
id:
type: string
model:
type: string
object:
type: string
const: response
default: response
output:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutput'
parallel_tool_calls:
type: boolean
default: false
previous_response_id:
type: string
status:
type: string
temperature:
type: number
top_p:
type: number
truncation:
type: string
user:
type: string
additionalProperties: false
required:
- created_at
- id
- model
- object
- output
- parallel_tool_calls
- status
title: OpenAIResponseObject
OpenAIResponseOutput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessage'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
discriminator:
propertyName: type
mapping:
message: '#/components/schemas/OpenAIResponseOutputMessage'
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
OpenAIResponseOutputMessage:
type: object
properties:
id:
type: string
content:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutputMessageContent'
role:
type: string
const: assistant
default: assistant
status:
type: string
type:
type: string
const: message
default: message
additionalProperties: false
required:
- id
- content
- role
- status
- type
title: OpenAIResponseOutputMessage
OpenAIResponseOutputMessageContent:
type: object
properties:
text:
type: string
type:
type: string
const: output_text
default: output_text
additionalProperties: false
required:
- text
- type
title: >-
OpenAIResponseOutputMessageContentOutputText
"OpenAIResponseOutputMessageWebSearchToolCall":
type: object
properties:
id:
type: string
status:
type: string
type:
type: string
const: web_search_call
default: web_search_call
additionalProperties: false
required:
- id
- status
- type
title: >-
OpenAIResponseOutputMessageWebSearchToolCall
OpenAIResponseObjectStream:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
discriminator:
propertyName: type
mapping:
response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
"OpenAIResponseObjectStreamResponseCompleted":
type: object
properties:
response:
$ref: '#/components/schemas/OpenAIResponseObject'
type:
type: string
const: response.completed
default: response.completed
additionalProperties: false
required:
- response
- type
title: >-
OpenAIResponseObjectStreamResponseCompleted
"OpenAIResponseObjectStreamResponseCreated":
type: object
properties:
response:
$ref: '#/components/schemas/OpenAIResponseObject'
type:
type: string
const: response.created
default: response.created
additionalProperties: false
required:
- response
- type
title: >-
OpenAIResponseObjectStreamResponseCreated
CreateUploadSessionRequest:
type: object
properties:
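
The `text/event-stream` content declared above is what a streaming create
returns; in this initial implementation the stream carries exactly two
events, `response.created` followed by `response.completed`, each wrapping
the full response object. A sketch with curl (same local-stack assumptions
as above):

```
curl -N http://localhost:8321/v1/openai/v1/responses \
  -H 'Content-Type: application/json' \
  -d '{"model": "meta-llama/Llama-3.2-3B-Instruct", "input": "Say hello.", "stream": true}'
```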


@@ -179,7 +179,7 @@ class ContentBuilder:
"Creates the content subtree for a request or response."
def is_iterator_type(t):
-return "StreamChunk" in str(t)
+return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t)
def get_media_type(t):
if is_generic_list(t):


@@ -38,6 +38,13 @@ from llama_stack.apis.safety import SafetyViolation
from llama_stack.apis.tools import ToolDef
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from .openai_responses import (
OpenAIResponseInputMessage,
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponseObjectStream,
)
class Attachment(BaseModel):
"""An attachment to an agent turn.
@@ -593,3 +600,39 @@ class Agents(Protocol):
:returns: A ListAgentSessionsResponse.
"""
...
# We situate the OpenAI Responses API in the Agents API just like we did things
# for Inference. The Responses API, in its intent, serves the same purpose as
# the Agents API above -- it is essentially a lightweight "agentic loop" with
# integrated tool calling.
#
# Both of these APIs are inherently stateful.
@webmethod(route="/openai/v1/responses/{id}", method="GET")
async def get_openai_response(
self,
id: str,
) -> OpenAIResponseObject:
"""Retrieve an OpenAI response by its ID.
:param id: The ID of the OpenAI response to retrieve.
:returns: An OpenAIResponseObject.
"""
...
@webmethod(route="/openai/v1/responses", method="POST")
async def create_openai_response(
self,
input: Union[str, List[OpenAIResponseInputMessage]],
model: str,
previous_response_id: Optional[str] = None,
store: Optional[bool] = True,
stream: Optional[bool] = False,
tools: Optional[List[OpenAIResponseInputTool]] = None,
) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]:
"""Create a new OpenAI response.
:param input: Input message(s) to create the response.
:param model: The underlying LLM used for completions.
:param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork off new responses from existing responses.
"""


@@ -0,0 +1,140 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import List, Literal, Optional, Union
from pydantic import BaseModel, Field
from typing_extensions import Annotated
from llama_stack.schema_utils import json_schema_type, register_schema
@json_schema_type
class OpenAIResponseError(BaseModel):
code: str
message: str
@json_schema_type
class OpenAIResponseOutputMessageContentOutputText(BaseModel):
text: str
type: Literal["output_text"] = "output_text"
OpenAIResponseOutputMessageContent = Annotated[
Union[OpenAIResponseOutputMessageContentOutputText,],
Field(discriminator="type"),
]
register_schema(OpenAIResponseOutputMessageContent, name="OpenAIResponseOutputMessageContent")
@json_schema_type
class OpenAIResponseOutputMessage(BaseModel):
id: str
content: List[OpenAIResponseOutputMessageContent]
role: Literal["assistant"] = "assistant"
status: str
type: Literal["message"] = "message"
@json_schema_type
class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
id: str
status: str
type: Literal["web_search_call"] = "web_search_call"
OpenAIResponseOutput = Annotated[
Union[
OpenAIResponseOutputMessage,
OpenAIResponseOutputMessageWebSearchToolCall,
],
Field(discriminator="type"),
]
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
@json_schema_type
class OpenAIResponseObject(BaseModel):
created_at: int
error: Optional[OpenAIResponseError] = None
id: str
model: str
object: Literal["response"] = "response"
output: List[OpenAIResponseOutput]
parallel_tool_calls: bool = False
previous_response_id: Optional[str] = None
status: str
temperature: Optional[float] = None
top_p: Optional[float] = None
truncation: Optional[str] = None
user: Optional[str] = None
@json_schema_type
class OpenAIResponseObjectStreamResponseCreated(BaseModel):
response: OpenAIResponseObject
type: Literal["response.created"] = "response.created"
@json_schema_type
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
response: OpenAIResponseObject
type: Literal["response.completed"] = "response.completed"
OpenAIResponseObjectStream = Annotated[
Union[
OpenAIResponseObjectStreamResponseCreated,
OpenAIResponseObjectStreamResponseCompleted,
],
Field(discriminator="type"),
]
register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
@json_schema_type
class OpenAIResponseInputMessageContentText(BaseModel):
text: str
type: Literal["input_text"] = "input_text"
@json_schema_type
class OpenAIResponseInputMessageContentImage(BaseModel):
detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
type: Literal["input_image"] = "input_image"
# TODO: handle file_id
image_url: Optional[str] = None
# TODO: handle file content types
OpenAIResponseInputMessageContent = Annotated[
Union[OpenAIResponseInputMessageContentText, OpenAIResponseInputMessageContentImage],
Field(discriminator="type"),
]
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
@json_schema_type
class OpenAIResponseInputMessage(BaseModel):
content: Union[str, List[OpenAIResponseInputMessageContent]]
role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
type: Optional[Literal["message"]] = "message"
@json_schema_type
class OpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
# TODO: actually use search_context_size somewhere...
search_context_size: Optional[str] = Field(default="medium", pattern="^low|medium|high$")
# TODO: add user_location
OpenAIResponseInputTool = Annotated[
Union[OpenAIResponseInputToolWebSearch,],
Field(discriminator="type"),
]
register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
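
Each `Annotated[Union[...], Field(discriminator="type")]` above routes
validation on the literal `type` field, which is also what produces the
`discriminator`/`mapping` blocks in the generated OpenAPI spec. A small
sketch of that behavior using pydantic's `TypeAdapter`:

```
from pydantic import TypeAdapter

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInputMessageContent,
    OpenAIResponseInputMessageContentText,
)

# The "type" key selects the concrete class from the union.
adapter = TypeAdapter(OpenAIResponseInputMessageContent)
content = adapter.validate_python({"type": "input_text", "text": "hello"})
assert isinstance(content, OpenAIResponseInputMessageContentText)
```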


@@ -23,6 +23,9 @@ from llama_stack.apis.agents import (
Document,
ListAgentSessionsResponse,
ListAgentsResponse,
OpenAIResponseInputMessage,
OpenAIResponseInputTool,
OpenAIResponseObject,
Session,
Turn,
)
@@ -40,6 +43,7 @@ from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
from .agent_instance import ChatAgent
from .config import MetaReferenceAgentsImplConfig
from .openai_responses import OpenAIResponsesImpl
logger = logging.getLogger()
logger.setLevel(logging.INFO)
@@ -63,9 +67,16 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_groups_api = tool_groups_api
self.in_memory_store = InmemoryKVStoreImpl()
self.openai_responses_impl = None
async def initialize(self) -> None:
self.persistence_store = await kvstore_impl(self.config.persistence_store)
self.openai_responses_impl = OpenAIResponsesImpl(
self.persistence_store,
inference_api=self.inference_api,
tool_groups_api=self.tool_groups_api,
tool_runtime_api=self.tool_runtime_api,
)
# check if "bwrap" is available # check if "bwrap" is available
if not shutil.which("bwrap"): if not shutil.which("bwrap"):
@ -244,3 +255,23 @@ class MetaReferenceAgentsImpl(Agents):
agent_id: str, agent_id: str,
) -> ListAgentSessionsResponse: ) -> ListAgentSessionsResponse:
pass pass
# OpenAI responses
async def get_openai_response(
self,
id: str,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.get_openai_response(id)
async def create_openai_response(
self,
input: Union[str, List[OpenAIResponseInputMessage]],
model: str,
previous_response_id: Optional[str] = None,
store: Optional[bool] = True,
stream: Optional[bool] = False,
tools: Optional[List[OpenAIResponseInputTool]] = None,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.create_openai_response(
input, model, previous_response_id, store, stream, tools
)


@@ -0,0 +1,319 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
import uuid
from typing import AsyncIterator, List, Optional, Union, cast
from openai.types.chat import ChatCompletionToolParam
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputMessage,
OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponseObjectStream,
OpenAIResponseObjectStreamResponseCompleted,
OpenAIResponseObjectStreamResponseCreated,
OpenAIResponseOutput,
OpenAIResponseOutputMessage,
OpenAIResponseOutputMessageContentOutputText,
OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.inference.inference import (
Inference,
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartParam,
OpenAIChatCompletionContentPartTextParam,
OpenAIChatCompletionToolCallFunction,
OpenAIChoice,
OpenAIImageURL,
OpenAIMessageParam,
OpenAIToolMessageParam,
OpenAIUserMessageParam,
)
from llama_stack.apis.tools.tools import ToolGroups, ToolInvocationResult, ToolRuntime
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
from llama_stack.providers.utils.kvstore import KVStore
logger = get_logger(name=__name__, category="openai_responses")
OPENAI_RESPONSES_PREFIX = "openai_responses:"
async def _previous_response_to_messages(previous_response: OpenAIResponseObject) -> List[OpenAIMessageParam]:
messages: List[OpenAIMessageParam] = []
for output_message in previous_response.output:
if isinstance(output_message, OpenAIResponseOutputMessage):
messages.append(OpenAIAssistantMessageParam(content=output_message.content[0].text))
return messages
async def _openai_choices_to_output_messages(choices: List[OpenAIChoice]) -> List[OpenAIResponseOutputMessage]:
output_messages = []
for choice in choices:
output_content = ""
if isinstance(choice.message.content, str):
output_content = choice.message.content
elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam):
output_content = choice.message.content.text
# TODO: handle image content
output_messages.append(
OpenAIResponseOutputMessage(
id=f"msg_{uuid.uuid4()}",
content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
status="completed",
)
)
return output_messages
class OpenAIResponsesImpl:
def __init__(
self,
persistence_store: KVStore,
inference_api: Inference,
tool_groups_api: ToolGroups,
tool_runtime_api: ToolRuntime,
):
self.persistence_store = persistence_store
self.inference_api = inference_api
self.tool_groups_api = tool_groups_api
self.tool_runtime_api = tool_runtime_api
async def get_openai_response(
self,
id: str,
) -> OpenAIResponseObject:
key = f"{OPENAI_RESPONSES_PREFIX}{id}"
response_json = await self.persistence_store.get(key=key)
if response_json is None:
raise ValueError(f"OpenAI response with id '{id}' not found")
return OpenAIResponseObject.model_validate_json(response_json)
async def create_openai_response(
self,
input: Union[str, List[OpenAIResponseInputMessage]],
model: str,
previous_response_id: Optional[str] = None,
store: Optional[bool] = True,
stream: Optional[bool] = False,
tools: Optional[List[OpenAIResponseInputTool]] = None,
):
stream = False if stream is None else stream
messages: List[OpenAIMessageParam] = []
if previous_response_id:
previous_response = await self.get_openai_response(previous_response_id)
messages.extend(await _previous_response_to_messages(previous_response))
# TODO: refactor this user_content parsing out into a separate method
user_content: Union[str, List[OpenAIChatCompletionContentPartParam]] = ""
if isinstance(input, list):
user_content = []
for user_input in input:
if isinstance(user_input.content, list):
for user_input_content in user_input.content:
if isinstance(user_input_content, OpenAIResponseInputMessageContentText):
user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input_content.text))
elif isinstance(user_input_content, OpenAIResponseInputMessageContentImage):
if user_input_content.image_url:
image_url = OpenAIImageURL(
url=user_input_content.image_url, detail=user_input_content.detail
)
user_content.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
else:
user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input.content))
else:
user_content = input
messages.append(OpenAIUserMessageParam(content=user_content))
chat_tools = await self._convert_response_tools_to_chat_tools(tools) if tools else None
chat_response = await self.inference_api.openai_chat_completion(
model=model,
messages=messages,
tools=chat_tools,
stream=stream,
)
if stream:
# TODO: refactor this into a separate method that handles streaming
chat_response_id = ""
chat_response_content = []
# TODO: these chunk_ fields are hacky and only take the last chunk into account
chunk_created = 0
chunk_model = ""
chunk_finish_reason = ""
async for chunk in chat_response:
chat_response_id = chunk.id
chunk_created = chunk.created
chunk_model = chunk.model
for chunk_choice in chunk.choices:
# TODO: this only works for text content
chat_response_content.append(chunk_choice.delta.content or "")
if chunk_choice.finish_reason:
chunk_finish_reason = chunk_choice.finish_reason
assistant_message = OpenAIAssistantMessageParam(content="".join(chat_response_content))
chat_response = OpenAIChatCompletion(
id=chat_response_id,
choices=[
OpenAIChoice(
message=assistant_message,
finish_reason=chunk_finish_reason,
index=0,
)
],
created=chunk_created,
model=chunk_model,
)
else:
# dump and reload to map to our pydantic types
chat_response = OpenAIChatCompletion(**chat_response.model_dump())
output_messages: List[OpenAIResponseOutput] = []
if chat_response.choices[0].message.tool_calls:
output_messages.extend(
await self._execute_tool_and_return_final_output(model, stream, chat_response, messages)
)
else:
output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices))
response = OpenAIResponseObject(
created_at=chat_response.created,
id=f"resp-{uuid.uuid4()}",
model=model,
object="response",
status="completed",
output=output_messages,
)
if store:
# Store in kvstore
key = f"{OPENAI_RESPONSES_PREFIX}{response.id}"
await self.persistence_store.set(
key=key,
value=response.model_dump_json(),
)
if stream:
async def async_response() -> AsyncIterator[OpenAIResponseObjectStream]:
# TODO: response created should actually get emitted much earlier in the process
yield OpenAIResponseObjectStreamResponseCreated(response=response)
yield OpenAIResponseObjectStreamResponseCompleted(response=response)
return async_response()
return response
async def _convert_response_tools_to_chat_tools(
self, tools: List[OpenAIResponseInputTool]
) -> List[ChatCompletionToolParam]:
chat_tools: List[ChatCompletionToolParam] = []
for input_tool in tools:
# TODO: Handle other tool types
if input_tool.type == "web_search":
tool_name = "web_search"
tool = await self.tool_groups_api.get_tool(tool_name)
tool_def = ToolDefinition(
tool_name=tool_name,
description=tool.description,
parameters={
param.name: ToolParamDefinition(
param_type=param.parameter_type,
description=param.description,
required=param.required,
default=param.default,
)
for param in tool.parameters
},
)
chat_tool = convert_tooldef_to_openai_tool(tool_def)
chat_tools.append(chat_tool)
else:
raise ValueError(f"Llama Stack OpenAI Responses does not yet support tool type: {input_tool.type}")
return chat_tools
async def _execute_tool_and_return_final_output(
self, model_id: str, stream: bool, chat_response: OpenAIChatCompletion, messages: List[OpenAIMessageParam]
) -> List[OpenAIResponseOutput]:
output_messages: List[OpenAIResponseOutput] = []
choice = chat_response.choices[0]
# If the choice is not an assistant message, we don't need to execute any tools
if not isinstance(choice.message, OpenAIAssistantMessageParam):
return output_messages
# If the assistant message doesn't have any tool calls, we don't need to execute any tools
if not choice.message.tool_calls:
return output_messages
# Add the assistant message with tool_calls response to the messages list
messages.append(choice.message)
for tool_call in choice.message.tool_calls:
tool_call_id = tool_call.id
function = tool_call.function
# If for some reason the tool call doesn't have a function or id, we can't execute it
if not function or not tool_call_id:
continue
# TODO: telemetry spans for tool calls
result = await self._execute_tool_call(function)
# Handle tool call failure
if not result:
output_messages.append(
OpenAIResponseOutputMessageWebSearchToolCall(
id=tool_call_id,
status="failed",
)
)
continue
output_messages.append(
OpenAIResponseOutputMessageWebSearchToolCall(
id=tool_call_id,
status="completed",
),
)
result_content = ""
# TODO: handle other result content types and lists
if isinstance(result.content, str):
result_content = result.content
messages.append(OpenAIToolMessageParam(content=result_content, tool_call_id=tool_call_id))
tool_results_chat_response = await self.inference_api.openai_chat_completion(
model=model_id,
messages=messages,
stream=stream,
)
# type cast to appease mypy
tool_results_chat_response = cast(OpenAIChatCompletion, tool_results_chat_response)
tool_final_outputs = await _openai_choices_to_output_messages(tool_results_chat_response.choices)
# TODO: Wire in annotations with URLs, titles, etc to these output messages
output_messages.extend(tool_final_outputs)
return output_messages
async def _execute_tool_call(
self,
function: OpenAIChatCompletionToolCallFunction,
) -> Optional[ToolInvocationResult]:
if not function.name:
return None
function_args = json.loads(function.arguments) if function.arguments else {}
logger.info(f"executing tool call: {function.name} with args: {function_args}")
result = await self.tool_runtime_api.invoke_tool(
tool_name=function.name,
kwargs=function_args,
)
logger.debug(f"tool call {function.name} completed with result: {result}")
return result
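
The persistence model is deliberately simple: each response is serialized
with `model_dump_json()` under an `openai_responses:{id}` key and
re-hydrated with `model_validate_json()` on retrieval. A self-contained
sketch of that round trip using the in-memory KV store (field values are
placeholders):

```
import asyncio

from llama_stack.apis.agents.openai_responses import OpenAIResponseObject
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl


async def demo() -> None:
    store = InmemoryKVStoreImpl()
    response = OpenAIResponseObject(
        created_at=0,
        id="resp-1234",
        model="meta-llama/Llama-3.2-3B-Instruct",
        output=[],
        status="completed",
    )
    # Same key scheme as OPENAI_RESPONSES_PREFIX above.
    key = f"openai_responses:{response.id}"
    await store.set(key=key, value=response.model_dump_json())
    loaded = OpenAIResponseObject.model_validate_json(await store.get(key=key))
    assert loaded.id == response.id


asyncio.run(demo())
```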


@@ -478,6 +478,8 @@ class JsonSchemaGenerator:
}
return ret
elif origin_type is Literal:
if len(typing.get_args(typ)) != 1:
raise ValueError(f"Literal type {typ} has {len(typing.get_args(typ))} arguments")
(literal_value,) = typing.get_args(typ) # unpack value of literal type
schema = self.type_to_schema(type(literal_value))
schema["const"] = literal_value


@@ -14,6 +14,7 @@ from pathlib import Path
import pytest
import yaml
from llama_stack_client import LlamaStackClient
from openai import OpenAI
from llama_stack import LlamaStackAsLibraryClient
from llama_stack.apis.datatypes import Api
@@ -207,3 +208,9 @@ def llama_stack_client(request, provider_data, text_model_id):
raise RuntimeError("Initialization failed")
return client
@pytest.fixture(scope="session")
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="fake")


@@ -0,0 +1,37 @@
{
"non_streaming_01": {
"data": {
"question": "Which planet do humans live on?",
"expected": "Earth"
}
},
"non_streaming_02": {
"data": {
"question": "Which planet has rings around it with a name starting with letter S?",
"expected": "Saturn"
}
},
"streaming_01": {
"data": {
"question": "What's the name of the Sun in latin?",
"expected": "Sol"
}
},
"streaming_02": {
"data": {
"question": "What is the name of the US captial?",
"expected": "Washington"
}
},
"tools_web_search_01": {
"data": {
"input": "How many experts does the Llama 4 Maverick model have?",
"tools": [
{
"type": "web_search"
}
],
"expected": "128"
}
}
}


@@ -12,6 +12,7 @@ class TestCase:
_apis = [
"inference/chat_completion",
"inference/completion",
"openai/responses",
]
_jsonblob = {}


@@ -13,3 +13,5 @@ test_exclusions:
- test_chat_non_streaming_image
- test_chat_streaming_image
- test_chat_multi_turn_multiple_images
- test_response_non_streaming_image
- test_response_non_streaming_multi_turn_image


@@ -13,3 +13,5 @@ test_exclusions:
- test_chat_non_streaming_image
- test_chat_streaming_image
- test_chat_multi_turn_multiple_images
- test_response_non_streaming_image
- test_response_non_streaming_multi_turn_image


@@ -13,3 +13,5 @@ test_exclusions:
- test_chat_non_streaming_image
- test_chat_streaming_image
- test_chat_multi_turn_multiple_images
- test_response_non_streaming_image
- test_response_non_streaming_multi_turn_image


@@ -16,7 +16,7 @@ Description:
Configuration:
-- Provider details (models, display names) are loaded from `tests/verifications/config.yaml`.
+- Provider details (models, display names) are loaded from `tests/verifications/conf/*.yaml`.
- Test cases are defined in YAML files within `tests/verifications/openai_api/fixtures/test_cases/`.
- Test results are stored in `tests/verifications/test_results/`.


@@ -1,10 +1,15 @@
# This is a temporary run file because model names used by the verification tests
# are not quite consistent with various pre-existing distributions.
#
version: '2'
image_name: openai-api-verification
apis:
- agents
- inference
- telemetry
- tool_runtime
- vector_io
- safety
providers:
inference:
- provider_id: together
@@ -16,12 +21,12 @@ providers:
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
-api_key: ${env.FIREWORKS_API_KEY}
+api_key: ${env.FIREWORKS_API_KEY:}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
-api_key: ${env.GROQ_API_KEY}
+api_key: ${env.GROQ_API_KEY:}
- provider_id: openai
provider_type: remote::openai
config:
@@ -45,6 +50,19 @@ providers:
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/openai/trace_store.db}
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/agents_store.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
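
To stand this verification stack up locally, something like the following
should work (a sketch; the run file path is an assumption based on this
repo's layout, and provider API keys must be set in the environment):

```
llama stack run tests/verifications/openai-api-verification-run.yaml
```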


@@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs
def pytest_generate_tests(metafunc):
"""Dynamically parametrize tests based on the selected provider and config."""
if "model" in metafunc.fixturenames:
provider = metafunc.config.getoption("provider")
if not provider:
print("Warning: --provider not specified. Skipping model parametrization.")
metafunc.parametrize("model", [])
return
try:
config_data = _load_all_verification_configs()
except (FileNotFoundError, IOError) as e:
print(f"ERROR loading verification configs: {e}")
config_data = {"providers": {}}
provider_config = config_data.get("providers", {}).get(provider)
if provider_config:
models = provider_config.get("models", [])
if models:
metafunc.parametrize("model", models)
else:
print(f"Warning: No models found for provider '{provider}' in config.")
metafunc.parametrize("model", []) # Parametrize empty if no models found
else:
print(f"Warning: Provider '{provider}' not found in config. No models parametrized.")
metafunc.parametrize("model", []) # Parametrize empty if provider not found


@@ -5,14 +5,16 @@
# the root directory of this source tree.
import os
import re
from pathlib import Path
import pytest
import yaml
from openai import OpenAI
-# --- Helper Function to Load Config ---
+# --- Helper Functions ---
def _load_all_verification_configs():
"""Load and aggregate verification configs from the conf/ directory."""
# Note: Path is relative to *this* file (fixtures.py)
@@ -44,7 +46,30 @@ def _load_all_verification_configs():
return {"providers": all_provider_configs}
-# --- End Helper Function ---
+def case_id_generator(case):
"""Generate a test ID from the case's 'case_id' field, or use a default."""
case_id = case.get("case_id")
if isinstance(case_id, (str, int)):
return re.sub(r"\W|^(?=\d)", "_", str(case_id))
return None
def should_skip_test(verification_config, provider, model, test_name_base):
"""Check if a test should be skipped based on config exclusions."""
provider_config = verification_config.get("providers", {}).get(provider)
if not provider_config:
return False # No config for provider, don't skip
exclusions = provider_config.get("test_exclusions", {}).get(model, [])
return test_name_base in exclusions
# Helper to get the base test name from the request object
def get_base_test_name(request):
return request.node.originalname
# --- End Helper Functions ---
@pytest.fixture(scope="session")


@@ -0,0 +1,65 @@
test_response_basic:
test_name: test_response_basic
test_params:
case:
- case_id: "earth"
input: "Which planet do humans live on?"
output: "earth"
- case_id: "saturn"
input: "Which planet has rings around it with a name starting with letter S?"
output: "saturn"
test_response_multi_turn:
test_name: test_response_multi_turn
test_params:
case:
- case_id: "earth"
turns:
- input: "Which planet do humans live on?"
output: "earth"
- input: "What is the name of the planet from your previous response?"
output: "earth"
test_response_web_search:
test_name: test_response_web_search
test_params:
case:
- case_id: "llama_experts"
input: "How many experts does the Llama 4 Maverick model have?"
tools:
- type: web_search
search_context_size: "low"
output: "128"
test_response_image:
test_name: test_response_image
test_params:
case:
- case_id: "llama_image"
input:
- role: user
content:
- type: input_text
text: "Identify the type of animal in this image."
- type: input_image
image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
output: "llama"
test_response_multi_turn_image:
test_name: test_response_multi_turn_image
test_params:
case:
- case_id: "llama_image_search"
turns:
- input:
- role: user
content:
- type: input_text
text: "What type of animal is in this image? Please respond with a single word that starts with the letter 'L'."
- type: input_image
image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
output: "llama"
- input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick' and 'scout'."
tools:
- type: web_search
output: "model"


@@ -7,7 +7,6 @@
import base64
import copy
import json
-import re
from pathlib import Path
from typing import Any
@@ -16,7 +15,9 @@ from openai import APIError
from pydantic import BaseModel
from tests.verifications.openai_api.fixtures.fixtures import (
-_load_all_verification_configs,
+case_id_generator,
get_base_test_name,
should_skip_test,
)
from tests.verifications.openai_api.fixtures.load import load_test_cases
@@ -25,57 +26,6 @@ chat_completion_test_cases = load_test_cases("chat_completion")
THIS_DIR = Path(__file__).parent
def case_id_generator(case):
"""Generate a test ID from the case's 'case_id' field, or use a default."""
case_id = case.get("case_id")
if isinstance(case_id, (str, int)):
return re.sub(r"\W|^(?=\d)", "_", str(case_id))
return None
def pytest_generate_tests(metafunc):
"""Dynamically parametrize tests based on the selected provider and config."""
if "model" in metafunc.fixturenames:
provider = metafunc.config.getoption("provider")
if not provider:
print("Warning: --provider not specified. Skipping model parametrization.")
metafunc.parametrize("model", [])
return
try:
config_data = _load_all_verification_configs()
except (FileNotFoundError, IOError) as e:
print(f"ERROR loading verification configs: {e}")
config_data = {"providers": {}}
provider_config = config_data.get("providers", {}).get(provider)
if provider_config:
models = provider_config.get("models", [])
if models:
metafunc.parametrize("model", models)
else:
print(f"Warning: No models found for provider '{provider}' in config.")
metafunc.parametrize("model", []) # Parametrize empty if no models found
else:
print(f"Warning: Provider '{provider}' not found in config. No models parametrized.")
metafunc.parametrize("model", []) # Parametrize empty if provider not found
def should_skip_test(verification_config, provider, model, test_name_base):
"""Check if a test should be skipped based on config exclusions."""
provider_config = verification_config.get("providers", {}).get(provider)
if not provider_config:
return False # No config for provider, don't skip
exclusions = provider_config.get("test_exclusions", {}).get(model, [])
return test_name_base in exclusions
# Helper to get the base test name from the request object
def get_base_test_name(request):
return request.node.originalname
@pytest.fixture
def multi_image_data():
files = [


@@ -0,0 +1,166 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
from tests.verifications.openai_api.fixtures.fixtures import (
case_id_generator,
get_base_test_name,
should_skip_test,
)
from tests.verifications.openai_api.fixtures.load import load_test_cases
responses_test_cases = load_test_cases("responses")
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_basic"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_basic(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
stream=False,
)
output_text = response.output_text.lower().strip()
assert len(output_text) > 0
assert case["output"].lower() in output_text
retrieved_response = openai_client.responses.retrieve(response_id=response.id)
assert retrieved_response.output_text == response.output_text
next_response = openai_client.responses.create(
model=model, input="Repeat your previous response in all caps.", previous_response_id=response.id
)
next_output_text = next_response.output_text.strip()
assert case["output"].upper() in next_output_text
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_basic"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_streaming_basic(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
stream=True,
)
streamed_content = []
response_id = ""
for chunk in response:
if chunk.type == "response.completed":
response_id = chunk.response.id
streamed_content.append(chunk.response.output_text.strip())
assert len(streamed_content) > 0
assert case["output"].lower() in "".join(streamed_content).lower()
retrieved_response = openai_client.responses.retrieve(response_id=response_id)
assert retrieved_response.output_text == "".join(streamed_content)
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_multi_turn"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_multi_turn(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
previous_response_id = None
for turn in case["turns"]:
response = openai_client.responses.create(
model=model,
input=turn["input"],
previous_response_id=previous_response_id,
tools=turn["tools"] if "tools" in turn else None,
)
previous_response_id = response.id
output_text = response.output_text.lower()
assert turn["output"].lower() in output_text
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_web_search"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_web_search(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
tools=case["tools"],
stream=False,
)
assert len(response.output) > 1
assert response.output[0].type == "web_search_call"
assert response.output[0].status == "completed"
assert response.output[1].type == "message"
assert response.output[1].status == "completed"
assert response.output[1].role == "assistant"
assert len(response.output[1].content) > 0
assert case["output"].lower() in response.output_text.lower().strip()
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_image"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_image(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
stream=False,
)
output_text = response.output_text.lower()
assert case["output"].lower() in output_text
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_multi_turn_image"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_multi_turn_image(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
previous_response_id = None
for turn in case["turns"]:
response = openai_client.responses.create(
model=model,
input=turn["input"],
previous_response_id=previous_response_id,
tools=turn["tools"] if "tools" in turn else None,
)
previous_response_id = response.id
output_text = response.output_text.lower()
assert turn["output"].lower() in output_text