feat: OpenAI Responses API (#1989)

# What does this PR do?

This provides an initial [OpenAI Responses
API](https://platform.openai.com/docs/api-reference/responses)
implementation. The API is not yet complete; this is more of a
proof of concept showing how we can store responses in our key-value
stores and use them to support Responses API concepts like
`previous_response_id`.

## Test Plan

I've added a new
`tests/integration/openai_responses/test_openai_responses.py` as part of
test-driven development for this new API. For now I've only tested it
locally with the remote-vllm provider, but it should work with any of
our inference providers, since the only API it requires from the
inference provider is the `openai_chat_completion` endpoint.

```
VLLM_URL="http://localhost:8000/v1" \
INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" \
llama stack build --template remote-vllm --image-type venv --run
```

```
LLAMA_STACK_CONFIG="http://localhost:8321" \
python -m pytest -v \
  tests/integration/openai_responses/test_openai_responses.py \
  --text-model "meta-llama/Llama-3.2-3B-Instruct"
```
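
You can also exercise the new endpoints by hand with the OpenAI Python
client. A minimal sketch, assuming the stack from the build step above is
listening on `http://localhost:8321` (the `/v1/openai/v1` base path and
dummy API key mirror what the test fixtures use):

```
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="fake")

# Create a response; with store=True (the default) it is persisted in the
# stack's key-value store under its response id.
response = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    input="Which planet do humans live on?",
)
print(response.output_text)

# Fork a follow-up off the stored response via previous_response_id.
follow_up = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    input="Repeat your previous response in all caps.",
    previous_response_id=response.id,
)
print(follow_up.output_text)
```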

---------

Signed-off-by: Ben Browning <bbrownin@redhat.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Ben Browning committed on 2025-04-28 17:06:00 -04:00 (via GitHub)
parent 79851d93aa
commit 8dfce2f596
21 changed files with 1766 additions and 59 deletions


@@ -497,6 +497,54 @@
}
}
},
"/v1/openai/v1/responses": {
"post": {
"responses": {
"200": {
"description": "Runtime representation of an annotated type.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObject"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObjectStream"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Create a new OpenAI response.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateOpenaiResponseRequest"
}
}
},
"required": true
}
}
},
"/v1/files": { "/v1/files": {
"get": { "get": {
"responses": { "responses": {
@ -1278,6 +1326,49 @@
] ]
} }
}, },
"/v1/openai/v1/responses/{id}": {
"get": {
"responses": {
"200": {
"description": "An OpenAIResponseObject.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIResponseObject"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "Retrieve an OpenAI response by its ID.",
"parameters": [
{
"name": "id",
"in": "path",
"description": "The ID of the OpenAI response to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/scoring-functions/{scoring_fn_id}": { "/v1/scoring-functions/{scoring_fn_id}": {
"get": { "get": {
"responses": { "responses": {
@ -6192,6 +6283,427 @@
], ],
"title": "AgentTurnResponseTurnStartPayload" "title": "AgentTurnResponseTurnStartPayload"
}, },
"OpenAIResponseInputMessage": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
}
}
]
},
"role": {
"oneOf": [
{
"type": "string",
"const": "system"
},
{
"type": "string",
"const": "developer"
},
{
"type": "string",
"const": "user"
},
{
"type": "string",
"const": "assistant"
}
]
},
"type": {
"type": "string",
"const": "message",
"default": "message"
}
},
"additionalProperties": false,
"required": [
"content",
"role"
],
"title": "OpenAIResponseInputMessage"
},
"OpenAIResponseInputMessageContent": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentText"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
}
}
},
"OpenAIResponseInputMessageContentImage": {
"type": "object",
"properties": {
"detail": {
"oneOf": [
{
"type": "string",
"const": "low"
},
{
"type": "string",
"const": "high"
},
{
"type": "string",
"const": "auto"
}
],
"default": "auto"
},
"type": {
"type": "string",
"const": "input_image",
"default": "input_image"
},
"image_url": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"detail",
"type"
],
"title": "OpenAIResponseInputMessageContentImage"
},
"OpenAIResponseInputMessageContentText": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"type": {
"type": "string",
"const": "input_text",
"default": "input_text"
}
},
"additionalProperties": false,
"required": [
"text",
"type"
],
"title": "OpenAIResponseInputMessageContentText"
},
"OpenAIResponseInputTool": {
"type": "object",
"properties": {
"type": {
"oneOf": [
{
"type": "string",
"const": "web_search"
},
{
"type": "string",
"const": "web_search_preview_2025_03_11"
}
],
"default": "web_search"
},
"search_context_size": {
"type": "string",
"default": "medium"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "OpenAIResponseInputToolWebSearch"
},
"CreateOpenaiResponseRequest": {
"type": "object",
"properties": {
"input": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessage"
}
}
],
"description": "Input message(s) to create the response."
},
"model": {
"type": "string",
"description": "The underlying LLM used for completions."
},
"previous_response_id": {
"type": "string",
"description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
},
"store": {
"type": "boolean"
},
"stream": {
"type": "boolean"
},
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputTool"
}
}
},
"additionalProperties": false,
"required": [
"input",
"model"
],
"title": "CreateOpenaiResponseRequest"
},
"OpenAIResponseError": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"message": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"code",
"message"
],
"title": "OpenAIResponseError"
},
"OpenAIResponseObject": {
"type": "object",
"properties": {
"created_at": {
"type": "integer"
},
"error": {
"$ref": "#/components/schemas/OpenAIResponseError"
},
"id": {
"type": "string"
},
"model": {
"type": "string"
},
"object": {
"type": "string",
"const": "response",
"default": "response"
},
"output": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutput"
}
},
"parallel_tool_calls": {
"type": "boolean",
"default": false
},
"previous_response_id": {
"type": "string"
},
"status": {
"type": "string"
},
"temperature": {
"type": "number"
},
"top_p": {
"type": "number"
},
"truncation": {
"type": "string"
},
"user": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"created_at",
"id",
"model",
"object",
"output",
"parallel_tool_calls",
"status"
],
"title": "OpenAIResponseObject"
},
"OpenAIResponseOutput": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessage"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"message": "#/components/schemas/OpenAIResponseOutputMessage",
"web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
}
}
},
"OpenAIResponseOutputMessage": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"content": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutputMessageContent"
}
},
"role": {
"type": "string",
"const": "assistant",
"default": "assistant"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "message",
"default": "message"
}
},
"additionalProperties": false,
"required": [
"id",
"content",
"role",
"status",
"type"
],
"title": "OpenAIResponseOutputMessage"
},
"OpenAIResponseOutputMessageContent": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"type": {
"type": "string",
"const": "output_text",
"default": "output_text"
}
},
"additionalProperties": false,
"required": [
"text",
"type"
],
"title": "OpenAIResponseOutputMessageContentOutputText"
},
"OpenAIResponseOutputMessageWebSearchToolCall": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "web_search_call",
"default": "web_search_call"
}
},
"additionalProperties": false,
"required": [
"id",
"status",
"type"
],
"title": "OpenAIResponseOutputMessageWebSearchToolCall"
},
"OpenAIResponseObjectStream": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
},
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
"response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
}
},
"OpenAIResponseObjectStreamResponseCompleted": {
"type": "object",
"properties": {
"response": {
"$ref": "#/components/schemas/OpenAIResponseObject"
},
"type": {
"type": "string",
"const": "response.completed",
"default": "response.completed"
}
},
"additionalProperties": false,
"required": [
"response",
"type"
],
"title": "OpenAIResponseObjectStreamResponseCompleted"
},
"OpenAIResponseObjectStreamResponseCreated": {
"type": "object",
"properties": {
"response": {
"$ref": "#/components/schemas/OpenAIResponseObject"
},
"type": {
"type": "string",
"const": "response.created",
"default": "response.created"
}
},
"additionalProperties": false,
"required": [
"response",
"type"
],
"title": "OpenAIResponseObjectStreamResponseCreated"
},
"CreateUploadSessionRequest": { "CreateUploadSessionRequest": {
"type": "object", "type": "object",
"properties": { "properties": {


@@ -330,6 +330,39 @@ paths:
schema:
$ref: '#/components/schemas/CreateAgentTurnRequest'
required: true
/v1/openai/v1/responses:
post:
responses:
'200':
description: >-
Runtime representation of an annotated type.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIResponseObject'
text/event-stream:
schema:
$ref: '#/components/schemas/OpenAIResponseObjectStream'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Create a new OpenAI response.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
required: true
/v1/files:
get:
responses:
@@ -875,6 +908,36 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/responses/{id}:
get:
responses:
'200':
description: An OpenAIResponseObject.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIResponseObject'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: Retrieve an OpenAI response by its ID.
parameters:
- name: id
in: path
description: >-
The ID of the OpenAI response to retrieve.
required: true
schema:
type: string
/v1/scoring-functions/{scoring_fn_id}:
get:
responses:
@@ -4329,6 +4392,293 @@ components:
- event_type
- turn_id
title: AgentTurnResponseTurnStartPayload
OpenAIResponseInputMessage:
type: object
properties:
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
role:
oneOf:
- type: string
const: system
- type: string
const: developer
- type: string
const: user
- type: string
const: assistant
type:
type: string
const: message
default: message
additionalProperties: false
required:
- content
- role
title: OpenAIResponseInputMessage
OpenAIResponseInputMessageContent:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
discriminator:
propertyName: type
mapping:
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
OpenAIResponseInputMessageContentImage:
type: object
properties:
detail:
oneOf:
- type: string
const: low
- type: string
const: high
- type: string
const: auto
default: auto
type:
type: string
const: input_image
default: input_image
image_url:
type: string
additionalProperties: false
required:
- detail
- type
title: OpenAIResponseInputMessageContentImage
OpenAIResponseInputMessageContentText:
type: object
properties:
text:
type: string
type:
type: string
const: input_text
default: input_text
additionalProperties: false
required:
- text
- type
title: OpenAIResponseInputMessageContentText
OpenAIResponseInputTool:
type: object
properties:
type:
oneOf:
- type: string
const: web_search
- type: string
const: web_search_preview_2025_03_11
default: web_search
search_context_size:
type: string
default: medium
additionalProperties: false
required:
- type
title: OpenAIResponseInputToolWebSearch
CreateOpenaiResponseRequest:
type: object
properties:
input:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessage'
description: Input message(s) to create the response.
model:
type: string
description: The underlying LLM used for completions.
previous_response_id:
type: string
description: >-
(Optional) if specified, the new response will be a continuation of the
previous response. This can be used to easily fork off new responses
from existing responses.
store:
type: boolean
stream:
type: boolean
tools:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputTool'
additionalProperties: false
required:
- input
- model
title: CreateOpenaiResponseRequest
OpenAIResponseError:
type: object
properties:
code:
type: string
message:
type: string
additionalProperties: false
required:
- code
- message
title: OpenAIResponseError
OpenAIResponseObject:
type: object
properties:
created_at:
type: integer
error:
$ref: '#/components/schemas/OpenAIResponseError'
id:
type: string
model:
type: string
object:
type: string
const: response
default: response
output:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutput'
parallel_tool_calls:
type: boolean
default: false
previous_response_id:
type: string
status:
type: string
temperature:
type: number
top_p:
type: number
truncation:
type: string
user:
type: string
additionalProperties: false
required:
- created_at
- id
- model
- object
- output
- parallel_tool_calls
- status
title: OpenAIResponseObject
OpenAIResponseOutput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessage'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
discriminator:
propertyName: type
mapping:
message: '#/components/schemas/OpenAIResponseOutputMessage'
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
OpenAIResponseOutputMessage:
type: object
properties:
id:
type: string
content:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutputMessageContent'
role:
type: string
const: assistant
default: assistant
status:
type: string
type:
type: string
const: message
default: message
additionalProperties: false
required:
- id
- content
- role
- status
- type
title: OpenAIResponseOutputMessage
OpenAIResponseOutputMessageContent:
type: object
properties:
text:
type: string
type:
type: string
const: output_text
default: output_text
additionalProperties: false
required:
- text
- type
title: >-
OpenAIResponseOutputMessageContentOutputText
"OpenAIResponseOutputMessageWebSearchToolCall":
type: object
properties:
id:
type: string
status:
type: string
type:
type: string
const: web_search_call
default: web_search_call
additionalProperties: false
required:
- id
- status
- type
title: >-
OpenAIResponseOutputMessageWebSearchToolCall
OpenAIResponseObjectStream:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
discriminator:
propertyName: type
mapping:
response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
"OpenAIResponseObjectStreamResponseCompleted":
type: object
properties:
response:
$ref: '#/components/schemas/OpenAIResponseObject'
type:
type: string
const: response.completed
default: response.completed
additionalProperties: false
required:
- response
- type
title: >-
OpenAIResponseObjectStreamResponseCompleted
"OpenAIResponseObjectStreamResponseCreated":
type: object
properties:
response:
$ref: '#/components/schemas/OpenAIResponseObject'
type:
type: string
const: response.created
default: response.created
additionalProperties: false
required:
- response
- type
title: >-
OpenAIResponseObjectStreamResponseCreated
CreateUploadSessionRequest:
type: object
properties:
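
The `text/event-stream` content declared above is what a streaming create
returns; in this initial implementation the stream carries exactly two
events, `response.created` followed by `response.completed`, each wrapping
the full response object. A sketch with curl (same local-stack assumptions
as above):

```
curl -N http://localhost:8321/v1/openai/v1/responses \
  -H 'Content-Type: application/json' \
  -d '{"model": "meta-llama/Llama-3.2-3B-Instruct", "input": "Say hello.", "stream": true}'
```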


@@ -179,7 +179,7 @@ class ContentBuilder:
"Creates the content subtree for a request or response."
def is_iterator_type(t):
-return "StreamChunk" in str(t)
+return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t)
def get_media_type(t):
if is_generic_list(t):


@@ -38,6 +38,13 @@ from llama_stack.apis.safety import SafetyViolation
from llama_stack.apis.tools import ToolDef
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from .openai_responses import (
OpenAIResponseInputMessage,
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponseObjectStream,
)
class Attachment(BaseModel):
"""An attachment to an agent turn.
@@ -593,3 +600,39 @@ class Agents(Protocol):
:returns: A ListAgentSessionsResponse.
"""
...
# We situate the OpenAI Responses API in the Agents API just like we did things
# for Inference. The Responses API, in its intent, serves the same purpose as
# the Agents API above -- it is essentially a lightweight "agentic loop" with
# integrated tool calling.
#
# Both of these APIs are inherently stateful.
@webmethod(route="/openai/v1/responses/{id}", method="GET")
async def get_openai_response(
self,
id: str,
) -> OpenAIResponseObject:
"""Retrieve an OpenAI response by its ID.
:param id: The ID of the OpenAI response to retrieve.
:returns: An OpenAIResponseObject.
"""
...
@webmethod(route="/openai/v1/responses", method="POST")
async def create_openai_response(
self,
input: Union[str, List[OpenAIResponseInputMessage]],
model: str,
previous_response_id: Optional[str] = None,
store: Optional[bool] = True,
stream: Optional[bool] = False,
tools: Optional[List[OpenAIResponseInputTool]] = None,
) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]:
"""Create a new OpenAI response.
:param input: Input message(s) to create the response.
:param model: The underlying LLM used for completions.
:param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork off new responses from existing responses.
"""


@@ -0,0 +1,140 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import List, Literal, Optional, Union
from pydantic import BaseModel, Field
from typing_extensions import Annotated
from llama_stack.schema_utils import json_schema_type, register_schema
@json_schema_type
class OpenAIResponseError(BaseModel):
code: str
message: str
@json_schema_type
class OpenAIResponseOutputMessageContentOutputText(BaseModel):
text: str
type: Literal["output_text"] = "output_text"
OpenAIResponseOutputMessageContent = Annotated[
Union[OpenAIResponseOutputMessageContentOutputText,],
Field(discriminator="type"),
]
register_schema(OpenAIResponseOutputMessageContent, name="OpenAIResponseOutputMessageContent")
@json_schema_type
class OpenAIResponseOutputMessage(BaseModel):
id: str
content: List[OpenAIResponseOutputMessageContent]
role: Literal["assistant"] = "assistant"
status: str
type: Literal["message"] = "message"
@json_schema_type
class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
id: str
status: str
type: Literal["web_search_call"] = "web_search_call"
OpenAIResponseOutput = Annotated[
Union[
OpenAIResponseOutputMessage,
OpenAIResponseOutputMessageWebSearchToolCall,
],
Field(discriminator="type"),
]
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
@json_schema_type
class OpenAIResponseObject(BaseModel):
created_at: int
error: Optional[OpenAIResponseError] = None
id: str
model: str
object: Literal["response"] = "response"
output: List[OpenAIResponseOutput]
parallel_tool_calls: bool = False
previous_response_id: Optional[str] = None
status: str
temperature: Optional[float] = None
top_p: Optional[float] = None
truncation: Optional[str] = None
user: Optional[str] = None
@json_schema_type
class OpenAIResponseObjectStreamResponseCreated(BaseModel):
response: OpenAIResponseObject
type: Literal["response.created"] = "response.created"
@json_schema_type
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
response: OpenAIResponseObject
type: Literal["response.completed"] = "response.completed"
OpenAIResponseObjectStream = Annotated[
Union[
OpenAIResponseObjectStreamResponseCreated,
OpenAIResponseObjectStreamResponseCompleted,
],
Field(discriminator="type"),
]
register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
@json_schema_type
class OpenAIResponseInputMessageContentText(BaseModel):
text: str
type: Literal["input_text"] = "input_text"
@json_schema_type
class OpenAIResponseInputMessageContentImage(BaseModel):
detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
type: Literal["input_image"] = "input_image"
# TODO: handle file_id
image_url: Optional[str] = None
# TODO: handle file content types
OpenAIResponseInputMessageContent = Annotated[
Union[OpenAIResponseInputMessageContentText, OpenAIResponseInputMessageContentImage],
Field(discriminator="type"),
]
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
@json_schema_type
class OpenAIResponseInputMessage(BaseModel):
content: Union[str, List[OpenAIResponseInputMessageContent]]
role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
type: Optional[Literal["message"]] = "message"
@json_schema_type
class OpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
# TODO: actually use search_context_size somewhere...
search_context_size: Optional[str] = Field(default="medium", pattern="^low|medium|high$")
# TODO: add user_location
OpenAIResponseInputTool = Annotated[
Union[OpenAIResponseInputToolWebSearch,],
Field(discriminator="type"),
]
register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
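
Each `Annotated[Union[...], Field(discriminator="type")]` above routes
validation on the literal `type` field, which is also what produces the
`discriminator`/`mapping` blocks in the generated OpenAPI spec. A small
sketch of that behavior using pydantic's `TypeAdapter`:

```
from pydantic import TypeAdapter

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInputMessageContent,
    OpenAIResponseInputMessageContentText,
)

# The "type" key selects the concrete class from the union.
adapter = TypeAdapter(OpenAIResponseInputMessageContent)
content = adapter.validate_python({"type": "input_text", "text": "hello"})
assert isinstance(content, OpenAIResponseInputMessageContentText)
```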


@@ -23,6 +23,9 @@ from llama_stack.apis.agents import (
Document,
ListAgentSessionsResponse,
ListAgentsResponse,
OpenAIResponseInputMessage,
OpenAIResponseInputTool,
OpenAIResponseObject,
Session,
Turn,
)
@@ -40,6 +43,7 @@ from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
from .agent_instance import ChatAgent
from .config import MetaReferenceAgentsImplConfig
from .openai_responses import OpenAIResponsesImpl
logger = logging.getLogger()
logger.setLevel(logging.INFO)
@@ -63,9 +67,16 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_groups_api = tool_groups_api
self.in_memory_store = InmemoryKVStoreImpl()
self.openai_responses_impl = None
async def initialize(self) -> None:
self.persistence_store = await kvstore_impl(self.config.persistence_store)
self.openai_responses_impl = OpenAIResponsesImpl(
self.persistence_store,
inference_api=self.inference_api,
tool_groups_api=self.tool_groups_api,
tool_runtime_api=self.tool_runtime_api,
)
# check if "bwrap" is available # check if "bwrap" is available
if not shutil.which("bwrap"): if not shutil.which("bwrap"):
@ -244,3 +255,23 @@ class MetaReferenceAgentsImpl(Agents):
agent_id: str, agent_id: str,
) -> ListAgentSessionsResponse: ) -> ListAgentSessionsResponse:
pass pass
# OpenAI responses
async def get_openai_response(
self,
id: str,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.get_openai_response(id)
async def create_openai_response(
self,
input: Union[str, List[OpenAIResponseInputMessage]],
model: str,
previous_response_id: Optional[str] = None,
store: Optional[bool] = True,
stream: Optional[bool] = False,
tools: Optional[List[OpenAIResponseInputTool]] = None,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.create_openai_response(
input, model, previous_response_id, store, stream, tools
)


@@ -0,0 +1,319 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
import uuid
from typing import AsyncIterator, List, Optional, Union, cast
from openai.types.chat import ChatCompletionToolParam
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputMessage,
OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponseObjectStream,
OpenAIResponseObjectStreamResponseCompleted,
OpenAIResponseObjectStreamResponseCreated,
OpenAIResponseOutput,
OpenAIResponseOutputMessage,
OpenAIResponseOutputMessageContentOutputText,
OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.inference.inference import (
Inference,
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartParam,
OpenAIChatCompletionContentPartTextParam,
OpenAIChatCompletionToolCallFunction,
OpenAIChoice,
OpenAIImageURL,
OpenAIMessageParam,
OpenAIToolMessageParam,
OpenAIUserMessageParam,
)
from llama_stack.apis.tools.tools import ToolGroups, ToolInvocationResult, ToolRuntime
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
from llama_stack.providers.utils.kvstore import KVStore
logger = get_logger(name=__name__, category="openai_responses")
OPENAI_RESPONSES_PREFIX = "openai_responses:"
async def _previous_response_to_messages(previous_response: OpenAIResponseObject) -> List[OpenAIMessageParam]:
messages: List[OpenAIMessageParam] = []
for output_message in previous_response.output:
if isinstance(output_message, OpenAIResponseOutputMessage):
messages.append(OpenAIAssistantMessageParam(content=output_message.content[0].text))
return messages
async def _openai_choices_to_output_messages(choices: List[OpenAIChoice]) -> List[OpenAIResponseOutputMessage]:
output_messages = []
for choice in choices:
output_content = ""
if isinstance(choice.message.content, str):
output_content = choice.message.content
elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam):
output_content = choice.message.content.text
# TODO: handle image content
output_messages.append(
OpenAIResponseOutputMessage(
id=f"msg_{uuid.uuid4()}",
content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
status="completed",
)
)
return output_messages
class OpenAIResponsesImpl:
def __init__(
self,
persistence_store: KVStore,
inference_api: Inference,
tool_groups_api: ToolGroups,
tool_runtime_api: ToolRuntime,
):
self.persistence_store = persistence_store
self.inference_api = inference_api
self.tool_groups_api = tool_groups_api
self.tool_runtime_api = tool_runtime_api
async def get_openai_response(
self,
id: str,
) -> OpenAIResponseObject:
key = f"{OPENAI_RESPONSES_PREFIX}{id}"
response_json = await self.persistence_store.get(key=key)
if response_json is None:
raise ValueError(f"OpenAI response with id '{id}' not found")
return OpenAIResponseObject.model_validate_json(response_json)
async def create_openai_response(
self,
input: Union[str, List[OpenAIResponseInputMessage]],
model: str,
previous_response_id: Optional[str] = None,
store: Optional[bool] = True,
stream: Optional[bool] = False,
tools: Optional[List[OpenAIResponseInputTool]] = None,
):
stream = False if stream is None else stream
messages: List[OpenAIMessageParam] = []
if previous_response_id:
previous_response = await self.get_openai_response(previous_response_id)
messages.extend(await _previous_response_to_messages(previous_response))
# TODO: refactor this user_content parsing out into a separate method
user_content: Union[str, List[OpenAIChatCompletionContentPartParam]] = ""
if isinstance(input, list):
user_content = []
for user_input in input:
if isinstance(user_input.content, list):
for user_input_content in user_input.content:
if isinstance(user_input_content, OpenAIResponseInputMessageContentText):
user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input_content.text))
elif isinstance(user_input_content, OpenAIResponseInputMessageContentImage):
if user_input_content.image_url:
image_url = OpenAIImageURL(
url=user_input_content.image_url, detail=user_input_content.detail
)
user_content.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
else:
user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input.content))
else:
user_content = input
messages.append(OpenAIUserMessageParam(content=user_content))
chat_tools = await self._convert_response_tools_to_chat_tools(tools) if tools else None
chat_response = await self.inference_api.openai_chat_completion(
model=model,
messages=messages,
tools=chat_tools,
stream=stream,
)
if stream:
# TODO: refactor this into a separate method that handles streaming
chat_response_id = ""
chat_response_content = []
# TODO: these chunk_ fields are hacky and only take the last chunk into account
chunk_created = 0
chunk_model = ""
chunk_finish_reason = ""
async for chunk in chat_response:
chat_response_id = chunk.id
chunk_created = chunk.created
chunk_model = chunk.model
for chunk_choice in chunk.choices:
# TODO: this only works for text content
chat_response_content.append(chunk_choice.delta.content or "")
if chunk_choice.finish_reason:
chunk_finish_reason = chunk_choice.finish_reason
assistant_message = OpenAIAssistantMessageParam(content="".join(chat_response_content))
chat_response = OpenAIChatCompletion(
id=chat_response_id,
choices=[
OpenAIChoice(
message=assistant_message,
finish_reason=chunk_finish_reason,
index=0,
)
],
created=chunk_created,
model=chunk_model,
)
else:
# dump and reload to map to our pydantic types
chat_response = OpenAIChatCompletion(**chat_response.model_dump())
output_messages: List[OpenAIResponseOutput] = []
if chat_response.choices[0].message.tool_calls:
output_messages.extend(
await self._execute_tool_and_return_final_output(model, stream, chat_response, messages)
)
else:
output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices))
response = OpenAIResponseObject(
created_at=chat_response.created,
id=f"resp-{uuid.uuid4()}",
model=model,
object="response",
status="completed",
output=output_messages,
)
if store:
# Store in kvstore
key = f"{OPENAI_RESPONSES_PREFIX}{response.id}"
await self.persistence_store.set(
key=key,
value=response.model_dump_json(),
)
if stream:
async def async_response() -> AsyncIterator[OpenAIResponseObjectStream]:
# TODO: response created should actually get emitted much earlier in the process
yield OpenAIResponseObjectStreamResponseCreated(response=response)
yield OpenAIResponseObjectStreamResponseCompleted(response=response)
return async_response()
return response
async def _convert_response_tools_to_chat_tools(
self, tools: List[OpenAIResponseInputTool]
) -> List[ChatCompletionToolParam]:
chat_tools: List[ChatCompletionToolParam] = []
for input_tool in tools:
# TODO: Handle other tool types
if input_tool.type == "web_search":
tool_name = "web_search"
tool = await self.tool_groups_api.get_tool(tool_name)
tool_def = ToolDefinition(
tool_name=tool_name,
description=tool.description,
parameters={
param.name: ToolParamDefinition(
param_type=param.parameter_type,
description=param.description,
required=param.required,
default=param.default,
)
for param in tool.parameters
},
)
chat_tool = convert_tooldef_to_openai_tool(tool_def)
chat_tools.append(chat_tool)
else:
raise ValueError(f"Llama Stack OpenAI Responses does not yet support tool type: {input_tool.type}")
return chat_tools
async def _execute_tool_and_return_final_output(
self, model_id: str, stream: bool, chat_response: OpenAIChatCompletion, messages: List[OpenAIMessageParam]
) -> List[OpenAIResponseOutput]:
output_messages: List[OpenAIResponseOutput] = []
choice = chat_response.choices[0]
# If the choice is not an assistant message, we don't need to execute any tools
if not isinstance(choice.message, OpenAIAssistantMessageParam):
return output_messages
# If the assistant message doesn't have any tool calls, we don't need to execute any tools
if not choice.message.tool_calls:
return output_messages
# Add the assistant message with tool_calls response to the messages list
messages.append(choice.message)
for tool_call in choice.message.tool_calls:
tool_call_id = tool_call.id
function = tool_call.function
# If for some reason the tool call doesn't have a function or id, we can't execute it
if not function or not tool_call_id:
continue
# TODO: telemetry spans for tool calls
result = await self._execute_tool_call(function)
# Handle tool call failure
if not result:
output_messages.append(
OpenAIResponseOutputMessageWebSearchToolCall(
id=tool_call_id,
status="failed",
)
)
continue
output_messages.append(
OpenAIResponseOutputMessageWebSearchToolCall(
id=tool_call_id,
status="completed",
),
)
result_content = ""
# TODO: handle other result content types and lists
if isinstance(result.content, str):
result_content = result.content
messages.append(OpenAIToolMessageParam(content=result_content, tool_call_id=tool_call_id))
tool_results_chat_response = await self.inference_api.openai_chat_completion(
model=model_id,
messages=messages,
stream=stream,
)
# type cast to appease mypy
tool_results_chat_response = cast(OpenAIChatCompletion, tool_results_chat_response)
tool_final_outputs = await _openai_choices_to_output_messages(tool_results_chat_response.choices)
# TODO: Wire in annotations with URLs, titles, etc to these output messages
output_messages.extend(tool_final_outputs)
return output_messages
async def _execute_tool_call(
self,
function: OpenAIChatCompletionToolCallFunction,
) -> Optional[ToolInvocationResult]:
if not function.name:
return None
function_args = json.loads(function.arguments) if function.arguments else {}
logger.info(f"executing tool call: {function.name} with args: {function_args}")
result = await self.tool_runtime_api.invoke_tool(
tool_name=function.name,
kwargs=function_args,
)
logger.debug(f"tool call {function.name} completed with result: {result}")
return result
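
The persistence model is deliberately simple: each response is serialized
with `model_dump_json()` under an `openai_responses:{id}` key and
re-hydrated with `model_validate_json()` on retrieval. A self-contained
sketch of that round trip using the in-memory KV store (field values are
placeholders):

```
import asyncio

from llama_stack.apis.agents.openai_responses import OpenAIResponseObject
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl


async def demo() -> None:
    store = InmemoryKVStoreImpl()
    response = OpenAIResponseObject(
        created_at=0,
        id="resp-1234",
        model="meta-llama/Llama-3.2-3B-Instruct",
        output=[],
        status="completed",
    )
    # Same key scheme as OPENAI_RESPONSES_PREFIX above.
    key = f"openai_responses:{response.id}"
    await store.set(key=key, value=response.model_dump_json())
    loaded = OpenAIResponseObject.model_validate_json(await store.get(key=key))
    assert loaded.id == response.id


asyncio.run(demo())
```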


@@ -478,6 +478,8 @@ class JsonSchemaGenerator:
}
return ret
elif origin_type is Literal:
if len(typing.get_args(typ)) != 1:
raise ValueError(f"Literal type {typ} has {len(typing.get_args(typ))} arguments")
(literal_value,) = typing.get_args(typ) # unpack value of literal type
schema = self.type_to_schema(type(literal_value))
schema["const"] = literal_value


@@ -14,6 +14,7 @@ from pathlib import Path
import pytest
import yaml
from llama_stack_client import LlamaStackClient
from openai import OpenAI
from llama_stack import LlamaStackAsLibraryClient
from llama_stack.apis.datatypes import Api
@@ -207,3 +208,9 @@ def llama_stack_client(request, provider_data, text_model_id):
raise RuntimeError("Initialization failed")
return client
@pytest.fixture(scope="session")
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="fake")


@@ -0,0 +1,37 @@
{
"non_streaming_01": {
"data": {
"question": "Which planet do humans live on?",
"expected": "Earth"
}
},
"non_streaming_02": {
"data": {
"question": "Which planet has rings around it with a name starting with letter S?",
"expected": "Saturn"
}
},
"streaming_01": {
"data": {
"question": "What's the name of the Sun in latin?",
"expected": "Sol"
}
},
"streaming_02": {
"data": {
"question": "What is the name of the US captial?",
"expected": "Washington"
}
},
"tools_web_search_01": {
"data": {
"input": "How many experts does the Llama 4 Maverick model have?",
"tools": [
{
"type": "web_search"
}
],
"expected": "128"
}
}
}


@@ -12,6 +12,7 @@ class TestCase:
_apis = [
"inference/chat_completion",
"inference/completion",
"openai/responses",
]
_jsonblob = {}


@@ -13,3 +13,5 @@ test_exclusions:
- test_chat_non_streaming_image
- test_chat_streaming_image
- test_chat_multi_turn_multiple_images
- test_response_non_streaming_image
- test_response_non_streaming_multi_turn_image


@@ -13,3 +13,5 @@ test_exclusions:
- test_chat_non_streaming_image
- test_chat_streaming_image
- test_chat_multi_turn_multiple_images
- test_response_non_streaming_image
- test_response_non_streaming_multi_turn_image


@@ -13,3 +13,5 @@ test_exclusions:
- test_chat_non_streaming_image
- test_chat_streaming_image
- test_chat_multi_turn_multiple_images
- test_response_non_streaming_image
- test_response_non_streaming_multi_turn_image


@@ -16,7 +16,7 @@ Description:
Configuration:
-- Provider details (models, display names) are loaded from `tests/verifications/config.yaml`.
+- Provider details (models, display names) are loaded from `tests/verifications/conf/*.yaml`.
- Test cases are defined in YAML files within `tests/verifications/openai_api/fixtures/test_cases/`.
- Test results are stored in `tests/verifications/test_results/`.


@@ -1,10 +1,15 @@
# This is a temporary run file because model names used by the verification tests
# are not quite consistent with various pre-existing distributions.
#
version: '2'
image_name: openai-api-verification
apis:
- agents
- inference
- telemetry
- tool_runtime
- vector_io
- safety
providers:
inference:
- provider_id: together
@@ -16,12 +21,12 @@ providers:
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
-api_key: ${env.FIREWORKS_API_KEY}
+api_key: ${env.FIREWORKS_API_KEY:}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
-api_key: ${env.GROQ_API_KEY}
+api_key: ${env.GROQ_API_KEY:}
- provider_id: openai
provider_type: remote::openai
config:
@@ -45,6 +50,19 @@ providers:
service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/openai/trace_store.db}
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/agents_store.db
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
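
To stand this verification stack up locally, something like the following
should work (a sketch; the run file path is an assumption based on this
repo's layout, and provider API keys must be set in the environment):

```
llama stack run tests/verifications/openai-api-verification-run.yaml
```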


@@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs
def pytest_generate_tests(metafunc):
"""Dynamically parametrize tests based on the selected provider and config."""
if "model" in metafunc.fixturenames:
provider = metafunc.config.getoption("provider")
if not provider:
print("Warning: --provider not specified. Skipping model parametrization.")
metafunc.parametrize("model", [])
return
try:
config_data = _load_all_verification_configs()
except (FileNotFoundError, IOError) as e:
print(f"ERROR loading verification configs: {e}")
config_data = {"providers": {}}
provider_config = config_data.get("providers", {}).get(provider)
if provider_config:
models = provider_config.get("models", [])
if models:
metafunc.parametrize("model", models)
else:
print(f"Warning: No models found for provider '{provider}' in config.")
metafunc.parametrize("model", []) # Parametrize empty if no models found
else:
print(f"Warning: Provider '{provider}' not found in config. No models parametrized.")
metafunc.parametrize("model", []) # Parametrize empty if provider not found


@@ -5,14 +5,16 @@
# the root directory of this source tree.
import os
import re
from pathlib import Path
import pytest
import yaml
from openai import OpenAI
-# --- Helper Function to Load Config ---
+# --- Helper Functions ---
def _load_all_verification_configs():
"""Load and aggregate verification configs from the conf/ directory."""
# Note: Path is relative to *this* file (fixtures.py)
@@ -44,7 +46,30 @@ def _load_all_verification_configs():
return {"providers": all_provider_configs}
-# --- End Helper Function ---
+def case_id_generator(case):
"""Generate a test ID from the case's 'case_id' field, or use a default."""
case_id = case.get("case_id")
if isinstance(case_id, (str, int)):
return re.sub(r"\W|^(?=\d)", "_", str(case_id))
return None
def should_skip_test(verification_config, provider, model, test_name_base):
"""Check if a test should be skipped based on config exclusions."""
provider_config = verification_config.get("providers", {}).get(provider)
if not provider_config:
return False # No config for provider, don't skip
exclusions = provider_config.get("test_exclusions", {}).get(model, [])
return test_name_base in exclusions
# Helper to get the base test name from the request object
def get_base_test_name(request):
return request.node.originalname
# --- End Helper Functions ---
@pytest.fixture(scope="session")


@@ -0,0 +1,65 @@
test_response_basic:
test_name: test_response_basic
test_params:
case:
- case_id: "earth"
input: "Which planet do humans live on?"
output: "earth"
- case_id: "saturn"
input: "Which planet has rings around it with a name starting with letter S?"
output: "saturn"
test_response_multi_turn:
test_name: test_response_multi_turn
test_params:
case:
- case_id: "earth"
turns:
- input: "Which planet do humans live on?"
output: "earth"
- input: "What is the name of the planet from your previous response?"
output: "earth"
test_response_web_search:
test_name: test_response_web_search
test_params:
case:
- case_id: "llama_experts"
input: "How many experts does the Llama 4 Maverick model have?"
tools:
- type: web_search
search_context_size: "low"
output: "128"
test_response_image:
test_name: test_response_image
test_params:
case:
- case_id: "llama_image"
input:
- role: user
content:
- type: input_text
text: "Identify the type of animal in this image."
- type: input_image
image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
output: "llama"
test_response_multi_turn_image:
test_name: test_response_multi_turn_image
test_params:
case:
- case_id: "llama_image_search"
turns:
- input:
- role: user
content:
- type: input_text
text: "What type of animal is in this image? Please respond with a single word that starts with the letter 'L'."
- type: input_image
image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
output: "llama"
- input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick' and 'scout'."
tools:
- type: web_search
output: "model"


@@ -7,7 +7,6 @@
import base64
import copy
import json
-import re
from pathlib import Path
from typing import Any
@@ -16,7 +15,9 @@ from openai import APIError
from pydantic import BaseModel
from tests.verifications.openai_api.fixtures.fixtures import (
-_load_all_verification_configs,
+case_id_generator,
get_base_test_name,
should_skip_test,
)
from tests.verifications.openai_api.fixtures.load import load_test_cases
@@ -25,57 +26,6 @@ chat_completion_test_cases = load_test_cases("chat_completion")
THIS_DIR = Path(__file__).parent
def case_id_generator(case):
"""Generate a test ID from the case's 'case_id' field, or use a default."""
case_id = case.get("case_id")
if isinstance(case_id, (str, int)):
return re.sub(r"\W|^(?=\d)", "_", str(case_id))
return None
def pytest_generate_tests(metafunc):
"""Dynamically parametrize tests based on the selected provider and config."""
if "model" in metafunc.fixturenames:
provider = metafunc.config.getoption("provider")
if not provider:
print("Warning: --provider not specified. Skipping model parametrization.")
metafunc.parametrize("model", [])
return
try:
config_data = _load_all_verification_configs()
except (FileNotFoundError, IOError) as e:
print(f"ERROR loading verification configs: {e}")
config_data = {"providers": {}}
provider_config = config_data.get("providers", {}).get(provider)
if provider_config:
models = provider_config.get("models", [])
if models:
metafunc.parametrize("model", models)
else:
print(f"Warning: No models found for provider '{provider}' in config.")
metafunc.parametrize("model", []) # Parametrize empty if no models found
else:
print(f"Warning: Provider '{provider}' not found in config. No models parametrized.")
metafunc.parametrize("model", []) # Parametrize empty if provider not found
def should_skip_test(verification_config, provider, model, test_name_base):
"""Check if a test should be skipped based on config exclusions."""
provider_config = verification_config.get("providers", {}).get(provider)
if not provider_config:
return False # No config for provider, don't skip
exclusions = provider_config.get("test_exclusions", {}).get(model, [])
return test_name_base in exclusions
# Helper to get the base test name from the request object
def get_base_test_name(request):
return request.node.originalname
@pytest.fixture
def multi_image_data():
files = [


@@ -0,0 +1,166 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
from tests.verifications.openai_api.fixtures.fixtures import (
case_id_generator,
get_base_test_name,
should_skip_test,
)
from tests.verifications.openai_api.fixtures.load import load_test_cases
responses_test_cases = load_test_cases("responses")
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_basic"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_basic(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
stream=False,
)
output_text = response.output_text.lower().strip()
assert len(output_text) > 0
assert case["output"].lower() in output_text
retrieved_response = openai_client.responses.retrieve(response_id=response.id)
assert retrieved_response.output_text == response.output_text
next_response = openai_client.responses.create(
model=model, input="Repeat your previous response in all caps.", previous_response_id=response.id
)
next_output_text = next_response.output_text.strip()
assert case["output"].upper() in next_output_text
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_basic"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_streaming_basic(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
stream=True,
)
streamed_content = []
response_id = ""
for chunk in response:
if chunk.type == "response.completed":
response_id = chunk.response.id
streamed_content.append(chunk.response.output_text.strip())
assert len(streamed_content) > 0
assert case["output"].lower() in "".join(streamed_content).lower()
retrieved_response = openai_client.responses.retrieve(response_id=response_id)
assert retrieved_response.output_text == "".join(streamed_content)
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_multi_turn"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_multi_turn(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
previous_response_id = None
for turn in case["turns"]:
response = openai_client.responses.create(
model=model,
input=turn["input"],
previous_response_id=previous_response_id,
tools=turn["tools"] if "tools" in turn else None,
)
previous_response_id = response.id
output_text = response.output_text.lower()
assert turn["output"].lower() in output_text
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_web_search"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_web_search(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
tools=case["tools"],
stream=False,
)
assert len(response.output) > 1
assert response.output[0].type == "web_search_call"
assert response.output[0].status == "completed"
assert response.output[1].type == "message"
assert response.output[1].status == "completed"
assert response.output[1].role == "assistant"
assert len(response.output[1].content) > 0
assert case["output"].lower() in response.output_text.lower().strip()
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_image"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_image(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
stream=False,
)
output_text = response.output_text.lower()
assert case["output"].lower() in output_text
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_multi_turn_image"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_multi_turn_image(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
previous_response_id = None
for turn in case["turns"]:
response = openai_client.responses.create(
model=model,
input=turn["input"],
previous_response_id=previous_response_id,
tools=turn["tools"] if "tools" in turn else None,
)
previous_response_id = response.id
output_text = response.output_text.lower()
assert turn["output"].lower() in output_text