From 8dfce2f59677759f7bbb005c621a85dc3b76088d Mon Sep 17 00:00:00 2001 From: Ben Browning Date: Mon, 28 Apr 2025 17:06:00 -0400 Subject: [PATCH] feat: OpenAI Responses API (#1989) # What does this PR do? This provides an initial [OpenAI Responses API](https://platform.openai.com/docs/api-reference/responses) implementation. The API is not yet complete, and this is more a proof-of-concept to show how we can store responses in our key-value stores and use them to support the Responses API concepts like `previous_response_id`. ## Test Plan I've added a new `tests/integration/openai_responses/test_openai_responses.py` as part of a test-driven development for this new API. I'm only testing this locally with the remote-vllm provider for now, but it should work with any of our inference providers since the only API it requires out of the inference provider is the `openai_chat_completion` endpoint. ``` VLLM_URL="http://localhost:8000/v1" \ INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" \ llama stack build --template remote-vllm --image-type venv --run ``` ``` LLAMA_STACK_CONFIG="http://localhost:8321" \ python -m pytest -v \ tests/integration/openai_responses/test_openai_responses.py \ --text-model "meta-llama/Llama-3.2-3B-Instruct" ``` --------- Signed-off-by: Ben Browning Co-authored-by: Ashwin Bharambe --- docs/_static/llama-stack-spec.html | 512 ++++++++++++++++++ docs/_static/llama-stack-spec.yaml | 350 ++++++++++++ docs/openapi_generator/pyopenapi/generator.py | 2 +- llama_stack/apis/agents/agents.py | 43 ++ llama_stack/apis/agents/openai_responses.py | 140 +++++ .../inline/agents/meta_reference/agents.py | 31 ++ .../agents/meta_reference/openai_responses.py | 319 +++++++++++ llama_stack/strong_typing/schema.py | 2 + tests/integration/fixtures/common.py | 7 + .../test_cases/openai/responses.json | 37 ++ tests/integration/test_cases/test_case.py | 1 + .../conf/fireworks-llama-stack.yaml | 2 + .../verifications/conf/groq-llama-stack.yaml | 2 + .../conf/together-llama-stack.yaml | 2 + tests/verifications/generate_report.py | 2 +- .../openai-api-verification-run.yaml | 22 +- tests/verifications/openai_api/conftest.py | 35 ++ .../openai_api/fixtures/fixtures.py | 29 +- .../fixtures/test_cases/responses.yaml | 65 +++ .../openai_api/test_chat_completion.py | 56 +- .../openai_api/test_responses.py | 166 ++++++ 21 files changed, 1766 insertions(+), 59 deletions(-) create mode 100644 llama_stack/apis/agents/openai_responses.py create mode 100644 llama_stack/providers/inline/agents/meta_reference/openai_responses.py create mode 100644 tests/integration/test_cases/openai/responses.json create mode 100644 tests/verifications/openai_api/conftest.py create mode 100644 tests/verifications/openai_api/fixtures/test_cases/responses.yaml create mode 100644 tests/verifications/openai_api/test_responses.py diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 4c5393947..49c402d37 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -497,6 +497,54 @@ } } }, + "/v1/openai/v1/responses": { + "post": { + "responses": { + "200": { + "description": "Runtime representation of an annotated type.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIResponseObject" + } + }, + "text/event-stream": { + "schema": { + "$ref": "#/components/schemas/OpenAIResponseObjectStream" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": 
"#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "Create a new OpenAI response.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateOpenaiResponseRequest" + } + } + }, + "required": true + } + } + }, "/v1/files": { "get": { "responses": { @@ -1278,6 +1326,49 @@ ] } }, + "/v1/openai/v1/responses/{id}": { + "get": { + "responses": { + "200": { + "description": "An OpenAIResponseObject.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIResponseObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "Retrieve an OpenAI response by its ID.", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "The ID of the OpenAI response to retrieve.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/scoring-functions/{scoring_fn_id}": { "get": { "responses": { @@ -6192,6 +6283,427 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "OpenAIResponseInputMessage": { + "type": "object", + "properties": { + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContent" + } + } + ] + }, + "role": { + "oneOf": [ + { + "type": "string", + "const": "system" + }, + { + "type": "string", + "const": "developer" + }, + { + "type": "string", + "const": "user" + }, + { + "type": "string", + "const": "assistant" + } + ] + }, + "type": { + "type": "string", + "const": "message", + "default": "message" + } + }, + "additionalProperties": false, + "required": [ + "content", + "role" + ], + "title": "OpenAIResponseInputMessage" + }, + "OpenAIResponseInputMessageContent": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText", + "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage" + } + } + }, + "OpenAIResponseInputMessageContentImage": { + "type": "object", + "properties": { + "detail": { + "oneOf": [ + { + "type": "string", + "const": "low" + }, + { + "type": "string", + "const": "high" + }, + { + "type": "string", + "const": "auto" + } + ], + "default": "auto" + }, + "type": { + "type": "string", + "const": "input_image", + "default": "input_image" + }, + "image_url": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "detail", + "type" + ], + "title": "OpenAIResponseInputMessageContentImage" + }, + "OpenAIResponseInputMessageContentText": { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "type": { + "type": "string", + "const": "input_text", + "default": "input_text" + } + }, + "additionalProperties": false, + "required": [ + "text", + "type" + ], + "title": "OpenAIResponseInputMessageContentText" + }, + "OpenAIResponseInputTool": { + "type": 
"object", + "properties": { + "type": { + "oneOf": [ + { + "type": "string", + "const": "web_search" + }, + { + "type": "string", + "const": "web_search_preview_2025_03_11" + } + ], + "default": "web_search" + }, + "search_context_size": { + "type": "string", + "default": "medium" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "OpenAIResponseInputToolWebSearch" + }, + "CreateOpenaiResponseRequest": { + "type": "object", + "properties": { + "input": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInputMessage" + } + } + ], + "description": "Input message(s) to create the response." + }, + "model": { + "type": "string", + "description": "The underlying LLM used for completions." + }, + "previous_response_id": { + "type": "string", + "description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses." + }, + "store": { + "type": "boolean" + }, + "stream": { + "type": "boolean" + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInputTool" + } + } + }, + "additionalProperties": false, + "required": [ + "input", + "model" + ], + "title": "CreateOpenaiResponseRequest" + }, + "OpenAIResponseError": { + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "message": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "code", + "message" + ], + "title": "OpenAIResponseError" + }, + "OpenAIResponseObject": { + "type": "object", + "properties": { + "created_at": { + "type": "integer" + }, + "error": { + "$ref": "#/components/schemas/OpenAIResponseError" + }, + "id": { + "type": "string" + }, + "model": { + "type": "string" + }, + "object": { + "type": "string", + "const": "response", + "default": "response" + }, + "output": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutput" + } + }, + "parallel_tool_calls": { + "type": "boolean", + "default": false + }, + "previous_response_id": { + "type": "string" + }, + "status": { + "type": "string" + }, + "temperature": { + "type": "number" + }, + "top_p": { + "type": "number" + }, + "truncation": { + "type": "string" + }, + "user": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "created_at", + "id", + "model", + "object", + "output", + "parallel_tool_calls", + "status" + ], + "title": "OpenAIResponseObject" + }, + "OpenAIResponseOutput": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "message": "#/components/schemas/OpenAIResponseOutputMessage", + "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + } + } + }, + "OpenAIResponseOutputMessage": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "content": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageContent" + } + }, + "role": { + "type": "string", + "const": "assistant", + "default": "assistant" + }, + "status": { + "type": "string" + }, + "type": { + "type": "string", + "const": "message", + "default": "message" + } + }, + "additionalProperties": false, + "required": [ + "id", + "content", + "role", + "status", + "type" + ], + 
"title": "OpenAIResponseOutputMessage" + }, + "OpenAIResponseOutputMessageContent": { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "type": { + "type": "string", + "const": "output_text", + "default": "output_text" + } + }, + "additionalProperties": false, + "required": [ + "text", + "type" + ], + "title": "OpenAIResponseOutputMessageContentOutputText" + }, + "OpenAIResponseOutputMessageWebSearchToolCall": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "status": { + "type": "string" + }, + "type": { + "type": "string", + "const": "web_search_call", + "default": "web_search_call" + } + }, + "additionalProperties": false, + "required": [ + "id", + "status", + "type" + ], + "title": "OpenAIResponseOutputMessageWebSearchToolCall" + }, + "OpenAIResponseObjectStream": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated", + "response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" + } + } + }, + "OpenAIResponseObjectStreamResponseCompleted": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject" + }, + "type": { + "type": "string", + "const": "response.completed", + "default": "response.completed" + } + }, + "additionalProperties": false, + "required": [ + "response", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseCompleted" + }, + "OpenAIResponseObjectStreamResponseCreated": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject" + }, + "type": { + "type": "string", + "const": "response.created", + "default": "response.created" + } + }, + "additionalProperties": false, + "required": [ + "response", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseCreated" + }, "CreateUploadSessionRequest": { "type": "object", "properties": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index a24f1a9db..e5bfad623 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -330,6 +330,39 @@ paths: schema: $ref: '#/components/schemas/CreateAgentTurnRequest' required: true + /v1/openai/v1/responses: + post: + responses: + '200': + description: >- + Runtime representation of an annotated type. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIResponseObject' + text/event-stream: + schema: + $ref: '#/components/schemas/OpenAIResponseObjectStream' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: Create a new OpenAI response. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateOpenaiResponseRequest' + required: true /v1/files: get: responses: @@ -875,6 +908,36 @@ paths: required: true schema: type: string + /v1/openai/v1/responses/{id}: + get: + responses: + '200': + description: An OpenAIResponseObject. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIResponseObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: Retrieve an OpenAI response by its ID. + parameters: + - name: id + in: path + description: >- + The ID of the OpenAI response to retrieve. + required: true + schema: + type: string /v1/scoring-functions/{scoring_fn_id}: get: responses: @@ -4329,6 +4392,293 @@ components: - event_type - turn_id title: AgentTurnResponseTurnStartPayload + OpenAIResponseInputMessage: + type: object + properties: + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInputMessageContent' + role: + oneOf: + - type: string + const: system + - type: string + const: developer + - type: string + const: user + - type: string + const: assistant + type: + type: string + const: message + default: message + additionalProperties: false + required: + - content + - role + title: OpenAIResponseInputMessage + OpenAIResponseInputMessageContent: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + discriminator: + propertyName: type + mapping: + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + OpenAIResponseInputMessageContentImage: + type: object + properties: + detail: + oneOf: + - type: string + const: low + - type: string + const: high + - type: string + const: auto + default: auto + type: + type: string + const: input_image + default: input_image + image_url: + type: string + additionalProperties: false + required: + - detail + - type + title: OpenAIResponseInputMessageContentImage + OpenAIResponseInputMessageContentText: + type: object + properties: + text: + type: string + type: + type: string + const: input_text + default: input_text + additionalProperties: false + required: + - text + - type + title: OpenAIResponseInputMessageContentText + OpenAIResponseInputTool: + type: object + properties: + type: + oneOf: + - type: string + const: web_search + - type: string + const: web_search_preview_2025_03_11 + default: web_search + search_context_size: + type: string + default: medium + additionalProperties: false + required: + - type + title: OpenAIResponseInputToolWebSearch + CreateOpenaiResponseRequest: + type: object + properties: + input: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInputMessage' + description: Input message(s) to create the response. + model: + type: string + description: The underlying LLM used for completions. + previous_response_id: + type: string + description: >- + (Optional) if specified, the new response will be a continuation of the + previous response. This can be used to easily fork-off new responses from + existing responses. 
+ store: + type: boolean + stream: + type: boolean + tools: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInputTool' + additionalProperties: false + required: + - input + - model + title: CreateOpenaiResponseRequest + OpenAIResponseError: + type: object + properties: + code: + type: string + message: + type: string + additionalProperties: false + required: + - code + - message + title: OpenAIResponseError + OpenAIResponseObject: + type: object + properties: + created_at: + type: integer + error: + $ref: '#/components/schemas/OpenAIResponseError' + id: + type: string + model: + type: string + object: + type: string + const: response + default: response + output: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutput' + parallel_tool_calls: + type: boolean + default: false + previous_response_id: + type: string + status: + type: string + temperature: + type: number + top_p: + type: number + truncation: + type: string + user: + type: string + additionalProperties: false + required: + - created_at + - id + - model + - object + - output + - parallel_tool_calls + - status + title: OpenAIResponseObject + OpenAIResponseOutput: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseOutputMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + OpenAIResponseOutputMessage: + type: object + properties: + id: + type: string + content: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageContent' + role: + type: string + const: assistant + default: assistant + status: + type: string + type: + type: string + const: message + default: message + additionalProperties: false + required: + - id + - content + - role + - status + - type + title: OpenAIResponseOutputMessage + OpenAIResponseOutputMessageContent: + type: object + properties: + text: + type: string + type: + type: string + const: output_text + default: output_text + additionalProperties: false + required: + - text + - type + title: >- + OpenAIResponseOutputMessageContentOutputText + "OpenAIResponseOutputMessageWebSearchToolCall": + type: object + properties: + id: + type: string + status: + type: string + type: + type: string + const: web_search_call + default: web_search_call + additionalProperties: false + required: + - id + - status + - type + title: >- + OpenAIResponseOutputMessageWebSearchToolCall + OpenAIResponseObjectStream: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + discriminator: + propertyName: type + mapping: + response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + "OpenAIResponseObjectStreamResponseCompleted": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + type: + type: string + const: response.completed + default: response.completed + additionalProperties: false + required: + - response + - type + title: >- + OpenAIResponseObjectStreamResponseCompleted + "OpenAIResponseObjectStreamResponseCreated": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + type: + type: string + const: response.created + default: 
response.created + additionalProperties: false + required: + - response + - type + title: >- + OpenAIResponseObjectStreamResponseCreated CreateUploadSessionRequest: type: object properties: diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 3936bb3c4..6d5e48a46 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -179,7 +179,7 @@ class ContentBuilder: "Creates the content subtree for a request or response." def is_iterator_type(t): - return "StreamChunk" in str(t) + return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t) def get_media_type(t): if is_generic_list(t): diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index dec43280b..4db6e2226 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -38,6 +38,13 @@ from llama_stack.apis.safety import SafetyViolation from llama_stack.apis.tools import ToolDef from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from .openai_responses import ( + OpenAIResponseInputMessage, + OpenAIResponseInputTool, + OpenAIResponseObject, + OpenAIResponseObjectStream, +) + class Attachment(BaseModel): """An attachment to an agent turn. @@ -593,3 +600,39 @@ class Agents(Protocol): :returns: A ListAgentSessionsResponse. """ ... + + # We situate the OpenAI Responses API in the Agents API just like we did things + # for Inference. The Responses API, in its intent, serves the same purpose as + # the Agents API above -- it is essentially a lightweight "agentic loop" with + # integrated tool calling. + # + # Both of these APIs are inherently stateful. + + @webmethod(route="/openai/v1/responses/{id}", method="GET") + async def get_openai_response( + self, + id: str, + ) -> OpenAIResponseObject: + """Retrieve an OpenAI response by its ID. + + :param id: The ID of the OpenAI response to retrieve. + :returns: An OpenAIResponseObject. + """ + ... + + @webmethod(route="/openai/v1/responses", method="POST") + async def create_openai_response( + self, + input: Union[str, List[OpenAIResponseInputMessage]], + model: str, + previous_response_id: Optional[str] = None, + store: Optional[bool] = True, + stream: Optional[bool] = False, + tools: Optional[List[OpenAIResponseInputTool]] = None, + ) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]: + """Create a new OpenAI response. + + :param input: Input message(s) to create the response. + :param model: The underlying LLM used for completions. + :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses. + """ diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py new file mode 100644 index 000000000..72f16e224 --- /dev/null +++ b/llama_stack/apis/agents/openai_responses.py @@ -0,0 +1,140 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import List, Literal, Optional, Union + +from pydantic import BaseModel, Field +from typing_extensions import Annotated + +from llama_stack.schema_utils import json_schema_type, register_schema + + +@json_schema_type +class OpenAIResponseError(BaseModel): + code: str + message: str + + +@json_schema_type +class OpenAIResponseOutputMessageContentOutputText(BaseModel): + text: str + type: Literal["output_text"] = "output_text" + + +OpenAIResponseOutputMessageContent = Annotated[ + Union[OpenAIResponseOutputMessageContentOutputText,], + Field(discriminator="type"), +] +register_schema(OpenAIResponseOutputMessageContent, name="OpenAIResponseOutputMessageContent") + + +@json_schema_type +class OpenAIResponseOutputMessage(BaseModel): + id: str + content: List[OpenAIResponseOutputMessageContent] + role: Literal["assistant"] = "assistant" + status: str + type: Literal["message"] = "message" + + +@json_schema_type +class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel): + id: str + status: str + type: Literal["web_search_call"] = "web_search_call" + + +OpenAIResponseOutput = Annotated[ + Union[ + OpenAIResponseOutputMessage, + OpenAIResponseOutputMessageWebSearchToolCall, + ], + Field(discriminator="type"), +] +register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput") + + +@json_schema_type +class OpenAIResponseObject(BaseModel): + created_at: int + error: Optional[OpenAIResponseError] = None + id: str + model: str + object: Literal["response"] = "response" + output: List[OpenAIResponseOutput] + parallel_tool_calls: bool = False + previous_response_id: Optional[str] = None + status: str + temperature: Optional[float] = None + top_p: Optional[float] = None + truncation: Optional[str] = None + user: Optional[str] = None + + +@json_schema_type +class OpenAIResponseObjectStreamResponseCreated(BaseModel): + response: OpenAIResponseObject + type: Literal["response.created"] = "response.created" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseCompleted(BaseModel): + response: OpenAIResponseObject + type: Literal["response.completed"] = "response.completed" + + +OpenAIResponseObjectStream = Annotated[ + Union[ + OpenAIResponseObjectStreamResponseCreated, + OpenAIResponseObjectStreamResponseCompleted, + ], + Field(discriminator="type"), +] +register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream") + + +@json_schema_type +class OpenAIResponseInputMessageContentText(BaseModel): + text: str + type: Literal["input_text"] = "input_text" + + +@json_schema_type +class OpenAIResponseInputMessageContentImage(BaseModel): + detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto" + type: Literal["input_image"] = "input_image" + # TODO: handle file_id + image_url: Optional[str] = None + + +# TODO: handle file content types +OpenAIResponseInputMessageContent = Annotated[ + Union[OpenAIResponseInputMessageContentText, OpenAIResponseInputMessageContentImage], + Field(discriminator="type"), +] +register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") + + +@json_schema_type +class OpenAIResponseInputMessage(BaseModel): + content: Union[str, List[OpenAIResponseInputMessageContent]] + role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"] + type: Optional[Literal["message"]] = "message" + + +@json_schema_type +class OpenAIResponseInputToolWebSearch(BaseModel): + type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search" + # TODO: actually use 
search_context_size somewhere... + search_context_size: Optional[str] = Field(default="medium", pattern="^low|medium|high$") + # TODO: add user_location + + +OpenAIResponseInputTool = Annotated[ + Union[OpenAIResponseInputToolWebSearch,], + Field(discriminator="type"), +] +register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool") diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 656178773..38aa6fd97 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -23,6 +23,9 @@ from llama_stack.apis.agents import ( Document, ListAgentSessionsResponse, ListAgentsResponse, + OpenAIResponseInputMessage, + OpenAIResponseInputTool, + OpenAIResponseObject, Session, Turn, ) @@ -40,6 +43,7 @@ from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_imp from .agent_instance import ChatAgent from .config import MetaReferenceAgentsImplConfig +from .openai_responses import OpenAIResponsesImpl logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -63,9 +67,16 @@ class MetaReferenceAgentsImpl(Agents): self.tool_groups_api = tool_groups_api self.in_memory_store = InmemoryKVStoreImpl() + self.openai_responses_impl = None async def initialize(self) -> None: self.persistence_store = await kvstore_impl(self.config.persistence_store) + self.openai_responses_impl = OpenAIResponsesImpl( + self.persistence_store, + inference_api=self.inference_api, + tool_groups_api=self.tool_groups_api, + tool_runtime_api=self.tool_runtime_api, + ) # check if "bwrap" is available if not shutil.which("bwrap"): @@ -244,3 +255,23 @@ class MetaReferenceAgentsImpl(Agents): agent_id: str, ) -> ListAgentSessionsResponse: pass + + # OpenAI responses + async def get_openai_response( + self, + id: str, + ) -> OpenAIResponseObject: + return await self.openai_responses_impl.get_openai_response(id) + + async def create_openai_response( + self, + input: Union[str, List[OpenAIResponseInputMessage]], + model: str, + previous_response_id: Optional[str] = None, + store: Optional[bool] = True, + stream: Optional[bool] = False, + tools: Optional[List[OpenAIResponseInputTool]] = None, + ) -> OpenAIResponseObject: + return await self.openai_responses_impl.create_openai_response( + input, model, previous_response_id, store, stream, tools + ) diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py new file mode 100644 index 000000000..0690a15fe --- /dev/null +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -0,0 +1,319 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import json +import uuid +from typing import AsyncIterator, List, Optional, Union, cast + +from openai.types.chat import ChatCompletionToolParam + +from llama_stack.apis.agents.openai_responses import ( + OpenAIResponseInputMessage, + OpenAIResponseInputMessageContentImage, + OpenAIResponseInputMessageContentText, + OpenAIResponseInputTool, + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseObjectStreamResponseCompleted, + OpenAIResponseObjectStreamResponseCreated, + OpenAIResponseOutput, + OpenAIResponseOutputMessage, + OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageWebSearchToolCall, +) +from llama_stack.apis.inference.inference import ( + Inference, + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionToolCallFunction, + OpenAIChoice, + OpenAIImageURL, + OpenAIMessageParam, + OpenAIToolMessageParam, + OpenAIUserMessageParam, +) +from llama_stack.apis.tools.tools import ToolGroups, ToolInvocationResult, ToolRuntime +from llama_stack.log import get_logger +from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition +from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool +from llama_stack.providers.utils.kvstore import KVStore + +logger = get_logger(name=__name__, category="openai_responses") + +OPENAI_RESPONSES_PREFIX = "openai_responses:" + + +async def _previous_response_to_messages(previous_response: OpenAIResponseObject) -> List[OpenAIMessageParam]: + messages: List[OpenAIMessageParam] = [] + for output_message in previous_response.output: + if isinstance(output_message, OpenAIResponseOutputMessage): + messages.append(OpenAIAssistantMessageParam(content=output_message.content[0].text)) + return messages + + +async def _openai_choices_to_output_messages(choices: List[OpenAIChoice]) -> List[OpenAIResponseOutputMessage]: + output_messages = [] + for choice in choices: + output_content = "" + if isinstance(choice.message.content, str): + output_content = choice.message.content + elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam): + output_content = choice.message.content.text + # TODO: handle image content + output_messages.append( + OpenAIResponseOutputMessage( + id=f"msg_{uuid.uuid4()}", + content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)], + status="completed", + ) + ) + return output_messages + + +class OpenAIResponsesImpl: + def __init__( + self, + persistence_store: KVStore, + inference_api: Inference, + tool_groups_api: ToolGroups, + tool_runtime_api: ToolRuntime, + ): + self.persistence_store = persistence_store + self.inference_api = inference_api + self.tool_groups_api = tool_groups_api + self.tool_runtime_api = tool_runtime_api + + async def get_openai_response( + self, + id: str, + ) -> OpenAIResponseObject: + key = f"{OPENAI_RESPONSES_PREFIX}{id}" + response_json = await self.persistence_store.get(key=key) + if response_json is None: + raise ValueError(f"OpenAI response with id '{id}' not found") + return OpenAIResponseObject.model_validate_json(response_json) + + async def create_openai_response( + self, + input: Union[str, List[OpenAIResponseInputMessage]], + model: str, + previous_response_id: Optional[str] = None, + store: Optional[bool] = True, + stream: Optional[bool] = False, + tools: Optional[List[OpenAIResponseInputTool]] = None, + ): + stream = False 
if stream is None else stream + + messages: List[OpenAIMessageParam] = [] + if previous_response_id: + previous_response = await self.get_openai_response(previous_response_id) + messages.extend(await _previous_response_to_messages(previous_response)) + # TODO: refactor this user_content parsing out into a separate method + user_content: Union[str, List[OpenAIChatCompletionContentPartParam]] = "" + if isinstance(input, list): + user_content = [] + for user_input in input: + if isinstance(user_input.content, list): + for user_input_content in user_input.content: + if isinstance(user_input_content, OpenAIResponseInputMessageContentText): + user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input_content.text)) + elif isinstance(user_input_content, OpenAIResponseInputMessageContentImage): + if user_input_content.image_url: + image_url = OpenAIImageURL( + url=user_input_content.image_url, detail=user_input_content.detail + ) + user_content.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) + else: + user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input.content)) + else: + user_content = input + messages.append(OpenAIUserMessageParam(content=user_content)) + + chat_tools = await self._convert_response_tools_to_chat_tools(tools) if tools else None + chat_response = await self.inference_api.openai_chat_completion( + model=model, + messages=messages, + tools=chat_tools, + stream=stream, + ) + + if stream: + # TODO: refactor this into a separate method that handles streaming + chat_response_id = "" + chat_response_content = [] + # TODO: these chunk_ fields are hacky and only take the last chunk into account + chunk_created = 0 + chunk_model = "" + chunk_finish_reason = "" + async for chunk in chat_response: + chat_response_id = chunk.id + chunk_created = chunk.created + chunk_model = chunk.model + for chunk_choice in chunk.choices: + # TODO: this only works for text content + chat_response_content.append(chunk_choice.delta.content or "") + if chunk_choice.finish_reason: + chunk_finish_reason = chunk_choice.finish_reason + assistant_message = OpenAIAssistantMessageParam(content="".join(chat_response_content)) + chat_response = OpenAIChatCompletion( + id=chat_response_id, + choices=[ + OpenAIChoice( + message=assistant_message, + finish_reason=chunk_finish_reason, + index=0, + ) + ], + created=chunk_created, + model=chunk_model, + ) + else: + # dump and reload to map to our pydantic types + chat_response = OpenAIChatCompletion(**chat_response.model_dump()) + + output_messages: List[OpenAIResponseOutput] = [] + if chat_response.choices[0].message.tool_calls: + output_messages.extend( + await self._execute_tool_and_return_final_output(model, stream, chat_response, messages) + ) + else: + output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices)) + response = OpenAIResponseObject( + created_at=chat_response.created, + id=f"resp-{uuid.uuid4()}", + model=model, + object="response", + status="completed", + output=output_messages, + ) + + if store: + # Store in kvstore + key = f"{OPENAI_RESPONSES_PREFIX}{response.id}" + await self.persistence_store.set( + key=key, + value=response.model_dump_json(), + ) + + if stream: + + async def async_response() -> AsyncIterator[OpenAIResponseObjectStream]: + # TODO: response created should actually get emitted much earlier in the process + yield OpenAIResponseObjectStreamResponseCreated(response=response) + yield OpenAIResponseObjectStreamResponseCompleted(response=response) + + 
return async_response() + + return response + + async def _convert_response_tools_to_chat_tools( + self, tools: List[OpenAIResponseInputTool] + ) -> List[ChatCompletionToolParam]: + chat_tools: List[ChatCompletionToolParam] = [] + for input_tool in tools: + # TODO: Handle other tool types + if input_tool.type == "web_search": + tool_name = "web_search" + tool = await self.tool_groups_api.get_tool(tool_name) + tool_def = ToolDefinition( + tool_name=tool_name, + description=tool.description, + parameters={ + param.name: ToolParamDefinition( + param_type=param.parameter_type, + description=param.description, + required=param.required, + default=param.default, + ) + for param in tool.parameters + }, + ) + chat_tool = convert_tooldef_to_openai_tool(tool_def) + chat_tools.append(chat_tool) + else: + raise ValueError(f"Llama Stack OpenAI Responses does not yet support tool type: {input_tool.type}") + return chat_tools + + async def _execute_tool_and_return_final_output( + self, model_id: str, stream: bool, chat_response: OpenAIChatCompletion, messages: List[OpenAIMessageParam] + ) -> List[OpenAIResponseOutput]: + output_messages: List[OpenAIResponseOutput] = [] + choice = chat_response.choices[0] + + # If the choice is not an assistant message, we don't need to execute any tools + if not isinstance(choice.message, OpenAIAssistantMessageParam): + return output_messages + + # If the assistant message doesn't have any tool calls, we don't need to execute any tools + if not choice.message.tool_calls: + return output_messages + + # Add the assistant message with tool_calls response to the messages list + messages.append(choice.message) + + for tool_call in choice.message.tool_calls: + tool_call_id = tool_call.id + function = tool_call.function + + # If for some reason the tool call doesn't have a function or id, we can't execute it + if not function or not tool_call_id: + continue + + # TODO: telemetry spans for tool calls + result = await self._execute_tool_call(function) + + # Handle tool call failure + if not result: + output_messages.append( + OpenAIResponseOutputMessageWebSearchToolCall( + id=tool_call_id, + status="failed", + ) + ) + continue + + output_messages.append( + OpenAIResponseOutputMessageWebSearchToolCall( + id=tool_call_id, + status="completed", + ), + ) + + result_content = "" + # TODO: handle other result content types and lists + if isinstance(result.content, str): + result_content = result.content + messages.append(OpenAIToolMessageParam(content=result_content, tool_call_id=tool_call_id)) + + tool_results_chat_response = await self.inference_api.openai_chat_completion( + model=model_id, + messages=messages, + stream=stream, + ) + # type cast to appease mypy + tool_results_chat_response = cast(OpenAIChatCompletion, tool_results_chat_response) + tool_final_outputs = await _openai_choices_to_output_messages(tool_results_chat_response.choices) + # TODO: Wire in annotations with URLs, titles, etc to these output messages + output_messages.extend(tool_final_outputs) + return output_messages + + async def _execute_tool_call( + self, + function: OpenAIChatCompletionToolCallFunction, + ) -> Optional[ToolInvocationResult]: + if not function.name: + return None + function_args = json.loads(function.arguments) if function.arguments else {} + logger.info(f"executing tool call: {function.name} with args: {function_args}") + result = await self.tool_runtime_api.invoke_tool( + tool_name=function.name, + kwargs=function_args, + ) + logger.debug(f"tool call {function.name} completed with result: 
{result}") + return result diff --git a/llama_stack/strong_typing/schema.py b/llama_stack/strong_typing/schema.py index 0f5121906..1427c22e6 100644 --- a/llama_stack/strong_typing/schema.py +++ b/llama_stack/strong_typing/schema.py @@ -478,6 +478,8 @@ class JsonSchemaGenerator: } return ret elif origin_type is Literal: + if len(typing.get_args(typ)) != 1: + raise ValueError(f"Literal type {typ} has {len(typing.get_args(typ))} arguments") (literal_value,) = typing.get_args(typ) # unpack value of literal type schema = self.type_to_schema(type(literal_value)) schema["const"] = literal_value diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 1878c9e88..809a00897 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -14,6 +14,7 @@ from pathlib import Path import pytest import yaml from llama_stack_client import LlamaStackClient +from openai import OpenAI from llama_stack import LlamaStackAsLibraryClient from llama_stack.apis.datatypes import Api @@ -207,3 +208,9 @@ def llama_stack_client(request, provider_data, text_model_id): raise RuntimeError("Initialization failed") return client + + +@pytest.fixture(scope="session") +def openai_client(client_with_models): + base_url = f"{client_with_models.base_url}/v1/openai/v1" + return OpenAI(base_url=base_url, api_key="fake") diff --git a/tests/integration/test_cases/openai/responses.json b/tests/integration/test_cases/openai/responses.json new file mode 100644 index 000000000..d17d0cd4f --- /dev/null +++ b/tests/integration/test_cases/openai/responses.json @@ -0,0 +1,37 @@ +{ + "non_streaming_01": { + "data": { + "question": "Which planet do humans live on?", + "expected": "Earth" + } + }, + "non_streaming_02": { + "data": { + "question": "Which planet has rings around it with a name starting with letter S?", + "expected": "Saturn" + } + }, + "streaming_01": { + "data": { + "question": "What's the name of the Sun in latin?", + "expected": "Sol" + } + }, + "streaming_02": { + "data": { + "question": "What is the name of the US captial?", + "expected": "Washington" + } + }, + "tools_web_search_01": { + "data": { + "input": "How many experts does the Llama 4 Maverick model have?", + "tools": [ + { + "type": "web_search" + } + ], + "expected": "128" + } + } +} diff --git a/tests/integration/test_cases/test_case.py b/tests/integration/test_cases/test_case.py index 8514f3046..2a3c73310 100644 --- a/tests/integration/test_cases/test_case.py +++ b/tests/integration/test_cases/test_case.py @@ -12,6 +12,7 @@ class TestCase: _apis = [ "inference/chat_completion", "inference/completion", + "openai/responses", ] _jsonblob = {} diff --git a/tests/verifications/conf/fireworks-llama-stack.yaml b/tests/verifications/conf/fireworks-llama-stack.yaml index fc78a1377..dffd7c739 100644 --- a/tests/verifications/conf/fireworks-llama-stack.yaml +++ b/tests/verifications/conf/fireworks-llama-stack.yaml @@ -13,3 +13,5 @@ test_exclusions: - test_chat_non_streaming_image - test_chat_streaming_image - test_chat_multi_turn_multiple_images + - test_response_non_streaming_image + - test_response_non_streaming_multi_turn_image diff --git a/tests/verifications/conf/groq-llama-stack.yaml b/tests/verifications/conf/groq-llama-stack.yaml index 6958bafc5..786b79c24 100644 --- a/tests/verifications/conf/groq-llama-stack.yaml +++ b/tests/verifications/conf/groq-llama-stack.yaml @@ -13,3 +13,5 @@ test_exclusions: - test_chat_non_streaming_image - test_chat_streaming_image - 
test_chat_multi_turn_multiple_images + - test_response_non_streaming_image + - test_response_non_streaming_multi_turn_image diff --git a/tests/verifications/conf/together-llama-stack.yaml b/tests/verifications/conf/together-llama-stack.yaml index 719e2d776..58cbcfa93 100644 --- a/tests/verifications/conf/together-llama-stack.yaml +++ b/tests/verifications/conf/together-llama-stack.yaml @@ -13,3 +13,5 @@ test_exclusions: - test_chat_non_streaming_image - test_chat_streaming_image - test_chat_multi_turn_multiple_images + - test_response_non_streaming_image + - test_response_non_streaming_multi_turn_image diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py index f0894bfce..bdaea3ebf 100755 --- a/tests/verifications/generate_report.py +++ b/tests/verifications/generate_report.py @@ -16,7 +16,7 @@ Description: Configuration: - - Provider details (models, display names) are loaded from `tests/verifications/config.yaml`. + - Provider details (models, display names) are loaded from `tests/verifications/conf/*.yaml`. - Test cases are defined in YAML files within `tests/verifications/openai_api/fixtures/test_cases/`. - Test results are stored in `tests/verifications/test_results/`. diff --git a/tests/verifications/openai-api-verification-run.yaml b/tests/verifications/openai-api-verification-run.yaml index 71885d058..04675577d 100644 --- a/tests/verifications/openai-api-verification-run.yaml +++ b/tests/verifications/openai-api-verification-run.yaml @@ -1,10 +1,15 @@ +# This is a temporary run file because model names used by the verification tests +# are not quite consistent with various pre-existing distributions. +# version: '2' image_name: openai-api-verification apis: +- agents - inference - telemetry - tool_runtime - vector_io +- safety providers: inference: - provider_id: together @@ -16,12 +21,12 @@ providers: provider_type: remote::fireworks config: url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY} + api_key: ${env.FIREWORKS_API_KEY:} - provider_id: groq provider_type: remote::groq config: url: https://api.groq.com - api_key: ${env.GROQ_API_KEY} + api_key: ${env.GROQ_API_KEY:} - provider_id: openai provider_type: remote::openai config: @@ -45,6 +50,19 @@ providers: service_name: "${env.OTEL_SERVICE_NAME:\u200B}" sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/openai/trace_store.db} + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/agents_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search diff --git a/tests/verifications/openai_api/conftest.py b/tests/verifications/openai_api/conftest.py new file mode 100644 index 000000000..7b4c92f1c --- /dev/null +++ b/tests/verifications/openai_api/conftest.py @@ -0,0 +1,35 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs + + +def pytest_generate_tests(metafunc): + """Dynamically parametrize tests based on the selected provider and config.""" + if "model" in metafunc.fixturenames: + provider = metafunc.config.getoption("provider") + if not provider: + print("Warning: --provider not specified. Skipping model parametrization.") + metafunc.parametrize("model", []) + return + + try: + config_data = _load_all_verification_configs() + except (FileNotFoundError, IOError) as e: + print(f"ERROR loading verification configs: {e}") + config_data = {"providers": {}} + + provider_config = config_data.get("providers", {}).get(provider) + if provider_config: + models = provider_config.get("models", []) + if models: + metafunc.parametrize("model", models) + else: + print(f"Warning: No models found for provider '{provider}' in config.") + metafunc.parametrize("model", []) # Parametrize empty if no models found + else: + print(f"Warning: Provider '{provider}' not found in config. No models parametrized.") + metafunc.parametrize("model", []) # Parametrize empty if provider not found diff --git a/tests/verifications/openai_api/fixtures/fixtures.py b/tests/verifications/openai_api/fixtures/fixtures.py index 940b99b2a..2ea73cf26 100644 --- a/tests/verifications/openai_api/fixtures/fixtures.py +++ b/tests/verifications/openai_api/fixtures/fixtures.py @@ -5,14 +5,16 @@ # the root directory of this source tree. import os +import re from pathlib import Path import pytest import yaml from openai import OpenAI +# --- Helper Functions --- + -# --- Helper Function to Load Config --- def _load_all_verification_configs(): """Load and aggregate verification configs from the conf/ directory.""" # Note: Path is relative to *this* file (fixtures.py) @@ -44,7 +46,30 @@ def _load_all_verification_configs(): return {"providers": all_provider_configs} -# --- End Helper Function --- +def case_id_generator(case): + """Generate a test ID from the case's 'case_id' field, or use a default.""" + case_id = case.get("case_id") + if isinstance(case_id, (str, int)): + return re.sub(r"\\W|^(?=\\d)", "_", str(case_id)) + return None + + +def should_skip_test(verification_config, provider, model, test_name_base): + """Check if a test should be skipped based on config exclusions.""" + provider_config = verification_config.get("providers", {}).get(provider) + if not provider_config: + return False # No config for provider, don't skip + + exclusions = provider_config.get("test_exclusions", {}).get(model, []) + return test_name_base in exclusions + + +# Helper to get the base test name from the request object +def get_base_test_name(request): + return request.node.originalname + + +# --- End Helper Functions --- @pytest.fixture(scope="session") diff --git a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml new file mode 100644 index 000000000..f235b2ea8 --- /dev/null +++ b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml @@ -0,0 +1,65 @@ +test_response_basic: + test_name: test_response_basic + test_params: + case: + - case_id: "earth" + input: "Which planet do humans live on?" + output: "earth" + - case_id: "saturn" + input: "Which planet has rings around it with a name starting with letter S?" 
+ output: "saturn" + +test_response_multi_turn: + test_name: test_response_multi_turn + test_params: + case: + - case_id: "earth" + turns: + - input: "Which planet do humans live on?" + output: "earth" + - input: "What is the name of the planet from your previous response?" + output: "earth" + +test_response_web_search: + test_name: test_response_web_search + test_params: + case: + - case_id: "llama_experts" + input: "How many experts does the Llama 4 Maverick model have?" + tools: + - type: web_search + search_context_size: "low" + output: "128" + +test_response_image: + test_name: test_response_image + test_params: + case: + - case_id: "llama_image" + input: + - role: user + content: + - type: input_text + text: "Identify the type of animal in this image." + - type: input_image + image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg" + output: "llama" + +test_response_multi_turn_image: + test_name: test_response_multi_turn_image + test_params: + case: + - case_id: "llama_image_search" + turns: + - input: + - role: user + content: + - type: input_text + text: "What type of animal is in this image? Please respond with a single word that starts with the letter 'L'." + - type: input_image + image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg" + output: "llama" + - input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick' and 'scout'." + tools: + - type: web_search + output: "model" diff --git a/tests/verifications/openai_api/test_chat_completion.py b/tests/verifications/openai_api/test_chat_completion.py index 277eaafa3..64e49d352 100644 --- a/tests/verifications/openai_api/test_chat_completion.py +++ b/tests/verifications/openai_api/test_chat_completion.py @@ -7,7 +7,6 @@ import base64 import copy import json -import re from pathlib import Path from typing import Any @@ -16,7 +15,9 @@ from openai import APIError from pydantic import BaseModel from tests.verifications.openai_api.fixtures.fixtures import ( - _load_all_verification_configs, + case_id_generator, + get_base_test_name, + should_skip_test, ) from tests.verifications.openai_api.fixtures.load import load_test_cases @@ -25,57 +26,6 @@ chat_completion_test_cases = load_test_cases("chat_completion") THIS_DIR = Path(__file__).parent -def case_id_generator(case): - """Generate a test ID from the case's 'case_id' field, or use a default.""" - case_id = case.get("case_id") - if isinstance(case_id, (str, int)): - return re.sub(r"\\W|^(?=\\d)", "_", str(case_id)) - return None - - -def pytest_generate_tests(metafunc): - """Dynamically parametrize tests based on the selected provider and config.""" - if "model" in metafunc.fixturenames: - provider = metafunc.config.getoption("provider") - if not provider: - print("Warning: --provider not specified. 
Skipping model parametrization.") - metafunc.parametrize("model", []) - return - - try: - config_data = _load_all_verification_configs() - except (FileNotFoundError, IOError) as e: - print(f"ERROR loading verification configs: {e}") - config_data = {"providers": {}} - - provider_config = config_data.get("providers", {}).get(provider) - if provider_config: - models = provider_config.get("models", []) - if models: - metafunc.parametrize("model", models) - else: - print(f"Warning: No models found for provider '{provider}' in config.") - metafunc.parametrize("model", []) # Parametrize empty if no models found - else: - print(f"Warning: Provider '{provider}' not found in config. No models parametrized.") - metafunc.parametrize("model", []) # Parametrize empty if provider not found - - -def should_skip_test(verification_config, provider, model, test_name_base): - """Check if a test should be skipped based on config exclusions.""" - provider_config = verification_config.get("providers", {}).get(provider) - if not provider_config: - return False # No config for provider, don't skip - - exclusions = provider_config.get("test_exclusions", {}).get(model, []) - return test_name_base in exclusions - - -# Helper to get the base test name from the request object -def get_base_test_name(request): - return request.node.originalname - - @pytest.fixture def multi_image_data(): files = [ diff --git a/tests/verifications/openai_api/test_responses.py b/tests/verifications/openai_api/test_responses.py new file mode 100644 index 000000000..cc7ec320c --- /dev/null +++ b/tests/verifications/openai_api/test_responses.py @@ -0,0 +1,166 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +import pytest + +from tests.verifications.openai_api.fixtures.fixtures import ( + case_id_generator, + get_base_test_name, + should_skip_test, +) +from tests.verifications.openai_api.fixtures.load import load_test_cases + +responses_test_cases = load_test_cases("responses") + + +@pytest.mark.parametrize( + "case", + responses_test_cases["test_response_basic"]["test_params"]["case"], + ids=case_id_generator, +) +def test_response_non_streaming_basic(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.responses.create( + model=model, + input=case["input"], + stream=False, + ) + output_text = response.output_text.lower().strip() + assert len(output_text) > 0 + assert case["output"].lower() in output_text + + retrieved_response = openai_client.responses.retrieve(response_id=response.id) + assert retrieved_response.output_text == response.output_text + + next_response = openai_client.responses.create( + model=model, input="Repeat your previous response in all caps.", previous_response_id=response.id + ) + next_output_text = next_response.output_text.strip() + assert case["output"].upper() in next_output_text + + +@pytest.mark.parametrize( + "case", + responses_test_cases["test_response_basic"]["test_params"]["case"], + ids=case_id_generator, +) +def test_response_streaming_basic(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.responses.create( + model=model, + input=case["input"], + stream=True, + ) + streamed_content = [] + response_id = "" + for chunk in response: + if chunk.type == "response.completed": + response_id = chunk.response.id + streamed_content.append(chunk.response.output_text.strip()) + + assert len(streamed_content) > 0 + assert case["output"].lower() in "".join(streamed_content).lower() + + retrieved_response = openai_client.responses.retrieve(response_id=response_id) + assert retrieved_response.output_text == "".join(streamed_content) + + +@pytest.mark.parametrize( + "case", + responses_test_cases["test_response_multi_turn"]["test_params"]["case"], + ids=case_id_generator, +) +def test_response_non_streaming_multi_turn(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + previous_response_id = None + for turn in case["turns"]: + response = openai_client.responses.create( + model=model, + input=turn["input"], + previous_response_id=previous_response_id, + tools=turn["tools"] if "tools" in turn else None, + ) + previous_response_id = response.id + output_text = response.output_text.lower() + assert turn["output"].lower() in output_text + + +@pytest.mark.parametrize( + "case", + responses_test_cases["test_response_web_search"]["test_params"]["case"], + ids=case_id_generator, +) +def test_response_non_streaming_web_search(request, openai_client, model, provider, verification_config, case): + 
test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.responses.create( + model=model, + input=case["input"], + tools=case["tools"], + stream=False, + ) + assert len(response.output) > 1 + assert response.output[0].type == "web_search_call" + assert response.output[0].status == "completed" + assert response.output[1].type == "message" + assert response.output[1].status == "completed" + assert response.output[1].role == "assistant" + assert len(response.output[1].content) > 0 + assert case["output"].lower() in response.output_text.lower().strip() + + +@pytest.mark.parametrize( + "case", + responses_test_cases["test_response_image"]["test_params"]["case"], + ids=case_id_generator, +) +def test_response_non_streaming_image(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.responses.create( + model=model, + input=case["input"], + stream=False, + ) + output_text = response.output_text.lower() + assert case["output"].lower() in output_text + + +@pytest.mark.parametrize( + "case", + responses_test_cases["test_response_multi_turn_image"]["test_params"]["case"], + ids=case_id_generator, +) +def test_response_non_streaming_multi_turn_image(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + previous_response_id = None + for turn in case["turns"]: + response = openai_client.responses.create( + model=model, + input=turn["input"], + previous_response_id=previous_response_id, + tools=turn["tools"] if "tools" in turn else None, + ) + previous_response_id = response.id + output_text = response.output_text.lower() + assert turn["output"].lower() in output_text
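
The verification tests above drive the new endpoints through the standard OpenAI Python client pointed at a running Llama Stack server (the `openai_client` fixture builds one against `{base_url}/v1/openai/v1`). A minimal sketch of that same flow, assuming a local stack on port 8321 and the Llama-3.2-3B-Instruct model from the test plan; both the URL and model name are placeholders, not part of this patch:

```
from openai import OpenAI

# The stack serves its OpenAI-compatible APIs under /v1/openai/v1; the API key is unused.
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="fake")

# Create a response (non-streaming).
response = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    input="Which planet do humans live on?",
    stream=False,
)
print(response.output_text)

# Stored responses can be fetched back by id...
retrieved = client.responses.retrieve(response_id=response.id)
assert retrieved.output_text == response.output_text

# ...or used to fork a follow-up turn via previous_response_id.
follow_up = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    input="Repeat your previous response in all caps.",
    previous_response_id=response.id,
)
print(follow_up.output_text)
```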