From abd6280cb8772545193560772a0d94f7323ee629 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 28 Apr 2025 10:27:28 -0700 Subject: [PATCH] fold openai responses into the Agents API --- docs/_static/llama-stack-spec.html | 512 ++++++++++++++++++ docs/_static/llama-stack-spec.yaml | 350 ++++++++++++ docs/openapi_generator/pyopenapi/generator.py | 2 +- .../self_hosted_distro/remote-vllm.md | 1 - .../self_hosted_distro/together.md | 1 - llama_stack/apis/agents/agents.py | 43 ++ .../openai_responses.py | 34 +- llama_stack/apis/datatypes.py | 1 - llama_stack/apis/openai_responses/__init__.py | 7 - llama_stack/distribution/resolver.py | 2 - .../distribution/routers/routing_tables.py | 2 - .../inline/agents/meta_reference/agents.py | 31 ++ .../meta_reference}/openai_responses.py | 64 +-- .../inline/openai_responses/__init__.py | 21 - .../inline/openai_responses/config.py | 24 - .../providers/registry/openai_responses.py | 27 - llama_stack/strong_typing/schema.py | 2 + llama_stack/templates/remote-vllm/build.yaml | 2 - .../remote-vllm/run-with-safety.yaml | 9 - llama_stack/templates/remote-vllm/run.yaml | 9 - llama_stack/templates/remote-vllm/vllm.py | 1 - llama_stack/templates/together/build.yaml | 2 - .../templates/together/run-with-safety.yaml | 9 - llama_stack/templates/together/run.yaml | 9 - llama_stack/templates/together/together.py | 1 - 25 files changed, 967 insertions(+), 199 deletions(-) rename llama_stack/apis/{openai_responses => agents}/openai_responses.py (78%) delete mode 100644 llama_stack/apis/openai_responses/__init__.py rename llama_stack/providers/inline/{openai_responses => agents/meta_reference}/openai_responses.py (91%) delete mode 100644 llama_stack/providers/inline/openai_responses/__init__.py delete mode 100644 llama_stack/providers/inline/openai_responses/config.py delete mode 100644 llama_stack/providers/registry/openai_responses.py diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 4c5393947..49c402d37 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -497,6 +497,54 @@ } } }, + "/v1/openai/v1/responses": { + "post": { + "responses": { + "200": { + "description": "Runtime representation of an annotated type.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIResponseObject" + } + }, + "text/event-stream": { + "schema": { + "$ref": "#/components/schemas/OpenAIResponseObjectStream" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "Create a new OpenAI response.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateOpenaiResponseRequest" + } + } + }, + "required": true + } + } + }, "/v1/files": { "get": { "responses": { @@ -1278,6 +1326,49 @@ ] } }, + "/v1/openai/v1/responses/{id}": { + "get": { + "responses": { + "200": { + "description": "An OpenAIResponseObject.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenAIResponseObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": 
"#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "Retrieve an OpenAI response by its ID.", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "The ID of the OpenAI response to retrieve.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/scoring-functions/{scoring_fn_id}": { "get": { "responses": { @@ -6192,6 +6283,427 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "OpenAIResponseInputMessage": { + "type": "object", + "properties": { + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContent" + } + } + ] + }, + "role": { + "oneOf": [ + { + "type": "string", + "const": "system" + }, + { + "type": "string", + "const": "developer" + }, + { + "type": "string", + "const": "user" + }, + { + "type": "string", + "const": "assistant" + } + ] + }, + "type": { + "type": "string", + "const": "message", + "default": "message" + } + }, + "additionalProperties": false, + "required": [ + "content", + "role" + ], + "title": "OpenAIResponseInputMessage" + }, + "OpenAIResponseInputMessageContent": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentText" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText", + "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage" + } + } + }, + "OpenAIResponseInputMessageContentImage": { + "type": "object", + "properties": { + "detail": { + "oneOf": [ + { + "type": "string", + "const": "low" + }, + { + "type": "string", + "const": "high" + }, + { + "type": "string", + "const": "auto" + } + ], + "default": "auto" + }, + "type": { + "type": "string", + "const": "input_image", + "default": "input_image" + }, + "image_url": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "detail", + "type" + ], + "title": "OpenAIResponseInputMessageContentImage" + }, + "OpenAIResponseInputMessageContentText": { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "type": { + "type": "string", + "const": "input_text", + "default": "input_text" + } + }, + "additionalProperties": false, + "required": [ + "text", + "type" + ], + "title": "OpenAIResponseInputMessageContentText" + }, + "OpenAIResponseInputTool": { + "type": "object", + "properties": { + "type": { + "oneOf": [ + { + "type": "string", + "const": "web_search" + }, + { + "type": "string", + "const": "web_search_preview_2025_03_11" + } + ], + "default": "web_search" + }, + "search_context_size": { + "type": "string", + "default": "medium" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "OpenAIResponseInputToolWebSearch" + }, + "CreateOpenaiResponseRequest": { + "type": "object", + "properties": { + "input": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInputMessage" + } + } + ], + "description": "Input message(s) to create the response." + }, + "model": { + "type": "string", + "description": "The underlying LLM used for completions." 
+ }, + "previous_response_id": { + "type": "string", + "description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses." + }, + "store": { + "type": "boolean" + }, + "stream": { + "type": "boolean" + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInputTool" + } + } + }, + "additionalProperties": false, + "required": [ + "input", + "model" + ], + "title": "CreateOpenaiResponseRequest" + }, + "OpenAIResponseError": { + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "message": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "code", + "message" + ], + "title": "OpenAIResponseError" + }, + "OpenAIResponseObject": { + "type": "object", + "properties": { + "created_at": { + "type": "integer" + }, + "error": { + "$ref": "#/components/schemas/OpenAIResponseError" + }, + "id": { + "type": "string" + }, + "model": { + "type": "string" + }, + "object": { + "type": "string", + "const": "response", + "default": "response" + }, + "output": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutput" + } + }, + "parallel_tool_calls": { + "type": "boolean", + "default": false + }, + "previous_response_id": { + "type": "string" + }, + "status": { + "type": "string" + }, + "temperature": { + "type": "number" + }, + "top_p": { + "type": "number" + }, + "truncation": { + "type": "string" + }, + "user": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "created_at", + "id", + "model", + "object", + "output", + "parallel_tool_calls", + "status" + ], + "title": "OpenAIResponseObject" + }, + "OpenAIResponseOutput": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessage" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "message": "#/components/schemas/OpenAIResponseOutputMessage", + "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + } + } + }, + "OpenAIResponseOutputMessage": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "content": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageContent" + } + }, + "role": { + "type": "string", + "const": "assistant", + "default": "assistant" + }, + "status": { + "type": "string" + }, + "type": { + "type": "string", + "const": "message", + "default": "message" + } + }, + "additionalProperties": false, + "required": [ + "id", + "content", + "role", + "status", + "type" + ], + "title": "OpenAIResponseOutputMessage" + }, + "OpenAIResponseOutputMessageContent": { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "type": { + "type": "string", + "const": "output_text", + "default": "output_text" + } + }, + "additionalProperties": false, + "required": [ + "text", + "type" + ], + "title": "OpenAIResponseOutputMessageContentOutputText" + }, + "OpenAIResponseOutputMessageWebSearchToolCall": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "status": { + "type": "string" + }, + "type": { + "type": "string", + "const": "web_search_call", + "default": "web_search_call" + } + }, + "additionalProperties": false, + "required": [ + "id", + "status", + "type" + ], + "title": "OpenAIResponseOutputMessageWebSearchToolCall" + }, + 
"OpenAIResponseObjectStream": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated", + "response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" + } + } + }, + "OpenAIResponseObjectStreamResponseCompleted": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject" + }, + "type": { + "type": "string", + "const": "response.completed", + "default": "response.completed" + } + }, + "additionalProperties": false, + "required": [ + "response", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseCompleted" + }, + "OpenAIResponseObjectStreamResponseCreated": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject" + }, + "type": { + "type": "string", + "const": "response.created", + "default": "response.created" + } + }, + "additionalProperties": false, + "required": [ + "response", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseCreated" + }, "CreateUploadSessionRequest": { "type": "object", "properties": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index a24f1a9db..e5bfad623 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -330,6 +330,39 @@ paths: schema: $ref: '#/components/schemas/CreateAgentTurnRequest' required: true + /v1/openai/v1/responses: + post: + responses: + '200': + description: >- + Runtime representation of an annotated type. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIResponseObject' + text/event-stream: + schema: + $ref: '#/components/schemas/OpenAIResponseObjectStream' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: Create a new OpenAI response. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateOpenaiResponseRequest' + required: true /v1/files: get: responses: @@ -875,6 +908,36 @@ paths: required: true schema: type: string + /v1/openai/v1/responses/{id}: + get: + responses: + '200': + description: An OpenAIResponseObject. + content: + application/json: + schema: + $ref: '#/components/schemas/OpenAIResponseObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: Retrieve an OpenAI response by its ID. + parameters: + - name: id + in: path + description: >- + The ID of the OpenAI response to retrieve. 
+ required: true + schema: + type: string /v1/scoring-functions/{scoring_fn_id}: get: responses: @@ -4329,6 +4392,293 @@ components: - event_type - turn_id title: AgentTurnResponseTurnStartPayload + OpenAIResponseInputMessage: + type: object + properties: + content: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInputMessageContent' + role: + oneOf: + - type: string + const: system + - type: string + const: developer + - type: string + const: user + - type: string + const: assistant + type: + type: string + const: message + default: message + additionalProperties: false + required: + - content + - role + title: OpenAIResponseInputMessage + OpenAIResponseInputMessageContent: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' + - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' + discriminator: + propertyName: type + mapping: + input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' + input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' + OpenAIResponseInputMessageContentImage: + type: object + properties: + detail: + oneOf: + - type: string + const: low + - type: string + const: high + - type: string + const: auto + default: auto + type: + type: string + const: input_image + default: input_image + image_url: + type: string + additionalProperties: false + required: + - detail + - type + title: OpenAIResponseInputMessageContentImage + OpenAIResponseInputMessageContentText: + type: object + properties: + text: + type: string + type: + type: string + const: input_text + default: input_text + additionalProperties: false + required: + - text + - type + title: OpenAIResponseInputMessageContentText + OpenAIResponseInputTool: + type: object + properties: + type: + oneOf: + - type: string + const: web_search + - type: string + const: web_search_preview_2025_03_11 + default: web_search + search_context_size: + type: string + default: medium + additionalProperties: false + required: + - type + title: OpenAIResponseInputToolWebSearch + CreateOpenaiResponseRequest: + type: object + properties: + input: + oneOf: + - type: string + - type: array + items: + $ref: '#/components/schemas/OpenAIResponseInputMessage' + description: Input message(s) to create the response. + model: + type: string + description: The underlying LLM used for completions. + previous_response_id: + type: string + description: >- + (Optional) if specified, the new response will be a continuation of the + previous response. This can be used to easily fork-off new responses from + existing responses. 
+ store: + type: boolean + stream: + type: boolean + tools: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInputTool' + additionalProperties: false + required: + - input + - model + title: CreateOpenaiResponseRequest + OpenAIResponseError: + type: object + properties: + code: + type: string + message: + type: string + additionalProperties: false + required: + - code + - message + title: OpenAIResponseError + OpenAIResponseObject: + type: object + properties: + created_at: + type: integer + error: + $ref: '#/components/schemas/OpenAIResponseError' + id: + type: string + model: + type: string + object: + type: string + const: response + default: response + output: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutput' + parallel_tool_calls: + type: boolean + default: false + previous_response_id: + type: string + status: + type: string + temperature: + type: number + top_p: + type: number + truncation: + type: string + user: + type: string + additionalProperties: false + required: + - created_at + - id + - model + - object + - output + - parallel_tool_calls + - status + title: OpenAIResponseObject + OpenAIResponseOutput: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseOutputMessage' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + discriminator: + propertyName: type + mapping: + message: '#/components/schemas/OpenAIResponseOutputMessage' + web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + OpenAIResponseOutputMessage: + type: object + properties: + id: + type: string + content: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutputMessageContent' + role: + type: string + const: assistant + default: assistant + status: + type: string + type: + type: string + const: message + default: message + additionalProperties: false + required: + - id + - content + - role + - status + - type + title: OpenAIResponseOutputMessage + OpenAIResponseOutputMessageContent: + type: object + properties: + text: + type: string + type: + type: string + const: output_text + default: output_text + additionalProperties: false + required: + - text + - type + title: >- + OpenAIResponseOutputMessageContentOutputText + "OpenAIResponseOutputMessageWebSearchToolCall": + type: object + properties: + id: + type: string + status: + type: string + type: + type: string + const: web_search_call + default: web_search_call + additionalProperties: false + required: + - id + - status + - type + title: >- + OpenAIResponseOutputMessageWebSearchToolCall + OpenAIResponseObjectStream: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + discriminator: + propertyName: type + mapping: + response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' + "OpenAIResponseObjectStreamResponseCompleted": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + type: + type: string + const: response.completed + default: response.completed + additionalProperties: false + required: + - response + - type + title: >- + OpenAIResponseObjectStreamResponseCompleted + "OpenAIResponseObjectStreamResponseCreated": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + type: + type: string + const: response.created + default: 
response.created
+      additionalProperties: false
+      required:
+        - response
+        - type
+      title: >-
+        OpenAIResponseObjectStreamResponseCreated
     CreateUploadSessionRequest:
       type: object
       properties:
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 3936bb3c4..6d5e48a46 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -179,7 +179,7 @@ class ContentBuilder:
         "Creates the content subtree for a request or response."
 
         def is_iterator_type(t):
-            return "StreamChunk" in str(t)
+            return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t)
 
         def get_media_type(t):
             if is_generic_list(t):
diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index 74365722d..46df56008 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -18,7 +18,6 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::vllm`, `inline::sentence-transformers` |
-| openai_responses | `inline::openai-responses` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index 5da0ee980..3ebb1f59e 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -19,7 +19,6 @@ The `llamastack/distribution-together` distribution consists of the following pr
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::together`, `inline::sentence-transformers` |
-| openai_responses | `inline::openai-responses` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index dec43280b..4db6e2226 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -38,6 +38,13 @@ from llama_stack.apis.safety import SafetyViolation
 from llama_stack.apis.tools import ToolDef
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
+from .openai_responses import (
+    OpenAIResponseInputMessage,
+    OpenAIResponseInputTool,
+    OpenAIResponseObject,
+    OpenAIResponseObjectStream,
+)
+
 
 class Attachment(BaseModel):
     """An attachment to an agent turn.
@@ -593,3 +600,53 @@ class Agents(Protocol):
         :returns: A ListAgentSessionsResponse.
         """
         ...
+
+    # We situate the OpenAI Responses API in the Agents API just as we did
+    # for Inference. In intent, the Responses API serves the same purpose as
+    # the Agents API above -- it is essentially a lightweight "agentic loop" with
+    # integrated tool calling.
+    #
+    # Both of these APIs are inherently stateful.
+
+    @webmethod(route="/openai/v1/responses/{id}", method="GET")
+    async def get_openai_response(
+        self,
+        id: str,
+    ) -> OpenAIResponseObject:
+        """Retrieve an OpenAI response by its ID.
+
+        :param id: The ID of the OpenAI response to retrieve.
+        :returns: An OpenAIResponseObject.
+        """
+        ...
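+
+    # Illustrative usage sketch (hypothetical `agents` handle bound to an Agents
+    # implementation; model id and prompt are placeholders):
+    #
+    #   response = await agents.create_openai_response(
+    #       input="What is the capital of France?",
+    #       model="meta-llama/Llama-3.3-70B-Instruct",
+    #   )
+    #   fetched = await agents.get_openai_response(response.id)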
+
+    @webmethod(route="/openai/v1/responses", method="POST")
+    async def create_openai_response(
+        self,
+        input: Union[str, List[OpenAIResponseInputMessage]],
+        model: str,
+        previous_response_id: Optional[str] = None,
+        store: Optional[bool] = True,
+        stream: Optional[bool] = False,
+        tools: Optional[List[OpenAIResponseInputTool]] = None,
+    ) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]:
+        """Create a new OpenAI response.
+
+        :param input: Input message(s) to create the response.
+        :param model: The underlying LLM used for completions.
+        :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork off new responses from existing responses.
+        :param store: (Optional) Whether to persist the generated response so it can be retrieved later. Defaults to True.
+        :param stream: (Optional) Whether to stream the response as server-sent events. Defaults to False.
+        :param tools: (Optional) List of tools the model may invoke while generating the response.
+        :returns: An OpenAIResponseObject, or an AsyncIterator of OpenAIResponseObjectStream chunks when streaming.
+        """
+        ...
diff --git a/llama_stack/apis/openai_responses/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
similarity index 78%
rename from llama_stack/apis/openai_responses/openai_responses.py
rename to llama_stack/apis/agents/openai_responses.py
index 0b21f3f28..72f16e224 100644
--- a/llama_stack/apis/openai_responses/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import AsyncIterator, List, Literal, Optional, Protocol, Union, runtime_checkable
+from typing import List, Literal, Optional, Union
 
 from pydantic import BaseModel, Field
 from typing_extensions import Annotated
 
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack.schema_utils import json_schema_type, register_schema
 
 
 @json_schema_type
@@ -104,7 +104,7 @@ class OpenAIResponseInputMessageContentText(BaseModel):
 
 @json_schema_type
 class OpenAIResponseInputMessageContentImage(BaseModel):
-    detail: Literal["low", "high", "auto"] = "auto"
+    detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
     type: Literal["input_image"] = "input_image"
     # TODO: handle file_id
     image_url: Optional[str] = None
@@ -121,13 +121,13 @@ register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMess
 @json_schema_type
 class OpenAIResponseInputMessage(BaseModel):
     content: Union[str, List[OpenAIResponseInputMessageContent]]
-    role: Literal["system", "developer", "user", "assistant"]
+    role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
     type: Optional[Literal["message"]] = "message"
 
 
 @json_schema_type
 class OpenAIResponseInputToolWebSearch(BaseModel):
-    type: Literal["web_search", "web_search_preview_2025_03_11"] = "web_search"
+    type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
     # TODO: actually use search_context_size somewhere...
     search_context_size: Optional[str] = Field(default="medium", pattern="^low|medium|high$")
     # TODO: add user_location
@@ -138,27 +138,3 @@ OpenAIResponseInputTool = Annotated[
     Field(discriminator="type"),
 ]
 register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
-
-
-@runtime_checkable
-class OpenAIResponses(Protocol):
-    """
-    OpenAI Responses API implementation.
-    """
-
-    @webmethod(route="/openai/v1/responses/{id}", method="GET")
-    async def get_openai_response(
-        self,
-        id: str,
-    ) -> OpenAIResponseObject: ...
- - @webmethod(route="/openai/v1/responses", method="POST") - async def create_openai_response( - self, - input: Union[str, List[OpenAIResponseInputMessage]], - model: str, - previous_response_id: Optional[str] = None, - store: Optional[bool] = True, - stream: Optional[bool] = False, - tools: Optional[List[OpenAIResponseInputTool]] = None, - ) -> Union[OpenAIResponseObject, AsyncIterator[OpenAIResponseObjectStream]]: ... diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py index 85c0ecc6b..25f3ab1ab 100644 --- a/llama_stack/apis/datatypes.py +++ b/llama_stack/apis/datatypes.py @@ -24,7 +24,6 @@ class Api(Enum): eval = "eval" post_training = "post_training" tool_runtime = "tool_runtime" - openai_responses = "openai_responses" telemetry = "telemetry" diff --git a/llama_stack/apis/openai_responses/__init__.py b/llama_stack/apis/openai_responses/__init__.py deleted file mode 100644 index a3b32ff71..000000000 --- a/llama_stack/apis/openai_responses/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .openai_responses import * # noqa: F401 F403 diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 25c91fca1..e9a594eba 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -16,7 +16,6 @@ from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference from llama_stack.apis.inspect import Inspect from llama_stack.apis.models import Models -from llama_stack.apis.openai_responses.openai_responses import OpenAIResponses from llama_stack.apis.post_training import PostTraining from llama_stack.apis.providers import Providers as ProvidersAPI from llama_stack.apis.safety import Safety @@ -81,7 +80,6 @@ def api_protocol_map() -> Dict[Api, Any]: Api.tool_groups: ToolGroups, Api.tool_runtime: ToolRuntime, Api.files: Files, - Api.openai_responses: OpenAIResponses, } diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index 50416f338..18b0c891f 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -149,8 +149,6 @@ class CommonRoutingTableImpl(RoutingTable): p.benchmark_store = self elif api == Api.tool_runtime: p.tool_store = self - elif api == Api.openai_responses: - p.model_store = self async def shutdown(self) -> None: for p in self.impls_by_provider_id.values(): diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 656178773..38aa6fd97 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -23,6 +23,9 @@ from llama_stack.apis.agents import ( Document, ListAgentSessionsResponse, ListAgentsResponse, + OpenAIResponseInputMessage, + OpenAIResponseInputTool, + OpenAIResponseObject, Session, Turn, ) @@ -40,6 +43,7 @@ from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_imp from .agent_instance import ChatAgent from .config import MetaReferenceAgentsImplConfig +from .openai_responses import OpenAIResponsesImpl logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -63,9 +67,16 @@ class MetaReferenceAgentsImpl(Agents): self.tool_groups_api = 
tool_groups_api self.in_memory_store = InmemoryKVStoreImpl() + self.openai_responses_impl = None async def initialize(self) -> None: self.persistence_store = await kvstore_impl(self.config.persistence_store) + self.openai_responses_impl = OpenAIResponsesImpl( + self.persistence_store, + inference_api=self.inference_api, + tool_groups_api=self.tool_groups_api, + tool_runtime_api=self.tool_runtime_api, + ) # check if "bwrap" is available if not shutil.which("bwrap"): @@ -244,3 +255,23 @@ class MetaReferenceAgentsImpl(Agents): agent_id: str, ) -> ListAgentSessionsResponse: pass + + # OpenAI responses + async def get_openai_response( + self, + id: str, + ) -> OpenAIResponseObject: + return await self.openai_responses_impl.get_openai_response(id) + + async def create_openai_response( + self, + input: Union[str, List[OpenAIResponseInputMessage]], + model: str, + previous_response_id: Optional[str] = None, + store: Optional[bool] = True, + stream: Optional[bool] = False, + tools: Optional[List[OpenAIResponseInputTool]] = None, + ) -> OpenAIResponseObject: + return await self.openai_responses_impl.create_openai_response( + input, model, previous_response_id, store, stream, tools + ) diff --git a/llama_stack/providers/inline/openai_responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py similarity index 91% rename from llama_stack/providers/inline/openai_responses/openai_responses.py rename to llama_stack/providers/inline/agents/meta_reference/openai_responses.py index c7d767f73..db1e32f8b 100644 --- a/llama_stack/providers/inline/openai_responses/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -10,6 +10,20 @@ from typing import AsyncIterator, List, Optional, Union, cast from openai.types.chat import ChatCompletionToolParam +from llama_stack.apis.agents.openai_responses import ( + OpenAIResponseInputMessage, + OpenAIResponseInputMessageContentImage, + OpenAIResponseInputMessageContentText, + OpenAIResponseInputTool, + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseObjectStreamResponseCompleted, + OpenAIResponseObjectStreamResponseCreated, + OpenAIResponseOutput, + OpenAIResponseOutputMessage, + OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageWebSearchToolCall, +) from llama_stack.apis.inference.inference import ( Inference, OpenAIAssistantMessageParam, @@ -24,29 +38,11 @@ from llama_stack.apis.inference.inference import ( OpenAIToolMessageParam, OpenAIUserMessageParam, ) -from llama_stack.apis.models.models import Models, ModelType -from llama_stack.apis.openai_responses import OpenAIResponses -from llama_stack.apis.openai_responses.openai_responses import ( - OpenAIResponseInputMessage, - OpenAIResponseInputMessageContentImage, - OpenAIResponseInputMessageContentText, - OpenAIResponseInputTool, - OpenAIResponseObject, - OpenAIResponseObjectStream, - OpenAIResponseObjectStreamResponseCompleted, - OpenAIResponseObjectStreamResponseCreated, - OpenAIResponseOutput, - OpenAIResponseOutputMessage, - OpenAIResponseOutputMessageContentOutputText, - OpenAIResponseOutputMessageWebSearchToolCall, -) from llama_stack.apis.tools.tools import ToolGroups, ToolInvocationResult, ToolRuntime from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool -from llama_stack.providers.utils.kvstore import kvstore_impl - 
-from .config import OpenAIResponsesImplConfig +from llama_stack.providers.utils.kvstore import KVStore logger = get_logger(name=__name__, category="openai_responses") @@ -80,34 +76,25 @@ async def _openai_choices_to_output_messages(choices: List[OpenAIChoice]) -> Lis return output_messages -class OpenAIResponsesImpl(OpenAIResponses): +class OpenAIResponsesImpl: def __init__( self, - config: OpenAIResponsesImplConfig, - models_api: Models, + persistence_store: KVStore, inference_api: Inference, tool_groups_api: ToolGroups, tool_runtime_api: ToolRuntime, ): - self.config = config - self.models_api = models_api + self.persistence_store = persistence_store self.inference_api = inference_api self.tool_groups_api = tool_groups_api self.tool_runtime_api = tool_runtime_api - async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) - - async def shutdown(self) -> None: - logger.debug("OpenAIResponsesImpl.shutdown") - pass - async def get_openai_response( self, id: str, ) -> OpenAIResponseObject: key = f"{OPENAI_RESPONSES_PREFIX}{id}" - response_json = await self.kvstore.get(key=key) + response_json = await self.persistence_store.get(key=key) if response_json is None: raise ValueError(f"OpenAI response with id '{id}' not found") return OpenAIResponseObject.model_validate_json(response_json) @@ -122,11 +109,6 @@ class OpenAIResponsesImpl(OpenAIResponses): tools: Optional[List[OpenAIResponseInputTool]] = None, ): stream = False if stream is None else stream - model_obj = await self.models_api.get_model(model) - if model_obj is None: - raise ValueError(f"Model '{model}' not found") - if model_obj.model_type == ModelType.embedding: - raise ValueError(f"Model '{model}' is an embedding model and does not support chat completions") messages: List[OpenAIMessageParam] = [] if previous_response_id: @@ -155,7 +137,7 @@ class OpenAIResponsesImpl(OpenAIResponses): chat_tools = await self._convert_response_tools_to_chat_tools(tools) if tools else None chat_response = await self.inference_api.openai_chat_completion( - model=model_obj.identifier, + model=model, messages=messages, tools=chat_tools, stream=stream, @@ -198,14 +180,14 @@ class OpenAIResponsesImpl(OpenAIResponses): output_messages: List[OpenAIResponseOutput] = [] if chat_response.choices[0].finish_reason == "tool_calls": output_messages.extend( - await self._execute_tool_and_return_final_output(model_obj.identifier, stream, chat_response, messages) + await self._execute_tool_and_return_final_output(model, stream, chat_response, messages) ) else: output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices)) response = OpenAIResponseObject( created_at=chat_response.created, id=f"resp-{uuid.uuid4()}", - model=model_obj.identifier, + model=model, object="response", status="completed", output=output_messages, @@ -214,7 +196,7 @@ class OpenAIResponsesImpl(OpenAIResponses): if store: # Store in kvstore key = f"{OPENAI_RESPONSES_PREFIX}{response.id}" - await self.kvstore.set( + await self.persistence_store.set( key=key, value=response.model_dump_json(), ) diff --git a/llama_stack/providers/inline/openai_responses/__init__.py b/llama_stack/providers/inline/openai_responses/__init__.py deleted file mode 100644 index 76f15d478..000000000 --- a/llama_stack/providers/inline/openai_responses/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any, Dict - -from llama_stack.apis.datatypes import Api - -from .config import OpenAIResponsesImplConfig - - -async def get_provider_impl(config: OpenAIResponsesImplConfig, deps: Dict[Api, Any]): - from .openai_responses import OpenAIResponsesImpl - - impl = OpenAIResponsesImpl( - config, deps[Api.models], deps[Api.inference], deps[Api.tool_groups], deps[Api.tool_runtime] - ) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/openai_responses/config.py b/llama_stack/providers/inline/openai_responses/config.py deleted file mode 100644 index f97b2fe68..000000000 --- a/llama_stack/providers/inline/openai_responses/config.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any, Dict - -from pydantic import BaseModel - -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig - - -class OpenAIResponsesImplConfig(BaseModel): - kvstore: KVStoreConfig - - @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]: - return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="openai_responses.db", - ) - } diff --git a/llama_stack/providers/registry/openai_responses.py b/llama_stack/providers/registry/openai_responses.py deleted file mode 100644 index b7f8d17a0..000000000 --- a/llama_stack/providers/registry/openai_responses.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import List - -from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec - - -def available_providers() -> List[ProviderSpec]: - return [ - InlineProviderSpec( - api=Api.openai_responses, - provider_type="inline::openai-responses", - pip_packages=[], - module="llama_stack.providers.inline.openai_responses", - config_class="llama_stack.providers.inline.openai_responses.config.OpenAIResponsesImplConfig", - api_dependencies=[ - Api.models, - Api.inference, - Api.tool_groups, - Api.tool_runtime, - ], - ), - ] diff --git a/llama_stack/strong_typing/schema.py b/llama_stack/strong_typing/schema.py index 0f5121906..e755b4c12 100644 --- a/llama_stack/strong_typing/schema.py +++ b/llama_stack/strong_typing/schema.py @@ -478,6 +478,8 @@ class JsonSchemaGenerator: } return ret elif origin_type is Literal: + if len(typing.get_args(typ)) != 1: + print(f"Literal type {typ} has {len(typing.get_args(typ))} arguments") (literal_value,) = typing.get_args(typ) # unpack value of literal type schema = self.type_to_schema(type(literal_value)) schema["const"] = literal_value diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml index b344f5e5a..b2bbf853a 100644 --- a/llama_stack/templates/remote-vllm/build.yaml +++ b/llama_stack/templates/remote-vllm/build.yaml @@ -24,8 +24,6 @@ distribution_spec: - inline::braintrust telemetry: - inline::meta-reference - openai_responses: - - inline::openai-responses tool_runtime: - remote::brave-search - remote::tavily-search diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index a58417714..bb69496aa 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -5,7 +5,6 @@ apis: - datasetio - eval - inference -- openai_responses - safety - scoring - telemetry @@ -92,14 +91,6 @@ providers: service_name: "${env.OTEL_SERVICE_NAME:\u200B}" sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db} - openai_responses: - - provider_id: openai-responses - provider_type: inline::openai-responses - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/openai_responses.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 58087bba3..14f2da37e 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -5,7 +5,6 @@ apis: - datasetio - eval - inference -- openai_responses - safety - scoring - telemetry @@ -85,14 +84,6 @@ providers: service_name: "${env.OTEL_SERVICE_NAME:\u200B}" sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db} - openai_responses: - - provider_id: openai-responses - provider_type: inline::openai-responses - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/openai_responses.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index 12515d1ad..0f6c7659e 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ 
b/llama_stack/templates/remote-vllm/vllm.py @@ -31,7 +31,6 @@ def get_distribution_template() -> DistributionTemplate: "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], "telemetry": ["inline::meta-reference"], - "openai_responses": ["inline::openai-responses"], "tool_runtime": [ "remote::brave-search", "remote::tavily-search", diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml index 81a47c5cd..834a3ecaf 100644 --- a/llama_stack/templates/together/build.yaml +++ b/llama_stack/templates/together/build.yaml @@ -24,8 +24,6 @@ distribution_spec: - inline::basic - inline::llm-as-judge - inline::braintrust - openai_responses: - - inline::openai-responses tool_runtime: - remote::brave-search - remote::tavily-search diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index fbeafce19..105ce896d 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -5,7 +5,6 @@ apis: - datasetio - eval - inference -- openai_responses - safety - scoring - telemetry @@ -88,14 +87,6 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} - openai_responses: - - provider_id: openai-responses - provider_type: inline::openai-responses - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/openai_responses.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index 0c5d82c13..1f1613655 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -5,7 +5,6 @@ apis: - datasetio - eval - inference -- openai_responses - safety - scoring - telemetry @@ -83,14 +82,6 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} - openai_responses: - - provider_id: openai-responses - provider_type: inline::openai-responses - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/openai_responses.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index 85b7645b3..a2bd87c97 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -36,7 +36,6 @@ def get_distribution_template() -> DistributionTemplate: "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "openai_responses": ["inline::openai-responses"], "tool_runtime": [ "remote::brave-search", "remote::tavily-search",
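
Usage sketch for the new routes -- a minimal example, assuming a Llama Stack server
on the default port 8321; the model id and prompt are placeholders. Because the
routes live under the OpenAI-compatible /v1/openai/v1 prefix, the official openai
Python client can be pointed at them directly:

    from openai import OpenAI

    # Point the client at the OpenAI-compatible prefix served by Llama Stack.
    client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

    # POST /v1/openai/v1/responses -- create a response
    response = client.responses.create(
        model="meta-llama/Llama-3.3-70B-Instruct",  # placeholder model id
        input="What is the capital of France?",
    )
    print(response.id, response.status)

    # GET /v1/openai/v1/responses/{id} -- retrieve it again by id
    fetched = client.responses.retrieve(response.id)
    assert fetched.id == response.id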