Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-03 09:53:45 +00:00
feat(responses)!: introduce OpenAI compatible prompts to Responses API (#3942)
# What does this PR do?

This PR changes the Responses API schema to introduce OpenAI compatible prompts. It is an API-only change, so there is no implementation yet; a follow-up PR with the actual implementation will be submitted after this one lands. The need for this functionality was raised in #3514.

> Note: #3514 is split across three separate PRs. This PR is the second of the three.

## Test Plan

CI
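For orientation, here is a minimal sketch of how a caller might reference a stored prompt once the follow-up implementation lands. The `client` object and the prompt ID `pmpt_abc123` are hypothetical placeholders; only the shape of the `prompt` parameter (`id`, `version`, `variables`) comes from the schema introduced below.

```python
# Hypothetical client call. This PR only defines the schema of the `prompt`
# parameter; `client` and the prompt ID are placeholders, not part of the PR.
response = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    prompt={
        "id": "pmpt_abc123",  # unique identifier of the prompt template
        "version": "2",       # optional; the latest version is used if omitted
        "variables": {
            # values are OpenAIResponseInputMessageContent items, e.g. text:
            "city": {"type": "input_text", "text": "Tokyo"},
        },
    },
    input="Plan a one-day itinerary.",
)
```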
This commit is contained in: parent e5ca7e6450 · commit 5598f61e12
12 changed files with 593 additions and 8 deletions
`llama_stack/apis/agents/agents.py`

```diff
@@ -38,6 +38,7 @@ from .openai_responses import (
     OpenAIResponseInputTool,
     OpenAIResponseObject,
     OpenAIResponseObjectStream,
+    OpenAIResponsePrompt,
     OpenAIResponseText,
 )
```
```diff
@@ -810,6 +811,7 @@ class Agents(Protocol):
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         conversation: str | None = None,
```
```diff
@@ -831,6 +833,7 @@
         :param input: Input message(s) to create the response.
         :param model: The underlying LLM used for completions.
+        :param prompt: (Optional) Prompt object with ID, version, and variables.
         :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
         :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
         :param include: (Optional) Additional fields to include in the response.
```
`llama_stack/apis/agents/openai_responses.py`

```diff
@@ -6,7 +6,7 @@
 
 from typing import Annotated, Any, Literal
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 from typing_extensions import TypedDict
 
 from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
```
```diff
@@ -46,23 +46,66 @@ class OpenAIResponseInputMessageContentImage(BaseModel):
 
     :param detail: Level of detail for image processing, can be "low", "high", or "auto"
     :param type: Content type identifier, always "input_image"
     :param file_id: (Optional) The ID of the file to be sent to the model.
     :param image_url: (Optional) URL of the image content
     """
 
     detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
     type: Literal["input_image"] = "input_image"
     # TODO: handle file_id
     file_id: str | None = None
     image_url: str | None = None
 
 
 # TODO: handle file content types
+@json_schema_type
+class OpenAIResponseInputMessageContentFile(BaseModel):
+    """File content for input messages in OpenAI response format.
+
+    :param type: The type of the input item. Always `input_file`.
+    :param file_data: The data of the file to be sent to the model.
+    :param file_id: (Optional) The ID of the file to be sent to the model.
+    :param file_url: The URL of the file to be sent to the model.
+    :param filename: The name of the file to be sent to the model.
+    """
+
+    type: Literal["input_file"] = "input_file"
+    file_data: str | None = None
+    file_id: str | None = None
+    file_url: str | None = None
+    filename: str | None = None
+
+    @model_validator(mode="after")
+    def validate_file_source(self) -> "OpenAIResponseInputMessageContentFile":
+        if not any([self.file_data, self.file_id, self.file_url, self.filename]):
+            raise ValueError(
+                "At least one of 'file_data', 'file_id', 'file_url', or 'filename' must be provided for file content"
+            )
+        return self
 
 
 OpenAIResponseInputMessageContent = Annotated[
-    OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage,
+    OpenAIResponseInputMessageContentText
+    | OpenAIResponseInputMessageContentImage
+    | OpenAIResponseInputMessageContentFile,
     Field(discriminator="type"),
 ]
 register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
 
 
+@json_schema_type
+class OpenAIResponsePrompt(BaseModel):
+    """OpenAI compatible Prompt object that is used in OpenAI responses.
+
+    :param id: Unique identifier of the prompt template
+    :param variables: Dictionary of variable names to OpenAIResponseInputMessageContent structure
+        for template substitution. The substitution values can either be strings, or other
+        Response input types like images or files.
+    :param version: Version number of the prompt to use (defaults to latest if not specified)
+    """
+
+    id: str
+    variables: dict[str, OpenAIResponseInputMessageContent] | None = None
+    version: str | None = None
 
 
 @json_schema_type
 class OpenAIResponseAnnotationFileCitation(BaseModel):
     """File citation annotation for referencing specific files in response content.
```
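A quick sketch of how the new models behave, assuming the definitions above are importable; the IDs used are hypothetical. The `model_validator` rejects a file item with no source at all, and prompt variables can carry file references as well as text:

```python
from pydantic import ValidationError

# An input_file item must carry at least one source; the validator above
# raises otherwise (pydantic surfaces the ValueError as a ValidationError).
try:
    OpenAIResponseInputMessageContentFile()
except ValidationError as err:
    print(err)

# Prompt variables accept any OpenAIResponseInputMessageContent, so a
# template placeholder can be filled with a file reference.
prompt = OpenAIResponsePrompt(
    id="pmpt_report",  # hypothetical prompt template ID
    version="1",
    variables={
        "source_doc": OpenAIResponseInputMessageContentFile(file_id="file-123"),
    },
)
```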
```diff
@@ -538,6 +581,7 @@ class OpenAIResponseObject(BaseModel):
     :param output: List of generated output items (messages, tool calls, etc.)
     :param parallel_tool_calls: Whether tool calls can be executed in parallel
     :param previous_response_id: (Optional) ID of the previous response in a conversation
+    :param prompt: (Optional) Reference to a prompt template and its variables.
     :param status: Current status of the response generation
     :param temperature: (Optional) Sampling temperature used for generation
     :param text: Text formatting configuration for the response
```
```diff
@@ -556,6 +600,7 @@
     output: list[OpenAIResponseOutput]
     parallel_tool_calls: bool = False
     previous_response_id: str | None = None
+    prompt: OpenAIResponsePrompt | None = None
     status: str
     temperature: float | None = None
     # Default to text format to avoid breaking the loading of old responses
```
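Because `prompt` defaults to `None`, responses stored before this change still deserialize. A sketch with a hypothetical minimal payload (the exact set of required fields is an assumption; real stored responses carry more fields):

```python
# Hypothetical old payload that predates the `prompt` field.
old_payload = {
    "id": "resp_123",
    "created_at": 1730000000,
    "model": "meta-llama/Llama-3.3-70B-Instruct",
    "output": [],
    "status": "completed",
}
resp = OpenAIResponseObject.model_validate(old_payload)
assert resp.prompt is None  # field absent in old payloads, defaults to None
```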