feat(responses)!: add support for OpenAI-compatible Prompts in Responses API

r3v5 2025-09-21 13:52:55 +01:00
parent bd3c473208
commit 59169bfd25
33 changed files with 1667 additions and 34 deletions


@@ -38,6 +38,7 @@ from .openai_responses import (
     OpenAIResponseInputTool,
     OpenAIResponseObject,
     OpenAIResponseObjectStream,
+    OpenAIResponsePromptParam,
     OpenAIResponseText,
 )
@@ -810,6 +811,7 @@ class Agents(Protocol):
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt: OpenAIResponsePromptParam | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         conversation: str | None = None,
@@ -831,6 +833,7 @@
         :param input: Input message(s) to create the response.
         :param model: The underlying LLM used for completions.
+        :param prompt: (Optional) Prompt object with ID, version, and variables.
         :param previous_response_id: (Optional) If specified, the new response will be a continuation of the previous response. This can be used to easily fork off new responses from existing ones.
         :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
         :param include: (Optional) Additional fields to include in the response.
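
For illustration, a minimal sketch of how the new parameter might be called. The prompt ID, version, model ID, and the `agents` client are hypothetical; the surrounding method is the Responses create endpoint defined above (assumed here to be named `create_openai_response`).

# Hypothetical usage sketch: reference a stored prompt template by ID,
# pin a version, and supply variables for server-side substitution.
# Assumes an async context and an `agents` client implementing the protocol.
from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInputMessageContentText,
    OpenAIResponsePromptParam,
)

prompt_param = OpenAIResponsePromptParam(
    id="pmpt_123",  # hypothetical prompt ID
    version="2",    # omit to use the latest version
    variables={
        # placeholder name -> content used to fill the template
        "city": OpenAIResponseInputMessageContentText(text="Tokyo"),
    },
)

response = await agents.create_openai_response(
    input="What should I pack for my trip?",
    model="llama3.2-3b",  # hypothetical model ID
    prompt=prompt_param,
)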


@@ -6,9 +6,10 @@
 from typing import Annotated, Any, Literal

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 from typing_extensions import TypedDict

+from llama_stack.apis.prompts.prompts import Prompt
 from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
 from llama_stack.schema_utils import json_schema_type, register_schema
@@ -46,18 +47,44 @@ class OpenAIResponseInputMessageContentImage(BaseModel):
     :param detail: Level of detail for image processing, can be "low", "high", or "auto"
     :param type: Content type identifier, always "input_image"
+    :param file_id: (Optional) The ID of the file to be sent to the model.
     :param image_url: (Optional) URL of the image content
     """

     detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
     type: Literal["input_image"] = "input_image"
+    # TODO: handle file_id
+    file_id: str | None = None
     image_url: str | None = None


-# TODO: handle file content types
+@json_schema_type
+class OpenAIResponseInputMessageContentFile(BaseModel):
+    """File content for input messages in OpenAI response format.
+
+    :param type: The type of the input item. Always "input_file".
+    :param file_data: (Optional) The data of the file to be sent to the model.
+    :param file_id: (Optional) The ID of the file to be sent to the model.
+    :param file_url: (Optional) The URL of the file to be sent to the model.
+    :param filename: (Optional) The name of the file to be sent to the model.
+    """
+
+    type: Literal["input_file"] = "input_file"
+    file_data: str | None = None
+    file_id: str | None = None
+    file_url: str | None = None
+    filename: str | None = None
+
+    @model_validator(mode="after")
+    def validate_file_source(self) -> "OpenAIResponseInputMessageContentFile":
+        if not any([self.file_id, self.file_data, self.file_url]):
+            raise ValueError("At least one of 'file_id', 'file_data', or 'file_url' must be provided for file content")
+        return self
+
+
 OpenAIResponseInputMessageContent = Annotated[
-    OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage,
+    OpenAIResponseInputMessageContentText
+    | OpenAIResponseInputMessageContentImage
+    | OpenAIResponseInputMessageContentFile,
     Field(discriminator="type"),
 ]
 register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
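
A quick sketch of the new file content type and its validator as defined above; the file name and URL are hypothetical values.

# Sketch: the model_validator rejects file content with no source at all.
from pydantic import ValidationError

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInputMessageContentFile,
)

# Valid: at least one of file_id, file_data, or file_url is set.
ok = OpenAIResponseInputMessageContentFile(
    file_url="https://example.com/report.pdf",  # hypothetical URL
    filename="report.pdf",
)

# Invalid: a filename alone is not a file source, so validation fails.
try:
    OpenAIResponseInputMessageContentFile(filename="report.pdf")
except ValidationError as e:
    print(e)  # "At least one of 'file_id', 'file_data', or 'file_url' ..."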
@@ -348,6 +375,20 @@ class OpenAIResponseTextFormat(TypedDict, total=False):
     strict: bool | None


+@json_schema_type
+class OpenAIResponsePromptParam(BaseModel):
+    """Prompt object that is used for OpenAI responses.
+
+    :param id: Unique identifier of the prompt template
+    :param variables: (Optional) Dictionary of variable names to OpenAIResponseInputMessageContent structures for template substitution
+    :param version: (Optional) Version number of the prompt to use (defaults to latest if not specified)
+    """
+
+    id: str
+    variables: dict[str, OpenAIResponseInputMessageContent] | None = None
+    version: str | None = None
+
+
 @json_schema_type
 class OpenAIResponseText(BaseModel):
     """Text response configuration for OpenAI responses.
@@ -537,6 +578,7 @@ class OpenAIResponseObject(BaseModel):
     :param object: Object type identifier, always "response"
     :param output: List of generated output items (messages, tool calls, etc.)
     :param parallel_tool_calls: Whether tool calls can be executed in parallel
+    :param prompt: (Optional) Prompt object with ID, version, and variables
     :param previous_response_id: (Optional) ID of the previous response in a conversation
     :param status: Current status of the response generation
     :param temperature: (Optional) Sampling temperature used for generation
@@ -556,6 +598,7 @@
     output: list[OpenAIResponseOutput]
     parallel_tool_calls: bool = False
     previous_response_id: str | None = None
+    prompt: Prompt | None = None
     status: str
     temperature: float | None = None
     # Default to text format to avoid breaking the loading of old responses
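
Since the response object now carries the resolved `Prompt` from the Prompts API, callers can verify which template actually produced the output. A sketch, reusing the earlier hypothetical `agents` client and `prompt_param`, and assuming the Prompt model exposes a version field:

# Sketch (inside an async context): read the echoed prompt back.
response = await agents.create_openai_response(
    input="What should I pack?",
    model="llama3.2-3b",  # hypothetical model ID
    prompt=prompt_param,  # as constructed in the earlier sketch
)
if response.prompt is not None:
    # Assumes the Prompts API's Prompt model has a `version` attribute.
    print(response.prompt.version)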