Mirror of https://github.com/meta-llama/llama-stack.git
feat: add Prompts API to Responses API
parent: 9f6c658f2a
commit: bdc16ea392

15 changed files with 526 additions and 4 deletions
```diff
@@ -38,6 +38,7 @@ from .openai_responses import (
     OpenAIResponseInputTool,
     OpenAIResponseObject,
     OpenAIResponseObjectStream,
+    OpenAIResponsePromptParam,
     OpenAIResponseText,
 )
 
```
```diff
@@ -796,6 +797,7 @@ class Agents(Protocol):
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt: OpenAIResponsePromptParam | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
```
```diff
@@ -807,9 +809,9 @@ class Agents(Protocol):
         max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
     ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
         """Create a new OpenAI response.
 
         :param input: Input message(s) to create the response.
         :param model: The underlying LLM used for completions.
+        :param prompt: Prompt object with ID, version, and variables.
         :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
         :param include: (Optional) Additional fields to include in the response.
         :returns: An OpenAIResponseObject.
```
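Taken together, the two hunks above add an optional `prompt` argument to `Agents.create_openai_response`. A minimal sketch of a call site follows; only the signature and the `OpenAIResponsePromptParam` fields (defined later in this diff) come from the commit, while the implementation object, prompt ID, and variable values are illustrative placeholders, and the absolute import path is inferred from the relative `from .openai_responses import ...` in the first hunk:

```python
# Sketch of a call site for the new parameter. `agents_impl` stands in for any
# concrete implementation of the Agents protocol; the prompt ID, version, and
# variables are hypothetical values, not part of this commit.
from llama_stack.apis.agents.openai_responses import OpenAIResponsePromptParam


async def run(agents_impl) -> None:
    response = await agents_impl.create_openai_response(
        input="Summarize the attached report in two sentences.",
        model="meta-llama/Llama-3.2-3B-Instruct",
        prompt=OpenAIResponsePromptParam(
            id="prompt_123",               # stored prompt template (hypothetical ID)
            version="2",                   # pin a version; latest is used when omitted
            variables={"tone": "formal"},  # values substituted into the template
        ),
    )
    print(response.status)
```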
```diff
@@ -9,6 +9,7 @@ from typing import Annotated, Any, Literal
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict
 
+from llama_stack.apis.prompts.prompts import Prompt
 from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
 from llama_stack.schema_utils import json_schema_type, register_schema
 
```
```diff
@@ -336,6 +337,20 @@ class OpenAIResponseTextFormat(TypedDict, total=False):
     strict: bool | None
 
 
+@json_schema_type
+class OpenAIResponsePromptParam(BaseModel):
+    """Prompt object that is used for OpenAI responses.
+
+    :param id: Unique identifier of the prompt template
+    :param variables: Dictionary of variable names to values for template substitution
+    :param version: Version number of the prompt to use (defaults to latest if not specified)
+    """
+
+    id: str
+    variables: dict[str, Any] | None = None
+    version: str | None = None
+
+
 @json_schema_type
 class OpenAIResponseText(BaseModel):
     """Text response configuration for OpenAI responses.
```
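Since `OpenAIResponsePromptParam` is a plain Pydantic model, it can be exercised standalone. A small sanity check, using a copy of the model exactly as defined above so the snippet runs without the rest of llama-stack:

```python
from typing import Any

from pydantic import BaseModel


class OpenAIResponsePromptParam(BaseModel):
    """Copy of the model added in this commit, for standalone illustration."""

    id: str
    variables: dict[str, Any] | None = None
    version: str | None = None


param = OpenAIResponsePromptParam(id="prompt_123", variables={"name": "Ada"})
print(param.model_dump())
# -> {'id': 'prompt_123', 'variables': {'name': 'Ada'}, 'version': None}
```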
```diff
@@ -357,6 +372,7 @@ class OpenAIResponseObject(BaseModel):
     :param object: Object type identifier, always "response"
     :param output: List of generated output items (messages, tool calls, etc.)
     :param parallel_tool_calls: Whether tool calls can be executed in parallel
+    :param prompt: (Optional) Prompt object with ID, version, and variables
     :param previous_response_id: (Optional) ID of the previous response in a conversation
     :param status: Current status of the response generation
     :param temperature: (Optional) Sampling temperature used for generation
```
```diff
@@ -373,6 +389,7 @@ class OpenAIResponseObject(BaseModel):
     output: list[OpenAIResponseOutput]
     parallel_tool_calls: bool = False
     previous_response_id: str | None = None
+    prompt: Prompt | None = None
     status: str
     temperature: float | None = None
     # Default to text format to avoid breaking the loading of old responses
```
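Note the asymmetry the last two hunks introduce: the request carries an `OpenAIResponsePromptParam` (just `id`, `version`, `variables`), while the stored response carries the resolved `Prompt` imported from `llama_stack.apis.prompts.prompts`. `Prompt`'s own attributes are not shown in this diff, so the sketch below only checks the field for presence; every field it touches comes from the `OpenAIResponseObject` definition above:

```python
# `response` is an OpenAIResponseObject as defined above.
if response.prompt is not None:
    # The response was generated from a stored prompt template.
    print("generated from prompt:", response.prompt)
else:
    print("no prompt template was used")
print(response.status, response.previous_response_id)
```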