From bdc16ea3924153ca250a6ed52921f75d930fe66b Mon Sep 17 00:00:00 2001 From: r3v5 Date: Sun, 21 Sep 2025 13:52:55 +0100 Subject: [PATCH] feat: add Prompts API to Responses API --- docs/static/deprecated-llama-stack-spec.html | 95 +++++++++++++++++++ docs/static/deprecated-llama-stack-spec.yaml | 78 +++++++++++++++ docs/static/llama-stack-spec.html | 57 +++++++++++ docs/static/llama-stack-spec.yaml | 40 ++++++++ docs/static/stainless-llama-stack-spec.html | 57 +++++++++++ docs/static/stainless-llama-stack-spec.yaml | 40 ++++++++ llama_stack/apis/agents/agents.py | 4 +- llama_stack/apis/agents/openai_responses.py | 17 ++++ llama_stack/core/stack.py | 4 + .../inline/agents/meta_reference/__init__.py | 1 + .../inline/agents/meta_reference/agents.py | 13 ++- .../responses/openai_responses.py | 48 ++++++++++ .../meta_reference/responses/streaming.py | 5 + .../agent/test_meta_reference_agent.py | 3 +- .../meta_reference/test_openai_responses.py | 68 ++++++++++++- 15 files changed, 526 insertions(+), 4 deletions(-) diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 7edfe3f5d..84a4ab491 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -9056,6 +9056,10 @@ "type": "string", "description": "(Optional) ID of the previous response in a conversation" }, + "prompt": { + "$ref": "#/components/schemas/Prompt", + "description": "(Optional) Prompt object with ID, version, and variables" + }, "status": { "type": "string", "description": "Current status of the response generation" @@ -9521,6 +9525,44 @@ "title": "OpenAIResponseText", "description": "Text response configuration for OpenAI responses." }, + "Prompt": { + "type": "object", + "properties": { + "prompt": { + "type": "string", + "description": "The system prompt text with variable placeholders. Variables are only supported when using the Responses API." + }, + "version": { + "type": "integer", + "description": "Version (integer starting at 1, incremented on save)" + }, + "prompt_id": { + "type": "string", + "description": "Unique identifier formatted as 'pmpt_<48-digit-hash>'" + }, + "variables": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of prompt variable names that can be used in the prompt template" + }, + "is_default": { + "type": "boolean", + "default": false, + "description": "Boolean indicating whether this version is the default version for this prompt" + } + }, + "additionalProperties": false, + "required": [ + "version", + "prompt_id", + "variables", + "is_default" + ], + "title": "Prompt", + "description": "A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack." + }, "OpenAIResponseInputTool": { "oneOf": [ { @@ -9824,6 +9866,51 @@ "title": "OpenAIResponseInputToolWebSearch", "description": "Web search tool configuration for OpenAI response inputs." 
}, + "OpenAIResponsePromptParam": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier of the prompt template" + }, + "variables": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Dictionary of variable names to values for template substitution" + }, + "version": { + "type": "string", + "description": "Version number of the prompt to use (defaults to latest if not specified)" + } + }, + "additionalProperties": false, + "required": [ + "id" + ], + "title": "OpenAIResponsePromptParam", + "description": "Prompt object that is used for OpenAI responses." + }, "CreateOpenaiResponseRequest": { "type": "object", "properties": { @@ -9845,6 +9932,10 @@ "type": "string", "description": "The underlying LLM used for completions." }, + "prompt": { + "$ref": "#/components/schemas/OpenAIResponsePromptParam", + "description": "Prompt object with ID, version, and variables." + }, "instructions": { "type": "string" }, @@ -9929,6 +10020,10 @@ "type": "string", "description": "(Optional) ID of the previous response in a conversation" }, + "prompt": { + "$ref": "#/components/schemas/Prompt", + "description": "(Optional) Prompt object with ID, version, and variables" + }, "status": { "type": "string", "description": "Current status of the response generation" diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index ca832d46b..14e03c105 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -6708,6 +6708,10 @@ components: type: string description: >- (Optional) ID of the previous response in a conversation + prompt: + $ref: '#/components/schemas/Prompt' + description: >- + (Optional) Prompt object with ID, version, and variables status: type: string description: >- @@ -7076,6 +7080,44 @@ components: title: OpenAIResponseText description: >- Text response configuration for OpenAI responses. + Prompt: + type: object + properties: + prompt: + type: string + description: >- + The system prompt text with variable placeholders. Variables are only + supported when using the Responses API. + version: + type: integer + description: >- + Version (integer starting at 1, incremented on save) + prompt_id: + type: string + description: >- + Unique identifier formatted as 'pmpt_<48-digit-hash>' + variables: + type: array + items: + type: string + description: >- + List of prompt variable names that can be used in the prompt template + is_default: + type: boolean + default: false + description: >- + Boolean indicating whether this version is the default version for this + prompt + additionalProperties: false + required: + - version + - prompt_id + - variables + - is_default + title: Prompt + description: >- + A prompt resource representing a stored OpenAI Compatible prompt template + in Llama Stack. OpenAIResponseInputTool: oneOf: - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' @@ -7286,6 +7328,34 @@ components: title: OpenAIResponseInputToolWebSearch description: >- Web search tool configuration for OpenAI response inputs. 
+ OpenAIResponsePromptParam: + type: object + properties: + id: + type: string + description: Unique identifier of the prompt template + variables: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Dictionary of variable names to values for template substitution + version: + type: string + description: >- + Version number of the prompt to use (defaults to latest if not specified) + additionalProperties: false + required: + - id + title: OpenAIResponsePromptParam + description: >- + Prompt object that is used for OpenAI responses. CreateOpenaiResponseRequest: type: object properties: @@ -7299,6 +7369,10 @@ components: model: type: string description: The underlying LLM used for completions. + prompt: + $ref: '#/components/schemas/OpenAIResponsePromptParam' + description: >- + Prompt object with ID, version, and variables. instructions: type: string previous_response_id: @@ -7370,6 +7444,10 @@ components: type: string description: >- (Optional) ID of the previous response in a conversation + prompt: + $ref: '#/components/schemas/Prompt' + description: >- + (Optional) Prompt object with ID, version, and variables status: type: string description: >- diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 96e97035f..c168db1d6 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -7463,6 +7463,10 @@ "type": "string", "description": "(Optional) ID of the previous response in a conversation" }, + "prompt": { + "$ref": "#/components/schemas/Prompt", + "description": "(Optional) Prompt object with ID, version, and variables" + }, "status": { "type": "string", "description": "Current status of the response generation" @@ -7919,6 +7923,51 @@ "title": "OpenAIResponseInputToolWebSearch", "description": "Web search tool configuration for OpenAI response inputs." }, + "OpenAIResponsePromptParam": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier of the prompt template" + }, + "variables": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Dictionary of variable names to values for template substitution" + }, + "version": { + "type": "string", + "description": "Version number of the prompt to use (defaults to latest if not specified)" + } + }, + "additionalProperties": false, + "required": [ + "id" + ], + "title": "OpenAIResponsePromptParam", + "description": "Prompt object that is used for OpenAI responses." + }, "CreateOpenaiResponseRequest": { "type": "object", "properties": { @@ -7940,6 +7989,10 @@ "type": "string", "description": "The underlying LLM used for completions." }, + "prompt": { + "$ref": "#/components/schemas/OpenAIResponsePromptParam", + "description": "Prompt object with ID, version, and variables." 
+ }, "instructions": { "type": "string" }, @@ -8024,6 +8077,10 @@ "type": "string", "description": "(Optional) ID of the previous response in a conversation" }, + "prompt": { + "$ref": "#/components/schemas/Prompt", + "description": "(Optional) Prompt object with ID, version, and variables" + }, "status": { "type": "string", "description": "Current status of the response generation" diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index b9e03d614..c069f8451 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -5628,6 +5628,10 @@ components: type: string description: >- (Optional) ID of the previous response in a conversation + prompt: + $ref: '#/components/schemas/Prompt' + description: >- + (Optional) Prompt object with ID, version, and variables status: type: string description: >- @@ -5949,6 +5953,34 @@ components: title: OpenAIResponseInputToolWebSearch description: >- Web search tool configuration for OpenAI response inputs. + OpenAIResponsePromptParam: + type: object + properties: + id: + type: string + description: Unique identifier of the prompt template + variables: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Dictionary of variable names to values for template substitution + version: + type: string + description: >- + Version number of the prompt to use (defaults to latest if not specified) + additionalProperties: false + required: + - id + title: OpenAIResponsePromptParam + description: >- + Prompt object that is used for OpenAI responses. CreateOpenaiResponseRequest: type: object properties: @@ -5962,6 +5994,10 @@ components: model: type: string description: The underlying LLM used for completions. + prompt: + $ref: '#/components/schemas/OpenAIResponsePromptParam' + description: >- + Prompt object with ID, version, and variables. instructions: type: string previous_response_id: @@ -6033,6 +6069,10 @@ components: type: string description: >- (Optional) ID of the previous response in a conversation + prompt: + $ref: '#/components/schemas/Prompt' + description: >- + (Optional) Prompt object with ID, version, and variables status: type: string description: >- diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 7ec48ef74..4b4a73cb2 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -9472,6 +9472,10 @@ "type": "string", "description": "(Optional) ID of the previous response in a conversation" }, + "prompt": { + "$ref": "#/components/schemas/Prompt", + "description": "(Optional) Prompt object with ID, version, and variables" + }, "status": { "type": "string", "description": "Current status of the response generation" @@ -9928,6 +9932,51 @@ "title": "OpenAIResponseInputToolWebSearch", "description": "Web search tool configuration for OpenAI response inputs." 
}, + "OpenAIResponsePromptParam": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier of the prompt template" + }, + "variables": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Dictionary of variable names to values for template substitution" + }, + "version": { + "type": "string", + "description": "Version number of the prompt to use (defaults to latest if not specified)" + } + }, + "additionalProperties": false, + "required": [ + "id" + ], + "title": "OpenAIResponsePromptParam", + "description": "Prompt object that is used for OpenAI responses." + }, "CreateOpenaiResponseRequest": { "type": "object", "properties": { @@ -9949,6 +9998,10 @@ "type": "string", "description": "The underlying LLM used for completions." }, + "prompt": { + "$ref": "#/components/schemas/OpenAIResponsePromptParam", + "description": "Prompt object with ID, version, and variables." + }, "instructions": { "type": "string" }, @@ -10033,6 +10086,10 @@ "type": "string", "description": "(Optional) ID of the previous response in a conversation" }, + "prompt": { + "$ref": "#/components/schemas/Prompt", + "description": "(Optional) Prompt object with ID, version, and variables" + }, "status": { "type": "string", "description": "Current status of the response generation" diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 3bede159b..4dd82b75c 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -7073,6 +7073,10 @@ components: type: string description: >- (Optional) ID of the previous response in a conversation + prompt: + $ref: '#/components/schemas/Prompt' + description: >- + (Optional) Prompt object with ID, version, and variables status: type: string description: >- @@ -7394,6 +7398,34 @@ components: title: OpenAIResponseInputToolWebSearch description: >- Web search tool configuration for OpenAI response inputs. + OpenAIResponsePromptParam: + type: object + properties: + id: + type: string + description: Unique identifier of the prompt template + variables: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Dictionary of variable names to values for template substitution + version: + type: string + description: >- + Version number of the prompt to use (defaults to latest if not specified) + additionalProperties: false + required: + - id + title: OpenAIResponsePromptParam + description: >- + Prompt object that is used for OpenAI responses. CreateOpenaiResponseRequest: type: object properties: @@ -7407,6 +7439,10 @@ components: model: type: string description: The underlying LLM used for completions. + prompt: + $ref: '#/components/schemas/OpenAIResponsePromptParam' + description: >- + Prompt object with ID, version, and variables. 
instructions: type: string previous_response_id: @@ -7478,6 +7514,10 @@ components: type: string description: >- (Optional) ID of the previous response in a conversation + prompt: + $ref: '#/components/schemas/Prompt' + description: >- + (Optional) Prompt object with ID, version, and variables status: type: string description: >- diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index 811fe6aa2..ada2a5029 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -38,6 +38,7 @@ from .openai_responses import ( OpenAIResponseInputTool, OpenAIResponseObject, OpenAIResponseObjectStream, + OpenAIResponsePromptParam, OpenAIResponseText, ) @@ -796,6 +797,7 @@ class Agents(Protocol): self, input: str | list[OpenAIResponseInput], model: str, + prompt: OpenAIResponsePromptParam | None = None, instructions: str | None = None, previous_response_id: str | None = None, store: bool | None = True, @@ -807,9 +809,9 @@ class Agents(Protocol): max_infer_iters: int | None = 10, # this is an extension to the OpenAI API ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]: """Create a new OpenAI response. - :param input: Input message(s) to create the response. :param model: The underlying LLM used for completions. + :param prompt: Prompt object with ID, version, and variables. :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses. :param include: (Optional) Additional fields to include in the response. :returns: An OpenAIResponseObject. diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index 0f3511ea3..356bbffb2 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -9,6 +9,7 @@ from typing import Annotated, Any, Literal from pydantic import BaseModel, Field from typing_extensions import TypedDict +from llama_stack.apis.prompts.prompts import Prompt from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions from llama_stack.schema_utils import json_schema_type, register_schema @@ -336,6 +337,20 @@ class OpenAIResponseTextFormat(TypedDict, total=False): strict: bool | None +@json_schema_type +class OpenAIResponsePromptParam(BaseModel): + """Prompt object that is used for OpenAI responses. + + :param id: Unique identifier of the prompt template + :param variables: Dictionary of variable names to values for template substitution + :param version: Version number of the prompt to use (defaults to latest if not specified) + """ + + id: str + variables: dict[str, Any] | None = None + version: str | None = None + + @json_schema_type class OpenAIResponseText(BaseModel): """Text response configuration for OpenAI responses. @@ -357,6 +372,7 @@ class OpenAIResponseObject(BaseModel): :param object: Object type identifier, always "response" :param output: List of generated output items (messages, tool calls, etc.) 
:param parallel_tool_calls: Whether tool calls can be executed in parallel + :param prompt: (Optional) Prompt object with ID, version, and variables :param previous_response_id: (Optional) ID of the previous response in a conversation :param status: Current status of the response generation :param temperature: (Optional) Sampling temperature used for generation @@ -373,6 +389,7 @@ class OpenAIResponseObject(BaseModel): output: list[OpenAIResponseOutput] parallel_tool_calls: bool = False previous_response_id: str | None = None + prompt: Prompt | None = None status: str temperature: float | None = None # Default to text format to avoid breaking the loading of old responses diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index d5d55319a..6fdc9c1c3 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -321,6 +321,10 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf ) impls[Api.conversations] = conversations_impl + # Set prompts API on agents provider if it exists + if Api.agents in impls and hasattr(impls[Api.agents], "set_prompts_api"): + impls[Api.agents].set_prompts_api(prompts_impl) + class Stack: def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None): diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/llama_stack/providers/inline/agents/meta_reference/__init__.py index 334c32e15..bfe14aa32 100644 --- a/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -21,6 +21,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap deps[Api.safety], deps[Api.tool_runtime], deps[Api.tool_groups], + None, # prompts_api will be set later when available policy, ) await impl.initialize() diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 8bdde86b0..0f789b5c5 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -28,7 +28,7 @@ from llama_stack.apis.agents import ( Session, Turn, ) -from llama_stack.apis.agents.openai_responses import OpenAIResponseText +from llama_stack.apis.agents.openai_responses import OpenAIResponsePromptParam, OpenAIResponseText from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.apis.inference import ( Inference, @@ -37,6 +37,7 @@ from llama_stack.apis.inference import ( ToolResponseMessage, UserMessage, ) +from llama_stack.apis.prompts import Prompts from llama_stack.apis.safety import Safety from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO @@ -63,6 +64,7 @@ class MetaReferenceAgentsImpl(Agents): safety_api: Safety, tool_runtime_api: ToolRuntime, tool_groups_api: ToolGroups, + prompts_api: Prompts | None, policy: list[AccessRule], ): self.config = config @@ -71,6 +73,7 @@ class MetaReferenceAgentsImpl(Agents): self.safety_api = safety_api self.tool_runtime_api = tool_runtime_api self.tool_groups_api = tool_groups_api + self.prompts_api = prompts_api self.in_memory_store = InmemoryKVStoreImpl() self.openai_responses_impl: OpenAIResponsesImpl | None = None @@ -86,8 +89,14 @@ class MetaReferenceAgentsImpl(Agents): tool_runtime_api=self.tool_runtime_api, responses_store=self.responses_store, vector_io_api=self.vector_io_api, + prompts_api=self.prompts_api, ) + def 
set_prompts_api(self, prompts_api: Prompts) -> None: + self.prompts_api = prompts_api + if hasattr(self, "openai_responses_impl") and self.openai_responses_impl: + self.openai_responses_impl.prompts_api = prompts_api + async def create_agent( self, agent_config: AgentConfig, @@ -320,6 +329,7 @@ class MetaReferenceAgentsImpl(Agents): self, input: str | list[OpenAIResponseInput], model: str, + prompt: OpenAIResponsePromptParam | None = None, instructions: str | None = None, previous_response_id: str | None = None, store: bool | None = True, @@ -333,6 +343,7 @@ class MetaReferenceAgentsImpl(Agents): return await self.openai_responses_impl.create_openai_response( input, model, + prompt, instructions, previous_response_id, store, diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 352be3ded..f37f3e22d 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -21,6 +21,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseMessage, OpenAIResponseObject, OpenAIResponseObjectStream, + OpenAIResponsePromptParam, OpenAIResponseText, OpenAIResponseTextFormat, ) @@ -29,6 +30,8 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, OpenAISystemMessageParam, ) +from llama_stack.apis.prompts import Prompts +from llama_stack.apis.prompts.prompts import Prompt from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO from llama_stack.log import get_logger @@ -61,12 +64,14 @@ class OpenAIResponsesImpl: tool_runtime_api: ToolRuntime, responses_store: ResponsesStore, vector_io_api: VectorIO, # VectorIO + prompts_api: Prompts, ): self.inference_api = inference_api self.tool_groups_api = tool_groups_api self.tool_runtime_api = tool_runtime_api self.responses_store = responses_store self.vector_io_api = vector_io_api + self.prompts_api = prompts_api self.tool_executor = ToolExecutor( tool_groups_api=tool_groups_api, tool_runtime_api=tool_runtime_api, @@ -123,6 +128,41 @@ class OpenAIResponsesImpl: if instructions: messages.insert(0, OpenAISystemMessageParam(content=instructions)) + async def _prepend_prompt( + self, messages: list[OpenAIMessageParam], prompt_params: OpenAIResponsePromptParam + ) -> Prompt: + if not prompt_params or not prompt_params.id: + return None + + try: + # Check if prompt exists in Llama Stack and retrieve it + prompt_version = int(prompt_params.version) if prompt_params.version else None + cur_prompt = await self.prompts_api.get_prompt(prompt_params.id, prompt_version) + if cur_prompt and cur_prompt.prompt: + cur_prompt_text = cur_prompt.prompt + cur_prompt_variables = cur_prompt.variables + + final_prompt_text = cur_prompt_text + if prompt_params.variables: + # check if the variables are valid + for name in prompt_params.variables.keys(): + if name not in cur_prompt_variables: + raise ValueError(f"Variable {name} not found in prompt {prompt_params.id}") + + # replace the variables in the prompt text + for name, value in prompt_params.variables.items(): + final_prompt_text = final_prompt_text.replace(f"{{{{ {name} }}}}", str(value)) + + messages.insert(0, OpenAISystemMessageParam(content=final_prompt_text)) + logger.info(f"Prompt {prompt_params.id} found and applied\nFinal prompt text: {final_prompt_text}") + return cur_prompt + + except ValueError: + 
logger.warning( + f"Prompt {prompt_params.id} with version {prompt_params.version} not found, skipping prompt prepending" + ) + return None + async def get_openai_response( self, response_id: str, @@ -199,6 +239,7 @@ class OpenAIResponsesImpl: self, input: str | list[OpenAIResponseInput], model: str, + prompt: OpenAIResponsePromptParam | None = None, instructions: str | None = None, previous_response_id: str | None = None, store: bool | None = True, @@ -215,6 +256,7 @@ class OpenAIResponsesImpl: stream_gen = self._create_streaming_response( input=input, model=model, + prompt=prompt, instructions=instructions, previous_response_id=previous_response_id, store=store, @@ -243,6 +285,7 @@ class OpenAIResponsesImpl: self, input: str | list[OpenAIResponseInput], model: str, + prompt: OpenAIResponsePromptParam | None = None, instructions: str | None = None, previous_response_id: str | None = None, store: bool | None = True, @@ -253,6 +296,10 @@ class OpenAIResponsesImpl: ) -> AsyncIterator[OpenAIResponseObjectStream]: # Input preprocessing all_input, messages = await self._process_input_with_previous_response(input, previous_response_id) + + # Prepend reusable prompt (if provided) + prompt_obj = await self._prepend_prompt(messages, prompt) + await self._prepend_instructions(messages, instructions) # Structured outputs @@ -276,6 +323,7 @@ class OpenAIResponsesImpl: ctx=ctx, response_id=response_id, created_at=created_at, + prompt=prompt_obj, text=text, max_infer_iters=max_infer_iters, tool_executor=self.tool_executor, diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 0bb524f5c..df47753e9 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -45,6 +45,7 @@ from llama_stack.apis.inference import ( OpenAIChoice, OpenAIMessageParam, ) +from llama_stack.apis.prompts.prompts import Prompt from llama_stack.log import get_logger from .types import ChatCompletionContext, ChatCompletionResult @@ -81,6 +82,7 @@ class StreamingResponseOrchestrator: ctx: ChatCompletionContext, response_id: str, created_at: int, + prompt: Prompt | None, text: OpenAIResponseText, max_infer_iters: int, tool_executor, # Will be the tool execution logic from the main class @@ -89,6 +91,7 @@ class StreamingResponseOrchestrator: self.ctx = ctx self.response_id = response_id self.created_at = created_at + self.prompt = prompt self.text = text self.max_infer_iters = max_infer_iters self.tool_executor = tool_executor @@ -109,6 +112,7 @@ class StreamingResponseOrchestrator: object="response", status="in_progress", output=output_messages.copy(), + prompt=self.prompt, text=self.text, ) @@ -195,6 +199,7 @@ class StreamingResponseOrchestrator: model=self.ctx.model, object="response", status="completed", + prompt=self.prompt, text=self.text, output=output_messages, ) diff --git a/tests/unit/providers/agent/test_meta_reference_agent.py b/tests/unit/providers/agent/test_meta_reference_agent.py index fdbb2b8e9..beb622eb2 100644 --- a/tests/unit/providers/agent/test_meta_reference_agent.py +++ b/tests/unit/providers/agent/test_meta_reference_agent.py @@ -59,7 +59,8 @@ async def agents_impl(config, mock_apis): mock_apis["safety_api"], mock_apis["tool_runtime_api"], mock_apis["tool_groups_api"], - {}, + None, # prompts_api (will be set later via set_prompts_api if needed) + [], # policy (empty list for tests) ) await 
impl.initialize() yield impl diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index f2b29c1f7..ca701bc22 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -38,6 +38,7 @@ from llama_stack.apis.inference import ( OpenAIResponseFormatJSONSchema, OpenAIUserMessageParam, ) +from llama_stack.apis.prompts import Prompt from llama_stack.apis.tools.tools import ToolDef, ToolGroups, ToolInvocationResult, ToolRuntime from llama_stack.core.access_control.access_control import default_policy from llama_stack.core.datatypes import ResponsesStoreConfig @@ -82,9 +83,20 @@ def mock_vector_io_api(): return vector_io_api +@pytest.fixture +def mock_prompts_api(): + prompts_api = AsyncMock() + return prompts_api + + @pytest.fixture def openai_responses_impl( - mock_inference_api, mock_tool_groups_api, mock_tool_runtime_api, mock_responses_store, mock_vector_io_api + mock_inference_api, + mock_tool_groups_api, + mock_tool_runtime_api, + mock_responses_store, + mock_vector_io_api, + mock_prompts_api, ): return OpenAIResponsesImpl( inference_api=mock_inference_api, @@ -92,6 +104,7 @@ def openai_responses_impl( tool_runtime_api=mock_tool_runtime_api, responses_store=mock_responses_store, vector_io_api=mock_vector_io_api, + prompts_api=mock_prompts_api, ) @@ -1004,3 +1017,56 @@ async def test_create_openai_response_with_invalid_text_format(openai_responses_ model=model, text=OpenAIResponseText(format={"type": "invalid"}), ) + + +async def test_create_openai_response_with_prompt(openai_responses_impl, mock_inference_api, mock_prompts_api): + """Test creating an OpenAI response with a prompt.""" + input_text = "What is the capital of Ireland?" + model = "meta-llama/Llama-3.1-8B-Instruct" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="You are a helpful {{ area_name }} assistant at {{ company_name }}. Always provide accurate information.", + prompt_id=prompt_id, + version=1, + variables=["area_name", "company_name"], + is_default=True, + ) + + from llama_stack.apis.agents.openai_responses import OpenAIResponsePromptParam + + prompt_params_with_version_1 = OpenAIResponsePromptParam( + id=prompt_id, version="1", variables={"area_name": "geography", "company_name": "Dummy Company"} + ) + + mock_prompts_api.get_prompt.return_value = prompt + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + prompt=prompt_params_with_version_1, + ) + + mock_prompts_api.get_prompt.assert_called_with(prompt_id, 1) + mock_inference_api.openai_chat_completion.assert_called() + call_args = mock_inference_api.openai_chat_completion.call_args + sent_messages = call_args.kwargs["messages"] + assert len(sent_messages) == 2 + + system_messages = [msg for msg in sent_messages if msg.role == "system"] + assert len(system_messages) == 1 + assert ( + system_messages[0].content + == "You are a helpful geography assistant at Dummy Company. Always provide accurate information." 
+ ) + + user_messages = [msg for msg in sent_messages if msg.role == "user"] + assert len(user_messages) == 1 + assert user_messages[0].content == input_text + + assert result.model == model + assert result.status == "completed" + assert result.prompt.prompt_id == prompt_id + assert result.prompt.variables == ["area_name", "company_name"] + assert result.prompt.version == 1 + assert result.prompt.prompt == prompt.prompt
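
Usage sketch (not part of the patch): the snippet below mirrors the new unit test and shows how a caller might exercise the `prompt` parameter added to `create_openai_response`. The prompt ID, the model name, and the `agents_api` handle are illustrative assumptions; any object implementing the `Agents` protocol, with a prompt of that ID already stored via the Prompts API, would behave the same way.

    from llama_stack.apis.agents.openai_responses import OpenAIResponsePromptParam


    async def ask_with_stored_prompt(agents_api):
        # Reference a stored prompt by ID; version and variables are optional.
        # The ID below is hypothetical and follows the documented
        # 'pmpt_<48-digit-hash>' format.
        prompt_param = OpenAIResponsePromptParam(
            id="pmpt_1234567890abcdef1234567890abcdef1234567890abcdef",
            version="1",  # defaults to the latest version when omitted
            variables={"area_name": "geography", "company_name": "Dummy Company"},
        )

        response = await agents_api.create_openai_response(
            input="What is the capital of Ireland?",
            model="meta-llama/Llama-3.1-8B-Instruct",
            prompt=prompt_param,
        )

        # The stored Prompt resource is echoed back on the response object.
        print(response.prompt.prompt_id, response.prompt.version)
        return response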
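
Implementation note (illustrative, not part of the patch): `_prepend_prompt` validates that every supplied variable is declared on the stored prompt, then performs plain string replacement over "{{ name }}" placeholders (one space on each side of the name) before inserting the rendered text as a system message. A minimal standalone sketch of that substitution convention:

    from typing import Any


    def render_prompt(template: str, declared_variables: list[str], values: dict[str, Any]) -> str:
        # Reject variables that the stored prompt does not declare, as the provider does.
        for name in values:
            if name not in declared_variables:
                raise ValueError(f"Variable {name} not found in prompt")
        rendered = template
        for name, value in values.items():
            rendered = rendered.replace(f"{{{{ {name} }}}}", str(value))
        return rendered


    template = "You are a helpful {{ area_name }} assistant at {{ company_name }}."
    print(render_prompt(template, ["area_name", "company_name"],
                        {"area_name": "geography", "company_name": "Dummy Company"}))
    # -> You are a helpful geography assistant at Dummy Company.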