From bdc16ea3924153ca250a6ed52921f75d930fe66b Mon Sep 17 00:00:00 2001 From: r3v5 Date: Sun, 21 Sep 2025 13:52:55 +0100 Subject: [PATCH] feat: add Prompts API to Responses API --- docs/static/deprecated-llama-stack-spec.html | 95 +++++++++++++++++++ docs/static/deprecated-llama-stack-spec.yaml | 78 +++++++++++++++ docs/static/llama-stack-spec.html | 57 +++++++++++ docs/static/llama-stack-spec.yaml | 40 ++++++++ docs/static/stainless-llama-stack-spec.html | 57 +++++++++++ docs/static/stainless-llama-stack-spec.yaml | 40 ++++++++ llama_stack/apis/agents/agents.py | 4 +- llama_stack/apis/agents/openai_responses.py | 17 ++++ llama_stack/core/stack.py | 4 + .../inline/agents/meta_reference/__init__.py | 1 + .../inline/agents/meta_reference/agents.py | 13 ++- .../responses/openai_responses.py | 48 ++++++++++ .../meta_reference/responses/streaming.py | 5 + .../agent/test_meta_reference_agent.py | 3 +- .../meta_reference/test_openai_responses.py | 68 ++++++++++++- 15 files changed, 526 insertions(+), 4 deletions(-) diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 7edfe3f5d..84a4ab491 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -9056,6 +9056,10 @@ "type": "string", "description": "(Optional) ID of the previous response in a conversation" }, + "prompt": { + "$ref": "#/components/schemas/Prompt", + "description": "(Optional) Prompt object with ID, version, and variables" + }, "status": { "type": "string", "description": "Current status of the response generation" @@ -9521,6 +9525,44 @@ "title": "OpenAIResponseText", "description": "Text response configuration for OpenAI responses." }, + "Prompt": { + "type": "object", + "properties": { + "prompt": { + "type": "string", + "description": "The system prompt text with variable placeholders. Variables are only supported when using the Responses API." + }, + "version": { + "type": "integer", + "description": "Version (integer starting at 1, incremented on save)" + }, + "prompt_id": { + "type": "string", + "description": "Unique identifier formatted as 'pmpt_<48-digit-hash>'" + }, + "variables": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of prompt variable names that can be used in the prompt template" + }, + "is_default": { + "type": "boolean", + "default": false, + "description": "Boolean indicating whether this version is the default version for this prompt" + } + }, + "additionalProperties": false, + "required": [ + "version", + "prompt_id", + "variables", + "is_default" + ], + "title": "Prompt", + "description": "A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack." + }, "OpenAIResponseInputTool": { "oneOf": [ { @@ -9824,6 +9866,51 @@ "title": "OpenAIResponseInputToolWebSearch", "description": "Web search tool configuration for OpenAI response inputs." 
}, + "OpenAIResponsePromptParam": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier of the prompt template" + }, + "variables": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Dictionary of variable names to values for template substitution" + }, + "version": { + "type": "string", + "description": "Version number of the prompt to use (defaults to latest if not specified)" + } + }, + "additionalProperties": false, + "required": [ + "id" + ], + "title": "OpenAIResponsePromptParam", + "description": "Prompt object that is used for OpenAI responses." + }, "CreateOpenaiResponseRequest": { "type": "object", "properties": { @@ -9845,6 +9932,10 @@ "type": "string", "description": "The underlying LLM used for completions." }, + "prompt": { + "$ref": "#/components/schemas/OpenAIResponsePromptParam", + "description": "Prompt object with ID, version, and variables." + }, "instructions": { "type": "string" }, @@ -9929,6 +10020,10 @@ "type": "string", "description": "(Optional) ID of the previous response in a conversation" }, + "prompt": { + "$ref": "#/components/schemas/Prompt", + "description": "(Optional) Prompt object with ID, version, and variables" + }, "status": { "type": "string", "description": "Current status of the response generation" diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index ca832d46b..14e03c105 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -6708,6 +6708,10 @@ components: type: string description: >- (Optional) ID of the previous response in a conversation + prompt: + $ref: '#/components/schemas/Prompt' + description: >- + (Optional) Prompt object with ID, version, and variables status: type: string description: >- @@ -7076,6 +7080,44 @@ components: title: OpenAIResponseText description: >- Text response configuration for OpenAI responses. + Prompt: + type: object + properties: + prompt: + type: string + description: >- + The system prompt text with variable placeholders. Variables are only + supported when using the Responses API. + version: + type: integer + description: >- + Version (integer starting at 1, incremented on save) + prompt_id: + type: string + description: >- + Unique identifier formatted as 'pmpt_<48-digit-hash>' + variables: + type: array + items: + type: string + description: >- + List of prompt variable names that can be used in the prompt template + is_default: + type: boolean + default: false + description: >- + Boolean indicating whether this version is the default version for this + prompt + additionalProperties: false + required: + - version + - prompt_id + - variables + - is_default + title: Prompt + description: >- + A prompt resource representing a stored OpenAI Compatible prompt template + in Llama Stack. OpenAIResponseInputTool: oneOf: - $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch' @@ -7286,6 +7328,34 @@ components: title: OpenAIResponseInputToolWebSearch description: >- Web search tool configuration for OpenAI response inputs. 
+ OpenAIResponsePromptParam: + type: object + properties: + id: + type: string + description: Unique identifier of the prompt template + variables: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Dictionary of variable names to values for template substitution + version: + type: string + description: >- + Version number of the prompt to use (defaults to latest if not specified) + additionalProperties: false + required: + - id + title: OpenAIResponsePromptParam + description: >- + Prompt object that is used for OpenAI responses. CreateOpenaiResponseRequest: type: object properties: @@ -7299,6 +7369,10 @@ components: model: type: string description: The underlying LLM used for completions. + prompt: + $ref: '#/components/schemas/OpenAIResponsePromptParam' + description: >- + Prompt object with ID, version, and variables. instructions: type: string previous_response_id: @@ -7370,6 +7444,10 @@ components: type: string description: >- (Optional) ID of the previous response in a conversation + prompt: + $ref: '#/components/schemas/Prompt' + description: >- + (Optional) Prompt object with ID, version, and variables status: type: string description: >- diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 96e97035f..c168db1d6 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -7463,6 +7463,10 @@ "type": "string", "description": "(Optional) ID of the previous response in a conversation" }, + "prompt": { + "$ref": "#/components/schemas/Prompt", + "description": "(Optional) Prompt object with ID, version, and variables" + }, "status": { "type": "string", "description": "Current status of the response generation" @@ -7919,6 +7923,51 @@ "title": "OpenAIResponseInputToolWebSearch", "description": "Web search tool configuration for OpenAI response inputs." }, + "OpenAIResponsePromptParam": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier of the prompt template" + }, + "variables": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Dictionary of variable names to values for template substitution" + }, + "version": { + "type": "string", + "description": "Version number of the prompt to use (defaults to latest if not specified)" + } + }, + "additionalProperties": false, + "required": [ + "id" + ], + "title": "OpenAIResponsePromptParam", + "description": "Prompt object that is used for OpenAI responses." + }, "CreateOpenaiResponseRequest": { "type": "object", "properties": { @@ -7940,6 +7989,10 @@ "type": "string", "description": "The underlying LLM used for completions." }, + "prompt": { + "$ref": "#/components/schemas/OpenAIResponsePromptParam", + "description": "Prompt object with ID, version, and variables." 
+ }, "instructions": { "type": "string" }, @@ -8024,6 +8077,10 @@ "type": "string", "description": "(Optional) ID of the previous response in a conversation" }, + "prompt": { + "$ref": "#/components/schemas/Prompt", + "description": "(Optional) Prompt object with ID, version, and variables" + }, "status": { "type": "string", "description": "Current status of the response generation" diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index b9e03d614..c069f8451 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -5628,6 +5628,10 @@ components: type: string description: >- (Optional) ID of the previous response in a conversation + prompt: + $ref: '#/components/schemas/Prompt' + description: >- + (Optional) Prompt object with ID, version, and variables status: type: string description: >- @@ -5949,6 +5953,34 @@ components: title: OpenAIResponseInputToolWebSearch description: >- Web search tool configuration for OpenAI response inputs. + OpenAIResponsePromptParam: + type: object + properties: + id: + type: string + description: Unique identifier of the prompt template + variables: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Dictionary of variable names to values for template substitution + version: + type: string + description: >- + Version number of the prompt to use (defaults to latest if not specified) + additionalProperties: false + required: + - id + title: OpenAIResponsePromptParam + description: >- + Prompt object that is used for OpenAI responses. CreateOpenaiResponseRequest: type: object properties: @@ -5962,6 +5994,10 @@ components: model: type: string description: The underlying LLM used for completions. + prompt: + $ref: '#/components/schemas/OpenAIResponsePromptParam' + description: >- + Prompt object with ID, version, and variables. instructions: type: string previous_response_id: @@ -6033,6 +6069,10 @@ components: type: string description: >- (Optional) ID of the previous response in a conversation + prompt: + $ref: '#/components/schemas/Prompt' + description: >- + (Optional) Prompt object with ID, version, and variables status: type: string description: >- diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 7ec48ef74..4b4a73cb2 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -9472,6 +9472,10 @@ "type": "string", "description": "(Optional) ID of the previous response in a conversation" }, + "prompt": { + "$ref": "#/components/schemas/Prompt", + "description": "(Optional) Prompt object with ID, version, and variables" + }, "status": { "type": "string", "description": "Current status of the response generation" @@ -9928,6 +9932,51 @@ "title": "OpenAIResponseInputToolWebSearch", "description": "Web search tool configuration for OpenAI response inputs." 
}, + "OpenAIResponsePromptParam": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier of the prompt template" + }, + "variables": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Dictionary of variable names to values for template substitution" + }, + "version": { + "type": "string", + "description": "Version number of the prompt to use (defaults to latest if not specified)" + } + }, + "additionalProperties": false, + "required": [ + "id" + ], + "title": "OpenAIResponsePromptParam", + "description": "Prompt object that is used for OpenAI responses." + }, "CreateOpenaiResponseRequest": { "type": "object", "properties": { @@ -9949,6 +9998,10 @@ "type": "string", "description": "The underlying LLM used for completions." }, + "prompt": { + "$ref": "#/components/schemas/OpenAIResponsePromptParam", + "description": "Prompt object with ID, version, and variables." + }, "instructions": { "type": "string" }, @@ -10033,6 +10086,10 @@ "type": "string", "description": "(Optional) ID of the previous response in a conversation" }, + "prompt": { + "$ref": "#/components/schemas/Prompt", + "description": "(Optional) Prompt object with ID, version, and variables" + }, "status": { "type": "string", "description": "Current status of the response generation" diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 3bede159b..4dd82b75c 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -7073,6 +7073,10 @@ components: type: string description: >- (Optional) ID of the previous response in a conversation + prompt: + $ref: '#/components/schemas/Prompt' + description: >- + (Optional) Prompt object with ID, version, and variables status: type: string description: >- @@ -7394,6 +7398,34 @@ components: title: OpenAIResponseInputToolWebSearch description: >- Web search tool configuration for OpenAI response inputs. + OpenAIResponsePromptParam: + type: object + properties: + id: + type: string + description: Unique identifier of the prompt template + variables: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Dictionary of variable names to values for template substitution + version: + type: string + description: >- + Version number of the prompt to use (defaults to latest if not specified) + additionalProperties: false + required: + - id + title: OpenAIResponsePromptParam + description: >- + Prompt object that is used for OpenAI responses. CreateOpenaiResponseRequest: type: object properties: @@ -7407,6 +7439,10 @@ components: model: type: string description: The underlying LLM used for completions. + prompt: + $ref: '#/components/schemas/OpenAIResponsePromptParam' + description: >- + Prompt object with ID, version, and variables. 
instructions: type: string previous_response_id: @@ -7478,6 +7514,10 @@ components: type: string description: >- (Optional) ID of the previous response in a conversation + prompt: + $ref: '#/components/schemas/Prompt' + description: >- + (Optional) Prompt object with ID, version, and variables status: type: string description: >- diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index 811fe6aa2..ada2a5029 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -38,6 +38,7 @@ from .openai_responses import ( OpenAIResponseInputTool, OpenAIResponseObject, OpenAIResponseObjectStream, + OpenAIResponsePromptParam, OpenAIResponseText, ) @@ -796,6 +797,7 @@ class Agents(Protocol): self, input: str | list[OpenAIResponseInput], model: str, + prompt: OpenAIResponsePromptParam | None = None, instructions: str | None = None, previous_response_id: str | None = None, store: bool | None = True, @@ -807,9 +809,9 @@ class Agents(Protocol): max_infer_iters: int | None = 10, # this is an extension to the OpenAI API ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]: """Create a new OpenAI response. - :param input: Input message(s) to create the response. :param model: The underlying LLM used for completions. + :param prompt: Prompt object with ID, version, and variables. :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses. :param include: (Optional) Additional fields to include in the response. :returns: An OpenAIResponseObject. diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index 0f3511ea3..356bbffb2 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -9,6 +9,7 @@ from typing import Annotated, Any, Literal from pydantic import BaseModel, Field from typing_extensions import TypedDict +from llama_stack.apis.prompts.prompts import Prompt from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions from llama_stack.schema_utils import json_schema_type, register_schema @@ -336,6 +337,20 @@ class OpenAIResponseTextFormat(TypedDict, total=False): strict: bool | None +@json_schema_type +class OpenAIResponsePromptParam(BaseModel): + """Prompt object that is used for OpenAI responses. + + :param id: Unique identifier of the prompt template + :param variables: Dictionary of variable names to values for template substitution + :param version: Version number of the prompt to use (defaults to latest if not specified) + """ + + id: str + variables: dict[str, Any] | None = None + version: str | None = None + + @json_schema_type class OpenAIResponseText(BaseModel): """Text response configuration for OpenAI responses. @@ -357,6 +372,7 @@ class OpenAIResponseObject(BaseModel): :param object: Object type identifier, always "response" :param output: List of generated output items (messages, tool calls, etc.) 
:param parallel_tool_calls: Whether tool calls can be executed in parallel + :param prompt: (Optional) Prompt object with ID, version, and variables :param previous_response_id: (Optional) ID of the previous response in a conversation :param status: Current status of the response generation :param temperature: (Optional) Sampling temperature used for generation @@ -373,6 +389,7 @@ class OpenAIResponseObject(BaseModel): output: list[OpenAIResponseOutput] parallel_tool_calls: bool = False previous_response_id: str | None = None + prompt: Prompt | None = None status: str temperature: float | None = None # Default to text format to avoid breaking the loading of old responses diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index d5d55319a..6fdc9c1c3 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -321,6 +321,10 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf ) impls[Api.conversations] = conversations_impl + # Set prompts API on agents provider if it exists + if Api.agents in impls and hasattr(impls[Api.agents], "set_prompts_api"): + impls[Api.agents].set_prompts_api(prompts_impl) + class Stack: def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None): diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/llama_stack/providers/inline/agents/meta_reference/__init__.py index 334c32e15..bfe14aa32 100644 --- a/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -21,6 +21,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap deps[Api.safety], deps[Api.tool_runtime], deps[Api.tool_groups], + None, # prompts_api will be set later when available policy, ) await impl.initialize() diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 8bdde86b0..0f789b5c5 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -28,7 +28,7 @@ from llama_stack.apis.agents import ( Session, Turn, ) -from llama_stack.apis.agents.openai_responses import OpenAIResponseText +from llama_stack.apis.agents.openai_responses import OpenAIResponsePromptParam, OpenAIResponseText from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.apis.inference import ( Inference, @@ -37,6 +37,7 @@ from llama_stack.apis.inference import ( ToolResponseMessage, UserMessage, ) +from llama_stack.apis.prompts import Prompts from llama_stack.apis.safety import Safety from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO @@ -63,6 +64,7 @@ class MetaReferenceAgentsImpl(Agents): safety_api: Safety, tool_runtime_api: ToolRuntime, tool_groups_api: ToolGroups, + prompts_api: Prompts | None, policy: list[AccessRule], ): self.config = config @@ -71,6 +73,7 @@ class MetaReferenceAgentsImpl(Agents): self.safety_api = safety_api self.tool_runtime_api = tool_runtime_api self.tool_groups_api = tool_groups_api + self.prompts_api = prompts_api self.in_memory_store = InmemoryKVStoreImpl() self.openai_responses_impl: OpenAIResponsesImpl | None = None @@ -86,8 +89,14 @@ class MetaReferenceAgentsImpl(Agents): tool_runtime_api=self.tool_runtime_api, responses_store=self.responses_store, vector_io_api=self.vector_io_api, + prompts_api=self.prompts_api, ) + def 
set_prompts_api(self, prompts_api: Prompts) -> None: + self.prompts_api = prompts_api + if hasattr(self, "openai_responses_impl") and self.openai_responses_impl: + self.openai_responses_impl.prompts_api = prompts_api + async def create_agent( self, agent_config: AgentConfig, @@ -320,6 +329,7 @@ class MetaReferenceAgentsImpl(Agents): self, input: str | list[OpenAIResponseInput], model: str, + prompt: OpenAIResponsePromptParam | None = None, instructions: str | None = None, previous_response_id: str | None = None, store: bool | None = True, @@ -333,6 +343,7 @@ class MetaReferenceAgentsImpl(Agents): return await self.openai_responses_impl.create_openai_response( input, model, + prompt, instructions, previous_response_id, store, diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 352be3ded..f37f3e22d 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -21,6 +21,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseMessage, OpenAIResponseObject, OpenAIResponseObjectStream, + OpenAIResponsePromptParam, OpenAIResponseText, OpenAIResponseTextFormat, ) @@ -29,6 +30,8 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, OpenAISystemMessageParam, ) +from llama_stack.apis.prompts import Prompts +from llama_stack.apis.prompts.prompts import Prompt from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO from llama_stack.log import get_logger @@ -61,12 +64,14 @@ class OpenAIResponsesImpl: tool_runtime_api: ToolRuntime, responses_store: ResponsesStore, vector_io_api: VectorIO, # VectorIO + prompts_api: Prompts, ): self.inference_api = inference_api self.tool_groups_api = tool_groups_api self.tool_runtime_api = tool_runtime_api self.responses_store = responses_store self.vector_io_api = vector_io_api + self.prompts_api = prompts_api self.tool_executor = ToolExecutor( tool_groups_api=tool_groups_api, tool_runtime_api=tool_runtime_api, @@ -123,6 +128,41 @@ class OpenAIResponsesImpl: if instructions: messages.insert(0, OpenAISystemMessageParam(content=instructions)) + async def _prepend_prompt( + self, messages: list[OpenAIMessageParam], prompt_params: OpenAIResponsePromptParam + ) -> Prompt: + if not prompt_params or not prompt_params.id: + return None + + try: + # Check if prompt exists in Llama Stack and retrieve it + prompt_version = int(prompt_params.version) if prompt_params.version else None + cur_prompt = await self.prompts_api.get_prompt(prompt_params.id, prompt_version) + if cur_prompt and cur_prompt.prompt: + cur_prompt_text = cur_prompt.prompt + cur_prompt_variables = cur_prompt.variables + + final_prompt_text = cur_prompt_text + if prompt_params.variables: + # check if the variables are valid + for name in prompt_params.variables.keys(): + if name not in cur_prompt_variables: + raise ValueError(f"Variable {name} not found in prompt {prompt_params.id}") + + # replace the variables in the prompt text + for name, value in prompt_params.variables.items(): + final_prompt_text = final_prompt_text.replace(f"{{{{ {name} }}}}", str(value)) + + messages.insert(0, OpenAISystemMessageParam(content=final_prompt_text)) + logger.info(f"Prompt {prompt_params.id} found and applied\nFinal prompt text: {final_prompt_text}") + return cur_prompt + + except ValueError: + 
logger.warning( + f"Prompt {prompt_params.id} with version {prompt_params.version} not found, skipping prompt prepending" + ) + return None + async def get_openai_response( self, response_id: str, @@ -199,6 +239,7 @@ class OpenAIResponsesImpl: self, input: str | list[OpenAIResponseInput], model: str, + prompt: OpenAIResponsePromptParam | None = None, instructions: str | None = None, previous_response_id: str | None = None, store: bool | None = True, @@ -215,6 +256,7 @@ class OpenAIResponsesImpl: stream_gen = self._create_streaming_response( input=input, model=model, + prompt=prompt, instructions=instructions, previous_response_id=previous_response_id, store=store, @@ -243,6 +285,7 @@ class OpenAIResponsesImpl: self, input: str | list[OpenAIResponseInput], model: str, + prompt: OpenAIResponsePromptParam | None = None, instructions: str | None = None, previous_response_id: str | None = None, store: bool | None = True, @@ -253,6 +296,10 @@ class OpenAIResponsesImpl: ) -> AsyncIterator[OpenAIResponseObjectStream]: # Input preprocessing all_input, messages = await self._process_input_with_previous_response(input, previous_response_id) + + # Prepend reusable prompt (if provided) + prompt_obj = await self._prepend_prompt(messages, prompt) + await self._prepend_instructions(messages, instructions) # Structured outputs @@ -276,6 +323,7 @@ class OpenAIResponsesImpl: ctx=ctx, response_id=response_id, created_at=created_at, + prompt=prompt_obj, text=text, max_infer_iters=max_infer_iters, tool_executor=self.tool_executor, diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 0bb524f5c..df47753e9 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -45,6 +45,7 @@ from llama_stack.apis.inference import ( OpenAIChoice, OpenAIMessageParam, ) +from llama_stack.apis.prompts.prompts import Prompt from llama_stack.log import get_logger from .types import ChatCompletionContext, ChatCompletionResult @@ -81,6 +82,7 @@ class StreamingResponseOrchestrator: ctx: ChatCompletionContext, response_id: str, created_at: int, + prompt: Prompt | None, text: OpenAIResponseText, max_infer_iters: int, tool_executor, # Will be the tool execution logic from the main class @@ -89,6 +91,7 @@ class StreamingResponseOrchestrator: self.ctx = ctx self.response_id = response_id self.created_at = created_at + self.prompt = prompt self.text = text self.max_infer_iters = max_infer_iters self.tool_executor = tool_executor @@ -109,6 +112,7 @@ class StreamingResponseOrchestrator: object="response", status="in_progress", output=output_messages.copy(), + prompt=self.prompt, text=self.text, ) @@ -195,6 +199,7 @@ class StreamingResponseOrchestrator: model=self.ctx.model, object="response", status="completed", + prompt=self.prompt, text=self.text, output=output_messages, ) diff --git a/tests/unit/providers/agent/test_meta_reference_agent.py b/tests/unit/providers/agent/test_meta_reference_agent.py index fdbb2b8e9..beb622eb2 100644 --- a/tests/unit/providers/agent/test_meta_reference_agent.py +++ b/tests/unit/providers/agent/test_meta_reference_agent.py @@ -59,7 +59,8 @@ async def agents_impl(config, mock_apis): mock_apis["safety_api"], mock_apis["tool_runtime_api"], mock_apis["tool_groups_api"], - {}, + None, # prompts_api (will be set later via set_prompts_api if needed) + [], # policy (empty list for tests) ) await 
impl.initialize() yield impl diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index f2b29c1f7..ca701bc22 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -38,6 +38,7 @@ from llama_stack.apis.inference import ( OpenAIResponseFormatJSONSchema, OpenAIUserMessageParam, ) +from llama_stack.apis.prompts import Prompt from llama_stack.apis.tools.tools import ToolDef, ToolGroups, ToolInvocationResult, ToolRuntime from llama_stack.core.access_control.access_control import default_policy from llama_stack.core.datatypes import ResponsesStoreConfig @@ -82,9 +83,20 @@ def mock_vector_io_api(): return vector_io_api +@pytest.fixture +def mock_prompts_api(): + prompts_api = AsyncMock() + return prompts_api + + @pytest.fixture def openai_responses_impl( - mock_inference_api, mock_tool_groups_api, mock_tool_runtime_api, mock_responses_store, mock_vector_io_api + mock_inference_api, + mock_tool_groups_api, + mock_tool_runtime_api, + mock_responses_store, + mock_vector_io_api, + mock_prompts_api, ): return OpenAIResponsesImpl( inference_api=mock_inference_api, @@ -92,6 +104,7 @@ def openai_responses_impl( tool_runtime_api=mock_tool_runtime_api, responses_store=mock_responses_store, vector_io_api=mock_vector_io_api, + prompts_api=mock_prompts_api, ) @@ -1004,3 +1017,56 @@ async def test_create_openai_response_with_invalid_text_format(openai_responses_ model=model, text=OpenAIResponseText(format={"type": "invalid"}), ) + + +async def test_create_openai_response_with_prompt(openai_responses_impl, mock_inference_api, mock_prompts_api): + """Test creating an OpenAI response with a prompt.""" + input_text = "What is the capital of Ireland?" + model = "meta-llama/Llama-3.1-8B-Instruct" + prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef" + prompt = Prompt( + prompt="You are a helpful {{ area_name }} assistant at {{ company_name }}. Always provide accurate information.", + prompt_id=prompt_id, + version=1, + variables=["area_name", "company_name"], + is_default=True, + ) + + from llama_stack.apis.agents.openai_responses import OpenAIResponsePromptParam + + prompt_params_with_version_1 = OpenAIResponsePromptParam( + id=prompt_id, version="1", variables={"area_name": "geography", "company_name": "Dummy Company"} + ) + + mock_prompts_api.get_prompt.return_value = prompt + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + prompt=prompt_params_with_version_1, + ) + + mock_prompts_api.get_prompt.assert_called_with(prompt_id, 1) + mock_inference_api.openai_chat_completion.assert_called() + call_args = mock_inference_api.openai_chat_completion.call_args + sent_messages = call_args.kwargs["messages"] + assert len(sent_messages) == 2 + + system_messages = [msg for msg in sent_messages if msg.role == "system"] + assert len(system_messages) == 1 + assert ( + system_messages[0].content + == "You are a helpful geography assistant at Dummy Company. Always provide accurate information." 
+ ) + + user_messages = [msg for msg in sent_messages if msg.role == "user"] + assert len(user_messages) == 1 + assert user_messages[0].content == input_text + + assert result.model == model + assert result.status == "completed" + assert result.prompt.prompt_id == prompt_id + assert result.prompt.variables == ["area_name", "company_name"] + assert result.prompt.version == 1 + assert result.prompt.prompt == prompt.prompt
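
Usage sketch (not part of the patch): the snippet below mirrors the new unit test and shows how a caller might exercise the `prompt` parameter added to `create_openai_response`. The prompt ID, the model name, and the `agents_api` handle are illustrative assumptions; any object implementing the `Agents` protocol, with a prompt of that ID already stored via the Prompts API, would behave the same way.

    from llama_stack.apis.agents.openai_responses import OpenAIResponsePromptParam


    async def ask_with_stored_prompt(agents_api):
        # Reference a stored prompt by ID; version and variables are optional.
        # The ID below is hypothetical and follows the documented
        # 'pmpt_<48-digit-hash>' format.
        prompt_param = OpenAIResponsePromptParam(
            id="pmpt_1234567890abcdef1234567890abcdef1234567890abcdef",
            version="1",  # defaults to the latest version when omitted
            variables={"area_name": "geography", "company_name": "Dummy Company"},
        )

        response = await agents_api.create_openai_response(
            input="What is the capital of Ireland?",
            model="meta-llama/Llama-3.1-8B-Instruct",
            prompt=prompt_param,
        )

        # The stored Prompt resource is echoed back on the response object.
        print(response.prompt.prompt_id, response.prompt.version)
        return response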
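
Implementation note (illustrative, not part of the patch): `_prepend_prompt` validates that every supplied variable is declared on the stored prompt, then performs plain string replacement over "{{ name }}" placeholders (one space on each side of the name) before inserting the rendered text as a system message. A minimal standalone sketch of that substitution convention:

    from typing import Any


    def render_prompt(template: str, declared_variables: list[str], values: dict[str, Any]) -> str:
        # Reject variables that the stored prompt does not declare, as the provider does.
        for name in values:
            if name not in declared_variables:
                raise ValueError(f"Variable {name} not found in prompt")
        rendered = template
        for name, value in values.items():
            rendered = rendered.replace(f"{{{{ {name} }}}}", str(value))
        return rendered


    template = "You are a helpful {{ area_name }} assistant at {{ company_name }}."
    print(render_prompt(template, ["area_name", "company_name"],
                        {"area_name": "geography", "company_name": "Dummy Company"}))
    # -> You are a helpful geography assistant at Dummy Company.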