feat(responses)!: add support for OpenAI compatible Prompts in Responses API

Author: r3v5
Date: 2025-09-21 13:52:55 +01:00
Parent: bd3c473208
Commit: 59169bfd25
33 changed files with 1667 additions and 34 deletions
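For orientation, a minimal, hedged usage sketch of the new prompt parameter from an OpenAI-compatible client. The base URL, model name, prompt ID, and file ID below are placeholders; variable values follow the content-part shapes this commit handles (input_text, input_image, input_file):

from openai import OpenAI

# Placeholder endpoint and credentials for a locally running Llama Stack server.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="llama3.2:3b",  # placeholder model id
    prompt={
        "id": "pmpt_123",  # hypothetical id of a prompt stored via the Prompts API
        "version": "1",  # optional; the latest version is used when omitted
        "variables": {
            "name": {"type": "input_text", "text": "Alice"},
            "product_photo": {"type": "input_image", "file_id": "file-abc123"},
        },
    },
    input="What is in the photo?",
)
print(response.output_text)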


@@ -20,15 +20,17 @@ async def get_provider_impl(
from .agents import MetaReferenceAgentsImpl
impl = MetaReferenceAgentsImpl(
config,
deps[Api.inference],
deps[Api.vector_io],
deps[Api.safety],
deps[Api.tool_runtime],
deps[Api.tool_groups],
deps[Api.conversations],
policy,
telemetry_enabled,
config=config,
inference_api=deps[Api.inference],
vector_io_api=deps[Api.vector_io],
safety_api=deps[Api.safety],
tool_runtime_api=deps[Api.tool_runtime],
tool_groups_api=deps[Api.tool_groups],
conversations_api=deps[Api.conversations],
prompts_api=deps[Api.prompts],
files_api=deps[Api.files],
telemetry_enabled=Api.telemetry in deps,
policy=policy,
)
await impl.initialize()
return impl


@@ -29,9 +29,10 @@ from llama_stack.apis.agents import (
Turn,
)
from llama_stack.apis.agents.agents import ResponseGuardrail
from llama_stack.apis.agents.openai_responses import OpenAIResponseText
from llama_stack.apis.agents.openai_responses import OpenAIResponsePromptParam, OpenAIResponseText
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.conversations import Conversations
from llama_stack.apis.files import Files
from llama_stack.apis.inference import (
Inference,
ToolConfig,
@@ -39,6 +40,7 @@ from llama_stack.apis.inference import (
ToolResponseMessage,
UserMessage,
)
from llama_stack.apis.prompts import Prompts
from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
@@ -66,6 +68,8 @@ class MetaReferenceAgentsImpl(Agents):
tool_runtime_api: ToolRuntime,
tool_groups_api: ToolGroups,
conversations_api: Conversations,
prompts_api: Prompts,
files_api: Files,
policy: list[AccessRule],
telemetry_enabled: bool = False,
):
@@ -77,7 +81,8 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_groups_api = tool_groups_api
self.conversations_api = conversations_api
self.telemetry_enabled = telemetry_enabled
self.prompts_api = prompts_api
self.files_api = files_api
self.in_memory_store = InmemoryKVStoreImpl()
self.openai_responses_impl: OpenAIResponsesImpl | None = None
self.policy = policy
@@ -94,6 +99,8 @@ class MetaReferenceAgentsImpl(Agents):
vector_io_api=self.vector_io_api,
safety_api=self.safety_api,
conversations_api=self.conversations_api,
prompts_api=self.prompts_api,
files_api=self.files_api,
)
async def create_agent(
@@ -329,6 +336,7 @@ class MetaReferenceAgentsImpl(Agents):
self,
input: str | list[OpenAIResponseInput],
model: str,
prompt: OpenAIResponsePromptParam | None = None,
instructions: str | None = None,
previous_response_id: str | None = None,
conversation: str | None = None,
@@ -344,6 +352,7 @@ class MetaReferenceAgentsImpl(Agents):
return await self.openai_responses_impl.create_openai_response(
input,
model,
prompt,
instructions,
previous_response_id,
conversation,


@@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import re
import time
import uuid
from collections.abc import AsyncIterator
@@ -17,11 +18,14 @@ from llama_stack.apis.agents.openai_responses import (
ListOpenAIResponseObject,
OpenAIDeleteResponseObject,
OpenAIResponseInput,
OpenAIResponseInputMessageContentFile,
OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool,
OpenAIResponseMessage,
OpenAIResponseObject,
OpenAIResponseObjectStream,
OpenAIResponsePromptParam,
OpenAIResponseText,
OpenAIResponseTextFormat,
)
@@ -30,11 +34,17 @@ from llama_stack.apis.common.errors import (
)
from llama_stack.apis.conversations import Conversations
from llama_stack.apis.conversations.conversations import ConversationItem
from llama_stack.apis.files import Files
from llama_stack.apis.inference import (
Inference,
OpenAIChatCompletionContentPartParam,
OpenAIChatCompletionContentPartTextParam,
OpenAIMessageParam,
OpenAISystemMessageParam,
OpenAIUserMessageParam,
)
from llama_stack.apis.prompts import Prompts
from llama_stack.apis.prompts.prompts import Prompt
from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
@@ -71,6 +81,8 @@ class OpenAIResponsesImpl:
vector_io_api: VectorIO, # VectorIO
safety_api: Safety,
conversations_api: Conversations,
prompts_api: Prompts,
files_api: Files,
):
self.inference_api = inference_api
self.tool_groups_api = tool_groups_api
@@ -84,6 +96,8 @@ class OpenAIResponsesImpl:
tool_runtime_api=tool_runtime_api,
vector_io_api=vector_io_api,
)
self.prompts_api = prompts_api
self.files_api = files_api
async def _prepend_previous_response(
self,
@@ -123,11 +137,13 @@
# Use stored messages directly and convert only new input
message_adapter = TypeAdapter(list[OpenAIMessageParam])
messages = message_adapter.validate_python(previous_response.messages)
new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
new_messages = await convert_response_input_to_chat_messages(
input, previous_messages=messages, files_api=self.files_api
)
messages.extend(new_messages)
else:
# Backward compatibility: reconstruct from inputs
messages = await convert_response_input_to_chat_messages(all_input)
messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
tool_context.recover_tools_from_previous_response(previous_response)
elif conversation is not None:
@@ -139,7 +155,7 @@
all_input = input
if not conversation_items.data:
# First turn - just convert the new input
messages = await convert_response_input_to_chat_messages(input)
messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api)
else:
if not stored_messages:
all_input = conversation_items.data
@@ -155,14 +171,114 @@
all_input = input
messages = stored_messages or []
new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages)
new_messages = await convert_response_input_to_chat_messages(
all_input, previous_messages=messages, files_api=self.files_api
)
messages.extend(new_messages)
else:
all_input = input
messages = await convert_response_input_to_chat_messages(all_input)
messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
return all_input, messages, tool_context
async def _prepend_prompt(
self,
messages: list[OpenAIMessageParam],
prompt_params: OpenAIResponsePromptParam | None,
) -> Prompt | None:
"""Prepend prompt template to messages, resolving text/image/file variables.
For text-only prompts: Inserts as system message
For prompts with media: Inserts text as system message + media into first user message
"""
if not prompt_params or not prompt_params.id:
return None
prompt_version = int(prompt_params.version) if prompt_params.version else None
cur_prompt = await self.prompts_api.get_prompt(prompt_params.id, prompt_version)
if not cur_prompt:
return None
cur_prompt_text = cur_prompt.prompt
cur_prompt_variables = cur_prompt.variables
if not prompt_params.variables:
messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text))
return cur_prompt
# Validate that all provided variables exist in the prompt
for name in prompt_params.variables.keys():
if name not in cur_prompt_variables:
raise ValueError(f"Variable {name} not found in prompt {prompt_params.id}")
# Separate text and media variables
text_substitutions = {}
media_content_parts = []
for name, value in prompt_params.variables.items():
# Text variable found
if isinstance(value, OpenAIResponseInputMessageContentText):
text_substitutions[name] = value.text
# Media variable found
elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile):
# Use the existing converter to produce OpenAI Chat Completion content parts
from .utils import convert_response_content_to_chat_content
converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api)
media_content_parts.extend(converted_parts)
# E.g. {{product_photo}} becomes "[Image: product_photo]"
# This gives the model textual context about what media exists in the prompt
var_type = value.type.replace("input_", "").replace("_", " ").title()
text_substitutions[name] = f"[{var_type}: {name}]"
def replace_variable(match: re.Match[str]) -> str:
var_name = match.group(1).strip()
return str(text_substitutions.get(var_name, match.group(0)))
pattern = r"\{\{\s*(\w+)\s*\}\}"
resolved_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text)
# Insert system message with resolved text
messages.insert(0, OpenAISystemMessageParam(content=resolved_prompt_text))
# If we have media, prepend to first user message
if media_content_parts:
self._prepend_media_into_first_user_message(messages, media_content_parts)
return cur_prompt
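To make the substitution step concrete, a self-contained sketch of the same regex resolution with illustrative values:

import re

prompt_text = "Greet {{ name }} and describe {{product_photo}}."
# Media variables are substituted with textual placeholders, as above.
text_substitutions = {"name": "Alice", "product_photo": "[Image: product_photo]"}

def replace_variable(match: re.Match[str]) -> str:
    var_name = match.group(1).strip()
    # Unknown variables are left untouched rather than dropped.
    return str(text_substitutions.get(var_name, match.group(0)))

print(re.sub(r"\{\{\s*(\w+)\s*\}\}", replace_variable, prompt_text))
# -> Greet Alice and describe [Image: product_photo].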
def _prepend_media_into_first_user_message(
self, messages: list[OpenAIMessageParam], media_parts: list[OpenAIChatCompletionContentPartParam]
) -> None:
"""Prepend media content parts into the first user message."""
# Find first user message (skip the system message we just added)
first_user_msg_index = None
for i, message in enumerate(messages):
if isinstance(message, OpenAIUserMessageParam):
first_user_msg_index = i
break
if first_user_msg_index is not None:
user_msg = messages[first_user_msg_index]
# Convert string content to parts if needed, otherwise use existing parts directly
if isinstance(user_msg.content, str):
existing_parts = [OpenAIChatCompletionContentPartTextParam(text=user_msg.content)]
else:
existing_parts = user_msg.content
# Prepend media before user's content
combined_parts = media_parts + existing_parts
messages[first_user_msg_index] = OpenAIUserMessageParam(content=combined_parts, name=user_msg.name)
else:
# No user message exists - append one with just media
messages.append(OpenAIUserMessageParam(content=media_parts))
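The net effect on the message list, sketched with placeholder values:

# Before: [system(resolved prompt text), user("What is in the photo?")]
# After:  [system(resolved prompt text),
#          user([image part from {{product_photo}}, text("What is in the photo?")])]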
async def get_openai_response(
self,
response_id: str,
@@ -239,6 +355,7 @@ class OpenAIResponsesImpl:
self,
input: str | list[OpenAIResponseInput],
model: str,
prompt: OpenAIResponsePromptParam | None = None,
instructions: str | None = None,
previous_response_id: str | None = None,
conversation: str | None = None,
@@ -269,6 +386,7 @@
input=input,
conversation=conversation,
model=model,
prompt=prompt,
instructions=instructions,
previous_response_id=previous_response_id,
store=store,
@@ -314,6 +432,7 @@
self,
input: str | list[OpenAIResponseInput],
model: str,
prompt: OpenAIResponsePromptParam | None = None,
instructions: str | None = None,
previous_response_id: str | None = None,
conversation: str | None = None,
@@ -332,6 +451,9 @@
if instructions:
messages.insert(0, OpenAISystemMessageParam(content=instructions))
# Prepend reusable prompt (if provided)
prompt_obj = await self._prepend_prompt(messages, prompt)
# Structured outputs
response_format = await convert_response_text_to_chat_response_format(text)
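One subtlety in the ordering here: instructions is inserted at index 0 first, and _prepend_prompt also inserts at index 0, so the resolved prompt template ends up ahead of the instructions in the final message list. A toy illustration with plain strings:

messages = ["user: hi"]
messages.insert(0, "system: <instructions>")  # instructions inserted first
messages.insert(0, "system: <resolved prompt>")  # prompt prepended afterwards
print(messages)
# ['system: <resolved prompt>', 'system: <instructions>', 'user: hi']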
@@ -354,6 +476,7 @@
ctx=ctx,
response_id=response_id,
created_at=created_at,
prompt=prompt_obj,
text=text,
max_infer_iters=max_infer_iters,
tool_executor=self.tool_executor,


@@ -65,6 +65,7 @@ from llama_stack.apis.inference import (
OpenAIChoice,
OpenAIMessageParam,
)
from llama_stack.apis.prompts.prompts import Prompt
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
from llama_stack.providers.utils.telemetry import tracing
@@ -107,6 +108,7 @@ class StreamingResponseOrchestrator:
ctx: ChatCompletionContext,
response_id: str,
created_at: int,
prompt: Prompt | None,
text: OpenAIResponseText,
max_infer_iters: int,
tool_executor, # Will be the tool execution logic from the main class
@@ -118,6 +120,7 @@
self.ctx = ctx
self.response_id = response_id
self.created_at = created_at
self.prompt = prompt
self.text = text
self.max_infer_iters = max_infer_iters
self.tool_executor = tool_executor
@@ -175,6 +178,7 @@
object="response",
status=status,
output=self._clone_outputs(outputs),
prompt=self.prompt,
text=self.text,
tools=self.ctx.available_tools(),
error=error,


@@ -5,6 +5,7 @@
# the root directory of this source tree.
import asyncio
import base64
import re
import uuid
@@ -14,6 +15,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInput,
OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputMessageContent,
OpenAIResponseInputMessageContentFile,
OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool,
@@ -27,6 +29,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseOutputMessageMCPListTools,
OpenAIResponseText,
)
from llama_stack.apis.files import Files
from llama_stack.apis.inference import (
OpenAIAssistantMessageParam,
OpenAIChatCompletionContentPartImageParam,
@@ -36,6 +39,8 @@ from llama_stack.apis.inference import (
OpenAIChatCompletionToolCallFunction,
OpenAIChoice,
OpenAIDeveloperMessageParam,
OpenAIFile,
OpenAIFileFile,
OpenAIImageURL,
OpenAIJSONSchema,
OpenAIMessageParam,
@@ -50,6 +55,49 @@ from llama_stack.apis.inference import (
from llama_stack.apis.safety import Safety
async def extract_file_content(file_id: str, files_api: Files) -> bytes:
"""
Retrieve file content directly using the Files API.
:param file_id: The file identifier (e.g., "file-abc123")
:param files_api: Files API instance
:returns: Raw file content as bytes
:raises: ValueError if file cannot be retrieved
"""
try:
response = await files_api.openai_retrieve_file_content(file_id)
if hasattr(response, "body"):
return response.body
elif hasattr(response, "content"):
return response.content
else:
raise AttributeError(f"Response object has no 'body' or 'content' attribute. Type: {type(response)}")
except Exception as e:
raise ValueError(f"Failed to retrieve file content for file_id '{file_id}': {str(e)}") from e
def get_mime_type_from_filename(filename: str | None) -> str:
"""
Determine MIME type from filename extension.
:param filename: The filename to analyze
:returns: MIME type string (defaults to "application/octet-stream" if unknown)
"""
if not filename:
return "application/octet-stream"
filename_lower = filename.lower()
if filename_lower.endswith(".pdf"):
return "application/pdf"
elif filename_lower.endswith((".png", ".jpg", ".jpeg")):
ext = filename_lower.split(".")[-1]
return f"image/{ext.replace('jpg', 'jpeg')}"
elif filename_lower.endswith(".txt"):
return "text/plain"
else:
return "application/octet-stream"
async def convert_chat_choice_to_response_message(
choice: OpenAIChoice,
citation_files: dict[str, str] | None = None,
@@ -79,11 +127,15 @@ async def convert_chat_choice_to_response_message(
async def convert_response_content_to_chat_content(
content: (str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]),
files_api: Files,
) -> str | list[OpenAIChatCompletionContentPartParam]:
"""
Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
The content schemas of each API look similar, but are not exactly the same.
:param content: The content to convert
:param files_api: Files API for resolving file_id to raw file content (required)
"""
if isinstance(content, str):
return content
@@ -95,9 +147,69 @@ async def convert_response_content_to_chat_content(
elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText):
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
elif isinstance(content_part, OpenAIResponseInputMessageContentImage):
detail = content_part.detail
if content_part.image_url:
image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail)
image_url = OpenAIImageURL(url=content_part.image_url, detail=detail)
converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
elif content_part.file_id:
file_content = await extract_file_content(content_part.file_id, files_api)
encoded_content = base64.b64encode(file_content).decode("utf-8")
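# Note: the data URL assumes PNG; the file's actual MIME type is not inspected here.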
data_url = f"data:image/png;base64,{encoded_content}"
image_url = OpenAIImageURL(url=data_url, detail=detail)
converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
else:
raise ValueError(
f"Image content must have either 'image_url' or 'file_id'. "
f"Got image_url={content_part.image_url}, file_id={content_part.file_id}"
)
elif isinstance(content_part, OpenAIResponseInputMessageContentFile):
file_data = getattr(content_part, "file_data", None)
file_id = getattr(content_part, "file_id", None)
file_url = getattr(content_part, "file_url", None)
filename = getattr(content_part, "filename", None)
if not any([file_id, file_data, file_url]):
raise ValueError(
f"File content must have at least one of 'file_id', 'file_data', or 'file_url'. "
f"Got file_id={file_id}, file_data={'<data>' if file_data else None}, file_url={file_url}"
)
resolved_file_data = None
if file_id:
file_content = await extract_file_content(file_id, files_api)
# If filename is not provided, fetch it from the Files API
if not filename:
file_metadata = await files_api.openai_retrieve_file(file_id)
filename = file_metadata.filename
# Determine MIME type and encode as data URL
mime_type = get_mime_type_from_filename(filename)
base64_content = base64.b64encode(file_content).decode("utf-8")
resolved_file_data = f"data:{mime_type};base64,{base64_content}"
elif file_data:
# If file_data provided directly
if file_data.startswith("data:"):
resolved_file_data = file_data
else:
# Raw base64 data, wrap in data URL format
mime_type = get_mime_type_from_filename(filename)
resolved_file_data = f"data:{mime_type};base64,{file_data}"
elif file_url:
resolved_file_data = file_url
converted_parts.append(
OpenAIFile(
file=OpenAIFileFile(
file_data=resolved_file_data,
filename=filename,
)
)
)
elif isinstance(content_part, str):
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part))
else:
@@ -110,12 +222,14 @@ async def convert_response_content_to_chat_content(
async def convert_response_input_to_chat_messages(
input: str | list[OpenAIResponseInput],
previous_messages: list[OpenAIMessageParam] | None = None,
files_api: Files | None = None,
) -> list[OpenAIMessageParam]:
"""
Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages.
:param input: The input to convert
:param previous_messages: Optional previous messages to check for function_call references
:param files_api: Files API for resolving file_id to raw file content (optional, required for file/image content)
"""
messages: list[OpenAIMessageParam] = []
if isinstance(input, list):
@@ -173,7 +287,7 @@ async def convert_response_input_to_chat_messages(
# these are handled by the responses impl itself and not passed through to chat completions
pass
else:
content = await convert_response_content_to_chat_content(input_item.content)
content = await convert_response_content_to_chat_content(input_item.content, files_api)
message_type = await get_message_type_by_role(input_item.role)
if message_type is None:
raise ValueError(