feat(responses)!: introduce OpenAI compatible prompts to Responses API (#3942)

# What does this PR do?
This PR changes the Responses API schema to introduce OpenAI-compatible prompts. It is an API-only change, so there is currently no implementation; a follow-up PR with the actual implementation will be submitted after this one lands.

The need for this functionality was raised in #3514.

> Note: #3514 is split into three separate PRs; this is the second of the three.
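
For context, this mirrors the `prompt` parameter of OpenAI's Responses API, which references a stored, reusable prompt template. A minimal sketch of the call shape using the OpenAI Python SDK (the prompt ID, version, and variables below are placeholders):

```python
from openai import OpenAI

client = OpenAI()

# "pmpt_abc123" is a placeholder ID for a stored prompt template;
# "variables" fills the template's placeholders at request time.
response = client.responses.create(
    model="gpt-4o",
    prompt={
        "id": "pmpt_abc123",
        "version": "2",
        "variables": {"customer_name": "Jane Doe"},
    },
)
print(response.output_text)
```

The schema changes in this PR add an equivalent `prompt` field to Llama Stack's Responses API surface.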


## Test Plan
CI
Ian Miller committed 2025-10-28 16:31:27 +00:00 (via GitHub)
commit 5598f61e12 (parent e5ca7e6450)
12 changed files with 593 additions and 8 deletions


```diff
@@ -29,7 +29,7 @@ from llama_stack.apis.agents import (
     Turn,
 )
 from llama_stack.apis.agents.agents import ResponseGuardrail
-from llama_stack.apis.agents.openai_responses import OpenAIResponseText
+from llama_stack.apis.agents.openai_responses import OpenAIResponsePrompt, OpenAIResponseText
 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.apis.conversations import Conversations
 from llama_stack.apis.inference import (
@@ -329,6 +329,7 @@ class MetaReferenceAgentsImpl(Agents):
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         conversation: str | None = None,
@@ -344,6 +345,7 @@ class MetaReferenceAgentsImpl(Agents):
         return await self.openai_responses_impl.create_openai_response(
             input,
             model,
+            prompt,
             instructions,
             previous_response_id,
             conversation,
```
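
The hunks above only import and thread `OpenAIResponsePrompt`; its definition lives in `llama_stack.apis.agents.openai_responses`, outside the diff shown here. A plausible Pydantic sketch of that type, modeled on OpenAI's prompt object (field names and types are assumptions):

```python
from pydantic import BaseModel


class OpenAIResponsePrompt(BaseModel):
    """Sketch of a reference to a stored prompt template.

    The real definition is in llama_stack.apis.agents.openai_responses
    and may differ in field names and types.
    """

    id: str  # identifier of the stored prompt template
    version: str | None = None  # specific template version; latest if omitted
    variables: dict[str, str] | None = None  # values substituted into template placeholders
```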


```diff
@@ -22,6 +22,7 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseMessage,
     OpenAIResponseObject,
     OpenAIResponseObjectStream,
+    OpenAIResponsePrompt,
     OpenAIResponseText,
     OpenAIResponseTextFormat,
 )
@@ -239,6 +240,7 @@ class OpenAIResponsesImpl:
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         conversation: str | None = None,
```


```diff
@@ -49,6 +49,7 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageMCPCall,
     OpenAIResponseOutputMessageMCPListTools,
     OpenAIResponseOutputMessageWebSearchToolCall,
+    OpenAIResponsePrompt,
     OpenAIResponseText,
     OpenAIResponseUsage,
     OpenAIResponseUsageInputTokensDetails,
@@ -113,6 +114,7 @@ class StreamingResponseOrchestrator:
         instructions: str,
         safety_api,
         guardrail_ids: list[str] | None = None,
+        prompt: OpenAIResponsePrompt | None = None,
     ):
         self.inference_api = inference_api
         self.ctx = ctx
@@ -123,6 +125,7 @@ class StreamingResponseOrchestrator:
         self.tool_executor = tool_executor
         self.safety_api = safety_api
         self.guardrail_ids = guardrail_ids or []
+        self.prompt = prompt
         self.sequence_number = 0
         # Store MCP tool mapping that gets built during tool processing
         self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = ctx.tool_context.previous_tools or {}
@@ -180,6 +183,7 @@ class StreamingResponseOrchestrator:
             error=error,
             usage=self.accumulated_usage,
             instructions=self.instructions,
+            prompt=self.prompt,
         )

     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
```
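
Since this PR is schema-only, the orchestrator merely stores `prompt` and echoes it back on the response object; template resolution is deferred to the follow-up PR. A hypothetical sketch of what that step could look like, assuming the Prompts API from the first PR in this series exposes a `get_prompt` method and that templates use `{{name}}` placeholders (both are assumptions):

```python
from llama_stack.apis.agents.openai_responses import OpenAIResponsePrompt


async def resolve_prompt_text(prompts_api, prompt: OpenAIResponsePrompt | None) -> str | None:
    """Hypothetical helper: fetch a stored template and substitute variables.

    `prompts_api.get_prompt` and the `{{name}}` placeholder syntax are
    assumptions; the follow-up PR may resolve templates differently.
    """
    if prompt is None:
        return None
    # Look up the stored template by ID (and optionally a specific version).
    template = await prompts_api.get_prompt(prompt.id, prompt.version)
    text = template.prompt  # assumed field holding the raw template string
    # Naive placeholder substitution for illustration only.
    for name, value in (prompt.variables or {}).items():
        text = text.replace("{{" + name + "}}", str(value))
    return text
```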