feat: Structured output for Responses API

This adds the missing `text` parameter to the Responses API, which is
how users control structured outputs. All we do with that parameter is
map it to the corresponding chat completion `response_format`.

The unit tests exercise the various permutations allowed for this
property, while a couple of new verification tests actually use it to
verify that the model outputs follow the format as expected.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning 2025-05-31 13:44:20 -04:00
parent 4540c9b3e5
commit badf8594d1
8 changed files with 323 additions and 2 deletions

View file

@ -29,6 +29,7 @@ from llama_stack.apis.agents import (
Session,
Turn,
)
from llama_stack.apis.agents.openai_responses import OpenAIResponseText
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.inference import (
Inference,
@ -324,11 +325,12 @@ class MetaReferenceAgentsImpl(Agents):
store: bool | None = True,
stream: bool | None = False,
temperature: float | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
max_infer_iters: int | None = 10,
) -> OpenAIResponseObject:
return await self.openai_responses_impl.create_openai_response(
input, model, instructions, previous_response_id, store, stream, temperature, tools, max_infer_iters
input, model, instructions, previous_response_id, store, stream, temperature, text, tools, max_infer_iters
)
async def list_openai_responses(