feat: Structured output for Responses API (#2324)

# What does this PR do?

This adds the missing `text` parameter to the Responses API that is how
users control structured outputs. All we do with that parameter is map
it to the corresponding chat completion response_format.

## Test Plan

The new unit tests exercise the various permutations allowed for this
property, while a couple of new verification tests actually use it for
real to verify the model outputs are following the format as expected.

Unit tests:

`python -m pytest -s -v
tests/unit/providers/agents/meta_reference/test_openai_responses.py`

Verification tests:

```
llama stack run llama_stack/templates/together/run.yaml
pytest -s -vv 'tests/verifications/openai_api/test_responses.py' \
  --base-url=http://localhost:8321/v1/openai/v1 \
  --model meta-llama/Llama-4-Scout-17B-16E-Instruct
```

Note that the verification tests can only be run with a real Llama Stack
server (as opposed to using the library client via
`--provider=stack:together`) because the Llama Stack python client is
not yet updated to accept this text field.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning 2025-06-03 17:43:00 -04:00 committed by GitHub
parent c70ca8344f
commit 8bee2954be
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 323 additions and 2 deletions

View file

@ -37,6 +37,7 @@ from .openai_responses import (
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponseObjectStream,
OpenAIResponseText,
)
# TODO: use enum.StrEnum when we drop support for python 3.10
@ -603,6 +604,7 @@ class Agents(Protocol):
store: bool | None = True,
stream: bool | None = False,
temperature: float | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:

View file

@ -7,6 +7,7 @@
from typing import Annotated, Any, Literal
from pydantic import BaseModel, Field
from typing_extensions import TypedDict
from llama_stack.schema_utils import json_schema_type, register_schema
@ -126,6 +127,32 @@ OpenAIResponseOutput = Annotated[
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
# This has to be a TypedDict because we need a "schema" field and our strong
# typing code in the schema generator doesn't support Pydantic aliases. That also
# means we can't use a discriminator field here, because TypedDicts don't support
# default values which the strong typing code requires for discriminators.
class OpenAIResponseTextFormat(TypedDict, total=False):
"""Configuration for Responses API text format.
:param type: Must be "text", "json_schema", or "json_object" to identify the format type
:param name: The name of the response format. Only used for json_schema.
:param schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. Only used for json_schema.
:param description: (Optional) A description of the response format. Only used for json_schema.
:param strict: (Optional) Whether to strictly enforce the JSON schema. If true, the response must match the schema exactly. Only used for json_schema.
"""
type: Literal["text"] | Literal["json_schema"] | Literal["json_object"]
name: str | None
schema: dict[str, Any] | None
description: str | None
strict: bool | None
@json_schema_type
class OpenAIResponseText(BaseModel):
format: OpenAIResponseTextFormat | None = None
@json_schema_type
class OpenAIResponseObject(BaseModel):
created_at: int
@ -138,6 +165,9 @@ class OpenAIResponseObject(BaseModel):
previous_response_id: str | None = None
status: str
temperature: float | None = None
# Default to text format to avoid breaking the loading of old responses
# before the field was added. New responses will have this set always.
text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
top_p: float | None = None
truncation: str | None = None
user: str | None = None