feat: Structured output for Responses API (#2324)

# What does this PR do? This adds the missing `text` parameter to the Responses API, which is how users control structured outputs. All we do with that parameter is map it to the corresponding chat completion response_format. ## Test Plan The new unit tests exercise the various permutations allowed for this property, while a couple of new verification tests actually use it for real to verify the model outputs are following the format as expected. Unit tests: `python -m pytest -s -v tests/unit/providers/agents/meta_reference/test_openai_responses.py` Verification tests: ``` llama stack run llama_stack/templates/together/run.yaml pytest -s -vv 'tests/verifications/openai_api/test_responses.py' \ --base-url=http://localhost:8321/v1/openai/v1 \ --model meta-llama/Llama-4-Scout-17B-16E-Instruct ``` Note that the verification tests can only be run with a real Llama Stack server (as opposed to using the library client via `--provider=stack:together`) because the Llama Stack Python client is not yet updated to accept this text field. Signed-off-by: Ben Browning <bbrownin@redhat.com>
2025-12-06 02:30:58 +00:00 · 2025-06-03 17:43:00 -04:00 · 2025-06-03 17:43:00 -04:00 · 8bee2954be
commit 8bee2954be
parent c70ca8344f
8 changed files with 323 additions and 2 deletions
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -5118,6 +5118,57 @@ components:
        - type
      title: >-
        OpenAIResponseOutputMessageWebSearchToolCall
+    OpenAIResponseText:  # value of the Responses API `text` parameter (structured-output control)
+      type: object
+      properties:
+        format:  # not mandatory: this object declares no `required` list
+          type: object
+          properties:
+            type:
+              oneOf:  # exactly one of the three string constants below
+                - type: string
+                  const: text
+                - type: string
+                  const: json_schema
+                - type: string
+                  const: json_object
+              description: >-
+                Must be "text", "json_schema", or "json_object" to identify the format
+                type
+            name:
+              type: string
+              description: >-
+                The name of the response format. Only used for json_schema.
+            schema:
+              type: object
+              additionalProperties:  # free-form keys; each value may be any JSON type
+                oneOf:
+                  - type: 'null'  # quoted so YAML reads the string "null", not a null value
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+              description: >-
+                The JSON schema the response should conform to. In a Python SDK, this
+                is often a `pydantic` model. Only used for json_schema.
+            description:
+              type: string
+              description: >-
+                (Optional) A description of the response format. Only used for json_schema.
+            strict:
+              type: boolean
+              description: >-
+                (Optional) Whether to strictly enforce the JSON schema. If true, the
+                response must match the schema exactly. Only used for json_schema.
+          additionalProperties: false  # no keys beyond the five properties above
+          required:
+            - type  # the only mandatory key of the format object
+          title: OpenAIResponseTextFormat
+          description: >-
+            Configuration for Responses API text format.
+      additionalProperties: false  # `format` is the only key allowed on this object
+      title: OpenAIResponseText
    CreateOpenaiResponseRequest:
      type: object
      properties:
@ -5145,6 +5196,8 @@ components:
          type: boolean
        temperature:
          type: number
+        text:
+          $ref: '#/components/schemas/OpenAIResponseText'
        tools:
          type: array
          items:
@ -5196,6 +5249,8 @@ components:
          type: string
        temperature:
          type: number
+        text:
+          $ref: '#/components/schemas/OpenAIResponseText'
        top_p:
          type: number
        truncation:
@ -5211,6 +5266,7 @@ components:
        - output
        - parallel_tool_calls
        - status
+        - text
      title: OpenAIResponseObject
    OpenAIResponseOutput:
      oneOf:
@ -7288,6 +7344,8 @@ components:
          type: string
        temperature:
          type: number
+        text:
+          $ref: '#/components/schemas/OpenAIResponseText'
        top_p:
          type: number
        truncation:
@ -7307,6 +7365,7 @@ components:
        - output
        - parallel_tool_calls
        - status
+        - text
        - input
      title: OpenAIResponseObjectWithInput
    ListProvidersResponse: