Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-27 18:50:41 +00:00)
feat: Structured output for Responses API (#2324)
# What does this PR do?

This adds the missing `text` parameter to the Responses API, which is how users control structured outputs. All we do with that parameter is map it to the corresponding chat completion `response_format`.

## Test Plan

The new unit tests exercise the various permutations allowed for this property, while a couple of new verification tests actually use it for real to verify that the model outputs follow the format as expected.

Unit tests: `python -m pytest -s -v tests/unit/providers/agents/meta_reference/test_openai_responses.py`

Verification tests:
```
llama stack run llama_stack/templates/together/run.yaml
pytest -s -vv 'tests/verifications/openai_api/test_responses.py' \
  --base-url=http://localhost:8321/v1/openai/v1 \
  --model meta-llama/Llama-4-Scout-17B-16E-Instruct
```

Note that the verification tests can only be run against a real Llama Stack server (as opposed to using the library client via `--provider=stack:together`) because the Llama Stack Python client has not yet been updated to accept this `text` field.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
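For illustration, here is a minimal sketch of how a client could exercise the new parameter against the server from the test plan above, using the stock `openai` Python client pointed at the Llama Stack OpenAI-compatible endpoint. The schema name and its contents are made-up examples and not part of this PR.

```python
from openai import OpenAI

# Assumes the server from the test plan is running locally; the API key is a dummy.
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

response = client.responses.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    input="Name a US state and its capital.",
    # The new `text` parameter: a `format` object whose `type` is one of
    # "text", "json_schema", or "json_object" (see the spec changes below).
    text={
        "format": {
            "type": "json_schema",
            "name": "state_capital",  # illustrative name
            "schema": {
                "type": "object",
                "properties": {
                    "state": {"type": "string"},
                    "capital": {"type": "string"},
                },
                "required": ["state", "capital"],
            },
            "strict": True,
        }
    },
)
print(response.output_text)  # should be JSON matching the schema above
```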
Parent: c70ca8344f
Commit: 8bee2954be
8 changed files with 323 additions and 2 deletions
docs/_static/llama-stack-spec.html (vendored): 86 changed lines
@@ -7241,6 +7241,79 @@
       ],
       "title": "OpenAIResponseOutputMessageWebSearchToolCall"
     },
+    "OpenAIResponseText": {
+      "type": "object",
+      "properties": {
+        "format": {
+          "type": "object",
+          "properties": {
+            "type": {
+              "oneOf": [
+                {
+                  "type": "string",
+                  "const": "text"
+                },
+                {
+                  "type": "string",
+                  "const": "json_schema"
+                },
+                {
+                  "type": "string",
+                  "const": "json_object"
+                }
+              ],
+              "description": "Must be \"text\", \"json_schema\", or \"json_object\" to identify the format type"
+            },
+            "name": {
+              "type": "string",
+              "description": "The name of the response format. Only used for json_schema."
+            },
+            "schema": {
+              "type": "object",
+              "additionalProperties": {
+                "oneOf": [
+                  {
+                    "type": "null"
+                  },
+                  {
+                    "type": "boolean"
+                  },
+                  {
+                    "type": "number"
+                  },
+                  {
+                    "type": "string"
+                  },
+                  {
+                    "type": "array"
+                  },
+                  {
+                    "type": "object"
+                  }
+                ]
+              },
+              "description": "The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. Only used for json_schema."
+            },
+            "description": {
+              "type": "string",
+              "description": "(Optional) A description of the response format. Only used for json_schema."
+            },
+            "strict": {
+              "type": "boolean",
+              "description": "(Optional) Whether to strictly enforce the JSON schema. If true, the response must match the schema exactly. Only used for json_schema."
+            }
+          },
+          "additionalProperties": false,
+          "required": [
+            "type"
+          ],
+          "title": "OpenAIResponseTextFormat",
+          "description": "Configuration for Responses API text format."
+        }
+      },
+      "additionalProperties": false,
+      "title": "OpenAIResponseText"
+    },
     "CreateOpenaiResponseRequest": {
       "type": "object",
       "properties": {
@@ -7278,6 +7351,9 @@
         "temperature": {
           "type": "number"
         },
+        "text": {
+          "$ref": "#/components/schemas/OpenAIResponseText"
+        },
         "tools": {
           "type": "array",
           "items": {
@@ -7351,6 +7427,9 @@
         "temperature": {
           "type": "number"
         },
+        "text": {
+          "$ref": "#/components/schemas/OpenAIResponseText"
+        },
         "top_p": {
           "type": "number"
         },
@@ -7369,7 +7448,8 @@
         "object",
         "output",
         "parallel_tool_calls",
-        "status"
+        "status",
+        "text"
       ],
       "title": "OpenAIResponseObject"
     },
@@ -10406,6 +10486,9 @@
         "temperature": {
           "type": "number"
         },
+        "text": {
+          "$ref": "#/components/schemas/OpenAIResponseText"
+        },
         "top_p": {
           "type": "number"
         },
@@ -10431,6 +10514,7 @@
         "output",
         "parallel_tool_calls",
         "status",
+        "text",
         "input"
       ],
       "title": "OpenAIResponseObjectWithInput"
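The commit message notes that this `text` parameter is simply mapped onto the chat completion `response_format`. As a rough, hypothetical sketch (the function name and exact field handling below are assumptions, not the actual llama-stack code), the translation from the `OpenAIResponseTextFormat` shape above to an OpenAI-style `response_format` could look like:

```python
from typing import Any, Optional


def text_format_to_response_format(fmt: Optional[dict[str, Any]]) -> Optional[dict[str, Any]]:
    """Hypothetical helper: translate a Responses API `text.format` dict into a
    chat-completion `response_format` dict. Not the actual implementation."""
    if fmt is None or fmt.get("type") == "text":
        # Plain text needs no special response_format.
        return None
    if fmt["type"] == "json_object":
        return {"type": "json_object"}
    if fmt["type"] == "json_schema":
        return {
            "type": "json_schema",
            "json_schema": {
                "name": fmt.get("name", "response"),
                "schema": fmt["schema"],
                "description": fmt.get("description"),
                "strict": fmt.get("strict"),
            },
        }
    raise ValueError(f"Unsupported text format type: {fmt['type']}")
```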
docs/_static/llama-stack-spec.yaml (vendored): 59 changed lines
@@ -5118,6 +5118,57 @@ components:
         - type
       title: >-
         OpenAIResponseOutputMessageWebSearchToolCall
+    OpenAIResponseText:
+      type: object
+      properties:
+        format:
+          type: object
+          properties:
+            type:
+              oneOf:
+                - type: string
+                  const: text
+                - type: string
+                  const: json_schema
+                - type: string
+                  const: json_object
+              description: >-
+                Must be "text", "json_schema", or "json_object" to identify the format
+                type
+            name:
+              type: string
+              description: >-
+                The name of the response format. Only used for json_schema.
+            schema:
+              type: object
+              additionalProperties:
+                oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+              description: >-
+                The JSON schema the response should conform to. In a Python SDK, this
+                is often a `pydantic` model. Only used for json_schema.
+            description:
+              type: string
+              description: >-
+                (Optional) A description of the response format. Only used for json_schema.
+            strict:
+              type: boolean
+              description: >-
+                (Optional) Whether to strictly enforce the JSON schema. If true, the
+                response must match the schema exactly. Only used for json_schema.
+          additionalProperties: false
+          required:
+            - type
+          title: OpenAIResponseTextFormat
+          description: >-
+            Configuration for Responses API text format.
+      additionalProperties: false
+      title: OpenAIResponseText
     CreateOpenaiResponseRequest:
       type: object
       properties:
@@ -5145,6 +5196,8 @@ components:
           type: boolean
         temperature:
           type: number
+        text:
+          $ref: '#/components/schemas/OpenAIResponseText'
         tools:
           type: array
           items:
@@ -5196,6 +5249,8 @@ components:
           type: string
         temperature:
          type: number
+        text:
+          $ref: '#/components/schemas/OpenAIResponseText'
        top_p:
          type: number
        truncation:
@@ -5211,6 +5266,7 @@ components:
        - output
        - parallel_tool_calls
        - status
+       - text
      title: OpenAIResponseObject
    OpenAIResponseOutput:
      oneOf:
@@ -7288,6 +7344,8 @@ components:
          type: string
        temperature:
          type: number
+        text:
+          $ref: '#/components/schemas/OpenAIResponseText'
        top_p:
          type: number
        truncation:
@@ -7307,6 +7365,7 @@ components:
        - output
        - parallel_tool_calls
        - status
+       - text
        - input
      title: OpenAIResponseObjectWithInput
    ListProvidersResponse:
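Tying back to the test plan, the unit tests exercise the permutations this schema allows. Minimal `text` payloads for each allowed `format.type` might look like the following (the schema name and contents are illustrative, not taken from the tests):

```python
# Illustrative `text` payloads for each allowed format type.
text_plain = {"format": {"type": "text"}}

text_json_object = {"format": {"type": "json_object"}}

text_json_schema = {
    "format": {
        "type": "json_schema",
        "name": "capital",
        "schema": {
            "type": "object",
            "properties": {"capital": {"type": "string"}},
            "required": ["capital"],
        },
        "strict": True,
    }
}
```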