feat: Add max_output_tokens to Responses API

The Responses and Completions APIs both expose a max_output_tokens field, but it is currently missing from the create request and the response object in the Responses API.

This PR adds it.
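
For illustration, a minimal sketch of how a client could use the new field once this lands (the base URL, API key, and model id below are placeholders; assumes a locally running llama-stack server and the openai Python SDK):

    from openai import OpenAI

    # Placeholder endpoint and model id; point these at your own deployment.
    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

    response = client.responses.create(
        model="meta-llama/Llama-3.1-8B-Instruct",
        input="Summarize the plot of Hamlet in one sentence.",
        max_output_tokens=64,  # new: upper bound on generated tokens
    )
    print(response.max_output_tokens)  # echoed back on the response object
    print(response.output_text)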

fixes: #3562
Signed-off-by: Abhishek Bongale <abhishekbongale@outlook.com>
Abhishek Bongale 2025-10-06 09:46:09 +01:00
parent 92219fd8fb
commit bb58da22a1
14 changed files with 127 additions and 20 deletions

@@ -9096,6 +9096,10 @@
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"max_output_tokens": {
"type": "integer",
"description": "(Optional) Upper bound for response tokens generation"
},
"input": {
"type": "array",
"items": {
@@ -9914,6 +9918,9 @@
},
"max_infer_iters": {
"type": "integer"
},
"max_output_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
@@ -9983,6 +9990,10 @@
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"max_output_tokens": {
"type": "integer",
"description": "(Optional) Upper bound for response tokens generation"
}
},
"additionalProperties": false,

@@ -6740,6 +6740,10 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
max_output_tokens:
type: integer
description: >-
(Optional) Upper bound for response tokens generation
input:
type: array
items:
@@ -7351,6 +7355,8 @@ components:
(Optional) Additional fields to include in the response.
max_infer_iters:
type: integer
max_output_tokens:
type: integer
additionalProperties: false
required:
- input
@@ -7414,6 +7420,10 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
max_output_tokens:
type: integer
description: >-
(Optional) Upper bound for response tokens generation
additionalProperties: false
required:
- created_at

@@ -7503,6 +7503,10 @@
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"max_output_tokens": {
"type": "integer",
"description": "(Optional) Upper bound for response tokens generation"
},
"input": {
"type": "array",
"items": {
@@ -8009,6 +8013,9 @@
},
"max_infer_iters": {
"type": "integer"
},
"max_output_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
@@ -8078,6 +8085,10 @@
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"max_output_tokens": {
"type": "integer",
"description": "(Optional) Upper bound for response tokens generation"
}
},
"additionalProperties": false,

@@ -5660,6 +5660,10 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
max_output_tokens:
type: integer
description: >-
(Optional) Upper bound for response tokens generation
input:
type: array
items:
@@ -6014,6 +6018,8 @@ components:
(Optional) Additional fields to include in the response.
max_infer_iters:
type: integer
max_output_tokens:
type: integer
additionalProperties: false
required:
- input
@@ -6077,6 +6083,10 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
max_output_tokens:
type: integer
description: >-
(Optional) Upper bound for response tokens generation
additionalProperties: false
required:
- created_at

@@ -9512,6 +9512,10 @@
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"max_output_tokens": {
"type": "integer",
"description": "(Optional) Upper bound for response tokens generation"
},
"input": {
"type": "array",
"items": {
@@ -10018,6 +10022,9 @@
},
"max_infer_iters": {
"type": "integer"
},
"max_output_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
@@ -10087,6 +10094,10 @@
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"max_output_tokens": {
"type": "integer",
"description": "(Optional) Upper bound for response tokens generation"
}
},
"additionalProperties": false,

@@ -7105,6 +7105,10 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
max_output_tokens:
type: integer
description: >-
(Optional) Upper bound for response tokens generation
input:
type: array
items:
@@ -7459,6 +7463,8 @@ components:
(Optional) Additional fields to include in the response.
max_infer_iters:
type: integer
max_output_tokens:
type: integer
additionalProperties: false
required:
- input
@@ -7522,6 +7528,10 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
max_output_tokens:
type: integer
description: >-
(Optional) Upper bound for response tokens generation
additionalProperties: false
required:
- created_at

@@ -825,6 +825,7 @@ class Agents(Protocol):
"List of shields to apply during response generation. Shields provide safety and content moderation."
),
] = None,
max_output_tokens: int | None = None,
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
"""Create a new OpenAI response.

@@ -363,6 +363,7 @@ class OpenAIResponseObject(BaseModel):
:param text: Text formatting configuration for the response
:param top_p: (Optional) Nucleus sampling parameter used for generation
:param truncation: (Optional) Truncation strategy applied to the response
:param max_output_tokens: (Optional) Upper bound for response tokens generation
"""
created_at: int
@@ -380,6 +381,7 @@ class OpenAIResponseObject(BaseModel):
text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
top_p: float | None = None
truncation: str | None = None
max_output_tokens: int | None = None
@json_schema_type
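
Because the new model field defaults to None, previously serialized responses remain valid. A standalone toy model of the same shape (illustrative only, not importing llama-stack):

    from pydantic import BaseModel

    class ResponseSketch(BaseModel):
        # Mirrors the diff above: the field is optional and defaults to None.
        created_at: int
        max_output_tokens: int | None = None

    print(ResponseSketch(created_at=0).max_output_tokens)  # None
    print(ResponseSketch(created_at=0, max_output_tokens=15).max_output_tokens)  # 15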

@@ -204,6 +204,7 @@ class OpenAIResponsesImpl:
store: bool | None = True,
stream: bool | None = False,
temperature: float | None = None,
max_output_tokens: int | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
include: list[str] | None = None,
@@ -224,6 +225,7 @@ class OpenAIResponsesImpl:
previous_response_id=previous_response_id,
store=store,
temperature=temperature,
max_output_tokens=max_output_tokens,
text=text,
tools=tools,
max_infer_iters=max_infer_iters,
@@ -252,6 +254,7 @@ class OpenAIResponsesImpl:
previous_response_id: str | None = None,
store: bool | None = True,
temperature: float | None = None,
max_output_tokens: int | None = None,
text: OpenAIResponseText | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
max_infer_iters: int | None = 10,
@@ -268,6 +271,7 @@ class OpenAIResponsesImpl:
messages=messages,
response_tools=tools,
temperature=temperature,
max_tokens=max_output_tokens,
response_format=response_format,
inputs=input,
)
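
Note the rename at this boundary: the Responses-level max_output_tokens is passed into the chat-completion context as max_tokens, the name the underlying chat-completions call expects. A minimal sketch of that mapping (hypothetical helper, for illustration only):

    def chat_completion_kwargs(max_output_tokens: int | None) -> dict:
        # Forward the Responses cap under the chat-completions parameter name.
        kwargs: dict = {}
        if max_output_tokens is not None:
            kwargs["max_tokens"] = max_output_tokens
        return kwargs

    print(chat_completion_kwargs(15))    # {'max_tokens': 15}
    print(chat_completion_kwargs(None))  # {}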

@@ -63,6 +63,7 @@ class ChatCompletionContext(BaseModel):
response_format: OpenAIResponseFormatParam
approval_requests: list[OpenAIResponseMCPApprovalRequest] = []
approval_responses: dict[str, OpenAIResponseMCPApprovalResponse] = {}
max_tokens: int | None = None
def __init__(
self,
@@ -72,6 +73,7 @@ class ChatCompletionContext(BaseModel):
temperature: float | None,
response_format: OpenAIResponseFormatParam,
inputs: list[OpenAIResponseInput] | str,
max_tokens: int | None = None,
):
super().__init__(
model=model,

@@ -297,3 +297,38 @@ def test_function_call_output_response_with_none_arguments(openai_client, client
    assert response.output[0].type == "function_call"
    assert response.output[0].arguments == "{}"
    _ = response.output[0].call_id


def test_response_with_max_output_tokens(compat_client, text_model_id):
    """Test that the `max_output_tokens` parameter is honored."""
    if not isinstance(compat_client, OpenAI):
        pytest.skip("This test requires the OpenAI client.")

    response = compat_client.responses.create(
        model=text_model_id,
        input=[
            {
                "role": "user",
                "content": "what's the current time? You MUST call the `get_current_time` function to find out.",
            }
        ],
        max_output_tokens=15,
        stream=False,
    )

    assert response.id is not None
    assert response.model == text_model_id
    # The new field should be echoed back on the response object.
    assert hasattr(response, "max_output_tokens")
    assert response.max_output_tokens == 15

    output_text = ""
    for item in response.output:
        if item.type == "message" and item.role == "assistant":
            # Message content is a list of parts; take the first text part.
            for part in item.content:
                if part.type == "output_text":
                    output_text = part.text
                    break
            break

    assert output_text, "Assistant response content should not be empty"
    # A 15-token cap is a loose bound; the reply should stay well under 30 words.
    assert len(output_text.split()) < 30