fix: Avoid BadRequestError due to invalid max_tokens (#3667)

This patch ensures that if max_tokens is not defined, it is set to None
instead of 0 when calling openai_chat_completion. This way, providers
(like Gemini) that cannot handle `max_tokens = 0` will not fail.

Issue: #3666
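
A minimal sketch of the intended behavior, assuming an adapter that assembles the request parameters before calling the OpenAI-compatible endpoint (the helper name `build_chat_completion_params` and the surrounding code are illustrative, not the actual llama-stack implementation):

```python
from typing import Any


def build_chat_completion_params(
    model: str,
    messages: list[dict[str, Any]],
    max_tokens: int | None = None,
) -> dict[str, Any]:
    """Assemble kwargs for an OpenAI-compatible chat completion call."""
    return {
        "model": model,
        "messages": messages,
        # Treat 0 (the old schema default) the same as "not provided":
        # providers such as Gemini reject max_tokens=0 with a BadRequestError.
        "max_tokens": max_tokens if max_tokens else None,
    }


params = build_chat_completion_params(
    "gemini-1.5-flash", [{"role": "user", "content": "hi"}]
)
assert params["max_tokens"] is None
```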
Authored by Luis Tomas Bolivar on 2025-10-27 17:27:21 +01:00; committed by Ashwin Bharambe
parent ddd32b187a
commit f7c2973aa5
171 changed files with 12728 additions and 8 deletions


@@ -3901,7 +3901,6 @@
         },
         "max_tokens": {
           "type": "integer",
-          "default": 0,
           "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
         },
         "repetition_penalty": {


@@ -2862,7 +2862,6 @@ components:
         description: The sampling strategy.
       max_tokens:
         type: integer
-        default: 0
         description: >-
           The maximum number of tokens that can be generated in the completion.
           The token count of your prompt plus max_tokens cannot exceed the model's


@@ -2376,7 +2376,6 @@
         },
         "max_tokens": {
           "type": "integer",
-          "default": 0,
           "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
         },
         "repetition_penalty": {


@@ -1695,7 +1695,6 @@ components:
         description: The sampling strategy.
      max_tokens:
        type: integer
-       default: 0
        description: >-
          The maximum number of tokens that can be generated in the completion.
          The token count of your prompt plus max_tokens cannot exceed the model's


@@ -15485,7 +15485,6 @@
         },
         "max_tokens": {
           "type": "integer",
-          "default": 0,
           "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
         },
         "repetition_penalty": {


@@ -11700,7 +11700,6 @@ components:
         description: The sampling strategy.
       max_tokens:
         type: integer
-        default: 0
         description: >-
           The maximum number of tokens that can be generated in the completion.
           The token count of your prompt plus max_tokens cannot exceed the model's