Fix BadRequestError due to invalid max_tokens

This patch ensures that when max_tokens is not defined it is set to None.
This prevents failures with providers that do not guard against
max_tokens being set to 0.

Issue: #3666
Luis Tomas Bolivar 2025-10-03 17:37:04 +02:00
parent 4dfbe46954
commit 43fb18928b
8 changed files with 3 additions and 7 deletions
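
Why the old default failed (a minimal sketch; the `build_request_body` helper and the use of `model_dump(exclude_none=True)` are illustrative assumptions, not code from this patch):

from pydantic import BaseModel


class SamplingParams(BaseModel):
    # Trimmed-down stand-in for the real SamplingParams model.
    max_tokens: int | None = None  # was `int | None = 0` before this patch
    repetition_penalty: float | None = 1.0


def build_request_body(params: SamplingParams) -> dict:
    # Dropping None-valued fields means an undefined max_tokens is omitted
    # from the provider request instead of being forwarded as 0.
    return params.model_dump(exclude_none=True)


print(build_request_body(SamplingParams()))
# New default (None): {'repetition_penalty': 1.0} -- max_tokens omitted.
# Old default (0):    {'max_tokens': 0, 'repetition_penalty': 1.0} -- providers
#                     without a guard for 0 reject this with a 400 BadRequestError.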

@@ -4218,7 +4218,6 @@
},
"max_tokens": {
"type": "integer",
"default": 0,
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
},
"repetition_penalty": {

@@ -3068,7 +3068,6 @@ components:
description: The sampling strategy.
max_tokens:
type: integer
-default: 0
description: >-
The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's

@@ -2713,7 +2713,6 @@
},
"max_tokens": {
"type": "integer",
"default": 0,
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
},
"repetition_penalty": {

@@ -1927,7 +1927,6 @@ components:
description: The sampling strategy.
max_tokens:
type: integer
-default: 0
description: >-
The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's

@@ -14753,7 +14753,6 @@
},
"max_tokens": {
"type": "integer",
"default": 0,
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
},
"repetition_penalty": {

@@ -10909,7 +10909,6 @@ components:
description: The sampling strategy.
max_tokens:
type: integer
-default: 0
description: >-
The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's

@@ -96,7 +96,7 @@ class SamplingParams(BaseModel):
    strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
-    max_tokens: int | None = 0
+    max_tokens: int | None = None
    repetition_penalty: float | None = 1.0
    stop: list[str] | None = None
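
For context, the kind of provider-side check that an inherited default of 0 trips (a hypothetical guard, not taken from any specific provider):

def validate_max_tokens(max_tokens: int | None) -> None:
    # None usually means "use the server default"; an explicit value must be
    # at least 1, so a client-side default of 0 fails even when the caller
    # never set max_tokens at all.
    if max_tokens is not None and max_tokens < 1:
        raise ValueError("max_tokens must be >= 1")  # surfaces as 400 BadRequestError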

@@ -55,6 +55,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
"model": text_model_id,
"sampling_params": {
"temperature": 0.0,
"max_tokens": 512,
},
},
},
@@ -88,6 +89,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
"model": text_model_id,
"sampling_params": {
"temperature": 0.0,
"max_tokens": 512,
},
},
},