Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-03 19:57:35 +00:00)
Fix BadRequestError due to invalid max_tokens
This patch ensures that if max_tokens is not defined, it is set to None. This prevents failures in providers that do not guard against max_tokens being set to 0. Issue: #3666
This commit is contained in:
parent
4dfbe46954
commit
43fb18928b
8 changed files with 3 additions and 7 deletions
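For context, here is a minimal sketch (not part of this commit) of the failure mode the new default avoids. When SamplingParams.max_tokens defaulted to 0, an adapter that forwards the value verbatim would send max_tokens=0 to the backend, and some OpenAI-compatible providers reject that with a 400 BadRequestError; with a default of None the field can simply be omitted. The build_request_params helper below is hypothetical and only illustrates the pattern.

```python
from typing import Any


def build_request_params(model: str, prompt: str, max_tokens: int | None) -> dict[str, Any]:
    """Hypothetical adapter helper: only forward max_tokens when the caller set it."""
    params: dict[str, Any] = {"model": model, "prompt": prompt}
    if max_tokens is not None:
        # With the old default of 0, this branch would send max_tokens=0 and some
        # providers rejected the request; with a default of None it is skipped.
        params["max_tokens"] = max_tokens
    return params


# With the new default, an unset max_tokens is simply left out of the request body.
assert "max_tokens" not in build_request_params("my-model", "hello", None)
assert build_request_params("my-model", "hello", 512)["max_tokens"] == 512
```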
docs/static/deprecated-llama-stack-spec.html (vendored): 1 deletion

@@ -4218,7 +4218,6 @@
             },
             "max_tokens": {
               "type": "integer",
-              "default": 0,
               "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
             },
             "repetition_penalty": {
docs/static/deprecated-llama-stack-spec.yaml (vendored): 1 deletion

@@ -3068,7 +3068,6 @@ components:
           description: The sampling strategy.
         max_tokens:
           type: integer
-          default: 0
           description: >-
             The maximum number of tokens that can be generated in the completion.
             The token count of your prompt plus max_tokens cannot exceed the model's
@@ -2713,7 +2713,6 @@
             },
             "max_tokens": {
               "type": "integer",
-              "default": 0,
               "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
             },
             "repetition_penalty": {
@@ -1927,7 +1927,6 @@ components:
           description: The sampling strategy.
         max_tokens:
           type: integer
-          default: 0
           description: >-
             The maximum number of tokens that can be generated in the completion.
             The token count of your prompt plus max_tokens cannot exceed the model's
docs/static/stainless-llama-stack-spec.html (vendored): 1 deletion

@@ -14753,7 +14753,6 @@
             },
             "max_tokens": {
               "type": "integer",
-              "default": 0,
               "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
             },
             "repetition_penalty": {
docs/static/stainless-llama-stack-spec.yaml (vendored): 1 deletion

@@ -10909,7 +10909,6 @@ components:
           description: The sampling strategy.
         max_tokens:
           type: integer
-          default: 0
           description: >-
             The maximum number of tokens that can be generated in the completion.
             The token count of your prompt plus max_tokens cannot exceed the model's
@@ -96,7 +96,7 @@ class SamplingParams(BaseModel):
     strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
 
-    max_tokens: int | None = 0
+    max_tokens: int | None = None
     repetition_penalty: float | None = 1.0
     stop: list[str] | None = None
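As a quick check of the new default, here is a trimmed sketch of the model (assuming pydantic, and omitting the strategy, penalty, and stop plumbing of the real class):

```python
from pydantic import BaseModel


class SamplingParams(BaseModel):
    # Trimmed reproduction of the relevant field after this change; the real
    # class also carries a sampling strategy, repetition_penalty, and stop list.
    max_tokens: int | None = None


params = SamplingParams()
assert params.max_tokens is None  # previously this defaulted to 0
assert SamplingParams(max_tokens=512).max_tokens == 512
```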
@@ -55,6 +55,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
                 "model": text_model_id,
                 "sampling_params": {
                     "temperature": 0.0,
+                    "max_tokens": 512,
                 },
             },
         },

@@ -88,6 +89,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
                 "model": text_model_id,
                 "sampling_params": {
                     "temperature": 0.0,
+                    "max_tokens": 512,
                 },
             },
         },