commit dba0a47a3f
Author: Luis Tomas Bolivar
Date:   2025-10-03 12:12:16 -07:00 (committed by GitHub)
8 changed files with 3 additions and 7 deletions


@@ -4218,7 +4218,6 @@
         },
         "max_tokens": {
           "type": "integer",
-          "default": 0,
           "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
         },
         "repetition_penalty": {


@@ -3068,7 +3068,6 @@ components:
         description: The sampling strategy.
         max_tokens:
           type: integer
-          default: 0
           description: >-
             The maximum number of tokens that can be generated in the completion.
             The token count of your prompt plus max_tokens cannot exceed the model's


@@ -2713,7 +2713,6 @@
         },
         "max_tokens": {
           "type": "integer",
-          "default": 0,
           "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
         },
         "repetition_penalty": {


@@ -1927,7 +1927,6 @@ components:
         description: The sampling strategy.
         max_tokens:
           type: integer
-          default: 0
           description: >-
             The maximum number of tokens that can be generated in the completion.
             The token count of your prompt plus max_tokens cannot exceed the model's


@@ -15437,7 +15437,6 @@
         },
         "max_tokens": {
           "type": "integer",
-          "default": 0,
           "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
         },
         "repetition_penalty": {


@@ -11477,7 +11477,6 @@ components:
         description: The sampling strategy.
         max_tokens:
           type: integer
-          default: 0
           description: >-
             The maximum number of tokens that can be generated in the completion.
             The token count of your prompt plus max_tokens cannot exceed the model's


@@ -96,7 +96,7 @@ class SamplingParams(BaseModel):
     strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
-    max_tokens: int | None = 0
+    max_tokens: int | None = None
     repetition_penalty: float | None = 1.0
     stop: list[str] | None = None
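Why the default matters here: with max_tokens: int | None = 0, a request that never set the field serializes with a literal 0, which a provider can read as "generate zero tokens"; None instead signals "no explicit cap, fall back to the provider's default". A minimal sketch of that distinction, assuming Pydantic; the effective_max_tokens helper and the 4096 fallback are illustrative, not part of this change:

from pydantic import BaseModel

# Simplified stand-in for the SamplingParams model in the hunk above;
# the real class also carries strategy, repetition_penalty, and stop.
class SamplingParams(BaseModel):
    max_tokens: int | None = None  # previously: int | None = 0

# Hypothetical provider-side resolution: None means "use the provider's
# own limit" rather than literally zero completion tokens.
def effective_max_tokens(params: SamplingParams, provider_default: int = 4096) -> int:
    return params.max_tokens if params.max_tokens is not None else provider_default

print(effective_max_tokens(SamplingParams()))                # 4096 (provider default)
print(effective_max_tokens(SamplingParams(max_tokens=512)))  # 512 (explicit cap)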


@@ -55,6 +55,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
             "model": text_model_id,
             "sampling_params": {
                 "temperature": 0.0,
+                "max_tokens": 512,
             },
         },
     },
@@ -88,6 +89,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
             "model": text_model_id,
             "sampling_params": {
                 "temperature": 0.0,
+                "max_tokens": 512,
             },
         },
     },
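With the literal-zero default gone, these eval tests now pass max_tokens explicitly (512), presumably to keep completion length bounded regardless of whichever provider default would otherwise apply.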