Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-03 19:57:35 +00:00)

Merge 43fb18928b into 188a56af5c
Commit dba0a47a3f
8 changed files with 3 additions and 7 deletions
docs/static/deprecated-llama-stack-spec.html (vendored): 1 line changed

@@ -4218,7 +4218,6 @@
 },
 "max_tokens": {
 "type": "integer",
-"default": 0,
 "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
 },
 "repetition_penalty": {
docs/static/deprecated-llama-stack-spec.yaml (vendored): 1 line changed

@@ -3068,7 +3068,6 @@ components:
 description: The sampling strategy.
 max_tokens:
 type: integer
-default: 0
 description: >-
 The maximum number of tokens that can be generated in the completion.
 The token count of your prompt plus max_tokens cannot exceed the model's
@@ -2713,7 +2713,6 @@
 },
 "max_tokens": {
 "type": "integer",
-"default": 0,
 "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
 },
 "repetition_penalty": {
@@ -1927,7 +1927,6 @@ components:
 description: The sampling strategy.
 max_tokens:
 type: integer
-default: 0
 description: >-
 The maximum number of tokens that can be generated in the completion.
 The token count of your prompt plus max_tokens cannot exceed the model's
docs/static/stainless-llama-stack-spec.html (vendored): 1 line changed

@@ -15437,7 +15437,6 @@
 },
 "max_tokens": {
 "type": "integer",
-"default": 0,
 "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
 },
 "repetition_penalty": {
docs/static/stainless-llama-stack-spec.yaml (vendored): 1 line changed

@@ -11477,7 +11477,6 @@ components:
 description: The sampling strategy.
 max_tokens:
 type: integer
-default: 0
 description: >-
 The maximum number of tokens that can be generated in the completion.
 The token count of your prompt plus max_tokens cannot exceed the model's
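Taken together, the four spec hunks above remove the advertised "default: 0" for max_tokens from the generated OpenAPI documents. A documented default of 0 is misleading for this field, because a schema-driven client that fills in documented defaults would silently request a zero-token completion. The snippet below is a minimal illustration of that failure mode, not code from the repository; fill_default and schema_property are made-up names.

# Illustrative only: why "default: 0" for max_tokens was misleading in the spec.
schema_property = {"type": "integer", "default": 0}  # the pre-change spec entry


def fill_default(body: dict, key: str, prop: dict) -> dict:
    # Generic "apply schema defaults" step, as many generated clients do.
    if key not in body and "default" in prop:
        body[key] = prop["default"]
    return body


print(fill_default({"temperature": 0.0}, "max_tokens", schema_property))
# {'temperature': 0.0, 'max_tokens': 0}  -> an accidental hard cap of zero tokens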
@@ -96,7 +96,7 @@ class SamplingParams(BaseModel):
 
     strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
 
-    max_tokens: int | None = 0
+    max_tokens: int | None = None
     repetition_penalty: float | None = 1.0
     stop: list[str] | None = None
 
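The source of the generated specs is the SamplingParams model changed in the hunk above: the max_tokens default moves from 0 to None, so an unset limit is represented as None rather than as a literal 0. The sketch below reproduces only the field defaults from the diff; the GreedySamplingStrategy stub, the simplified strategy annotation, and the resolve_max_tokens helper are illustrative assumptions, not repository code.

from pydantic import BaseModel, Field


class GreedySamplingStrategy(BaseModel):
    # Stand-in for the strategy type referenced in the hunk above.
    type: str = "greedy"


class SamplingParams(BaseModel):
    # Field defaults mirror the diff: max_tokens now defaults to None, so
    # "no limit requested" is distinguishable from "a limit of 0 tokens".
    strategy: GreedySamplingStrategy = Field(default_factory=GreedySamplingStrategy)
    max_tokens: int | None = None
    repetition_penalty: float | None = 1.0
    stop: list[str] | None = None


def resolve_max_tokens(params: SamplingParams, provider_default: int = 4096) -> int:
    # Hypothetical fallback: with None as the default, a provider-side limit
    # can apply instead of an accidental max_tokens=0.
    return params.max_tokens if params.max_tokens is not None else provider_default


print(SamplingParams().max_tokens)                         # None (was 0 before this change)
print(resolve_max_tokens(SamplingParams()))                # 4096 via the illustrative fallback
print(resolve_max_tokens(SamplingParams(max_tokens=512)))  # 512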
@@ -55,6 +55,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
 "model": text_model_id,
 "sampling_params": {
 "temperature": 0.0,
+"max_tokens": 512,
 },
 },
 },
@@ -88,6 +89,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
 "model": text_model_id,
 "sampling_params": {
 "temperature": 0.0,
+"max_tokens": 512,
 },
 },
 },
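For callers, the practical consequence is what the two test hunks show: a test that needs a bounded completion now sets max_tokens explicitly instead of inheriting the old default of 0. A request payload in that shape might look like the sketch below; the eval_candidate and type keys and the model id are assumptions for illustration, while model, sampling_params, temperature, and max_tokens come from the hunks.

# Illustrative payload; surrounding keys and the model id are assumed.
benchmark_config = {
    "eval_candidate": {
        "type": "model",
        "model": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
        "sampling_params": {
            "temperature": 0.0,
            "max_tokens": 512,  # explicit cap; no longer implied by a default of 0
        },
    },
}

print(benchmark_config["eval_candidate"]["sampling_params"]["max_tokens"])  # 512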