Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-28 19:38:46 +00:00)
Merge branch 'main' into add-watsonx-inference-adapter

Commit 4b53171139 · 6 changed files with 79 additions and 4 deletions
@@ -195,11 +195,23 @@ register_schema(SamplingStrategy, name="SamplingStrategy")

```python
@json_schema_type
class SamplingParams(BaseModel):
    """Sampling parameters.

    :param strategy: The sampling strategy.
    :param max_tokens: The maximum number of tokens that can be generated in the completion. The token count of
        your prompt plus max_tokens cannot exceed the model's context length.
    :param repetition_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens
        based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
    :param stop: Up to 4 sequences where the API will stop generating further tokens.
        The returned text will not contain the stop sequence.
    """

    strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)

    max_tokens: Optional[int] = 0
    repetition_penalty: Optional[float] = 1.0
    additional_params: Optional[dict] = {}
    stop: Optional[List[str]] = None


class CheckpointQuantizationFormat(Enum):
```
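For reference, a minimal usage sketch (not part of this diff) of the newly added `stop` field when constructing sampling parameters. The `SamplingParams` and `GreedySamplingStrategy` names and field semantics come from the hunk above; the import path is an assumption.

```python
# Minimal sketch: building SamplingParams with the new `stop` field.
# The import path below is an assumption; the class and field names
# are taken from the hunk above.
from llama_stack.apis.inference import GreedySamplingStrategy, SamplingParams

params = SamplingParams(
    strategy=GreedySamplingStrategy(),  # same as the default_factory default
    max_tokens=256,                     # prompt tokens + max_tokens must fit the context length
    repetition_penalty=1.1,            # positive values nudge the model toward new topics
    stop=["Observation:", "\n\n"],      # up to 4 sequences; the stop text is not returned
)
```

Generation halts as soon as any of the listed sequences would be produced, and the matched sequence itself is omitted from the returned text.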