diff --git a/llama_stack/models/llama/datatypes.py b/llama_stack/models/llama/datatypes.py
index 4f5a6e9ef..060472e27 100644
--- a/llama_stack/models/llama/datatypes.py
+++ b/llama_stack/models/llama/datatypes.py
@@ -197,6 +197,16 @@ SamplingStrategy = register_schema(
 
 @json_schema_type
 class SamplingParams(BaseModel):
+    """Sampling parameters.
+
+    :param strategy: The sampling strategy.
+    :param max_tokens: The maximum number of tokens that can be generated in the completion. The token count of
+        your prompt plus max_tokens cannot exceed the model's context length.
+    :param repetition_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens
+        based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
+    :param stop: Up to 4 sequences where the API will stop generating further tokens.
+        The returned text will not contain the stop sequence.
+    """
     strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
 
     max_tokens: Optional[int] = 0
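
For context, a minimal usage sketch of the fields documented above, assuming TopPSamplingStrategy is exported from the same module alongside SamplingParams (not part of this diff):

    # Hypothetical example; field values are illustrative only.
    from llama_stack.models.llama.datatypes import SamplingParams, TopPSamplingStrategy

    params = SamplingParams(
        strategy=TopPSamplingStrategy(temperature=0.7, top_p=0.9),
        max_tokens=256,          # prompt tokens + max_tokens must fit within the model's context length
        repetition_penalty=1.1,  # positive values discourage tokens already seen in the text
        stop=["\n\n"],           # up to 4 stop sequences; the stop text is not returned
    )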