From 7968148bca0ebcf3f92d30cf82e0446ef42a5818 Mon Sep 17 00:00:00 2001
From: Yuan Tang
Date: Fri, 21 Mar 2025 11:23:27 -0400
Subject: [PATCH] Add docs

Signed-off-by: Yuan Tang
---
 llama_stack/models/llama/datatypes.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/llama_stack/models/llama/datatypes.py b/llama_stack/models/llama/datatypes.py
index 4f5a6e9ef..060472e27 100644
--- a/llama_stack/models/llama/datatypes.py
+++ b/llama_stack/models/llama/datatypes.py
@@ -197,6 +197,16 @@ SamplingStrategy = register_schema(
 
 @json_schema_type
 class SamplingParams(BaseModel):
+    """Sampling parameters.
+
+    :param strategy: The sampling strategy.
+    :param max_tokens: The maximum number of tokens that can be generated in the completion. The token count of
+        your prompt plus max_tokens cannot exceed the model's context length.
+    :param repetition_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens
+        based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
+    :param stop: Up to 4 sequences where the API will stop generating further tokens.
+        The returned text will not contain the stop sequence.
+    """
     strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
 
     max_tokens: Optional[int] = 0
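
For reviewers, a minimal usage sketch of the fields this docstring documents (not part of the patch). It assumes the module also exports GreedySamplingStrategy, as the default_factory in the hunk suggests, that repetition_penalty and stop are Optional fields defined just below the context shown, and that pydantic v2 is in use; the values are illustrative only.

    # Sketch only: exercises the fields documented by this patch.
    from llama_stack.models.llama.datatypes import (
        GreedySamplingStrategy,  # assumed export, per default_factory in the hunk
        SamplingParams,
    )

    params = SamplingParams(
        strategy=GreedySamplingStrategy(),  # matches the class's default strategy
        max_tokens=128,          # prompt tokens + 128 must fit the model's context length
        repetition_penalty=1.0,  # within the -2.0..2.0 range described in the docstring
        stop=["\n\n"],           # up to 4 sequences; excluded from the returned text
    )
    print(params.model_dump_json())  # pydantic v2 serialization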