diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index cd514668f..7aaf19c5d 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -4053,28 +4053,33 @@
"type": "object",
"properties": {
"strategy": {
- "$ref": "#/components/schemas/SamplingStrategy"
+ "$ref": "#/components/schemas/SamplingStrategy",
+ "description": "The sampling strategy."
},
"max_tokens": {
"type": "integer",
- "default": 0
+ "default": 0,
+ "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
},
"repetition_penalty": {
"type": "number",
- "default": 1.0
+ "default": 1.0,
+                    "description": "Penalty applied to repeated tokens. A value of 1.0 means no penalty; values greater than 1.0 discourage repetition, while values less than 1.0 encourage it."
},
"stop": {
"type": "array",
"items": {
"type": "string"
- }
+ },
+ "description": "Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."
}
},
"additionalProperties": false,
"required": [
"strategy"
],
- "title": "SamplingParams"
+ "title": "SamplingParams",
+ "description": "Sampling parameters."
},
"SamplingStrategy": {
"oneOf": [
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 6c96c3805..d25b9afb3 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -2787,20 +2787,33 @@ components:
properties:
strategy:
$ref: '#/components/schemas/SamplingStrategy'
+ description: The sampling strategy.
max_tokens:
type: integer
default: 0
+ description: >-
+ The maximum number of tokens that can be generated in the completion.
+ The token count of your prompt plus max_tokens cannot exceed the model's
+ context length.
repetition_penalty:
type: number
default: 1.0
+          description: >-
+            Penalty applied to repeated tokens. A value of 1.0 means no penalty;
+            values greater than 1.0 discourage repetition, while values less than
+            1.0 encourage it.
stop:
type: array
items:
type: string
+ description: >-
+ Up to 4 sequences where the API will stop generating further tokens. The
+ returned text will not contain the stop sequence.
additionalProperties: false
required:
- strategy
title: SamplingParams
+ description: Sampling parameters.
SamplingStrategy:
oneOf:
- $ref: '#/components/schemas/GreedySamplingStrategy'