Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-06 18:50:44 +00:00)
fix spec

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>

parent 98274574f0, commit 9032544b2e

2 changed files with 23 additions and 5 deletions
docs/_static/llama-stack-spec.html (vendored, 15 changes)
@@ -4053,28 +4053,33 @@
         "type": "object",
         "properties": {
           "strategy": {
-            "$ref": "#/components/schemas/SamplingStrategy"
+            "$ref": "#/components/schemas/SamplingStrategy",
+            "description": "The sampling strategy."
           },
           "max_tokens": {
             "type": "integer",
-            "default": 0
+            "default": 0,
+            "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
           },
           "repetition_penalty": {
             "type": "number",
-            "default": 1.0
+            "default": 1.0,
+            "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics."
           },
           "stop": {
             "type": "array",
             "items": {
               "type": "string"
-            }
+            },
+            "description": "Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."
           }
         },
         "additionalProperties": false,
         "required": [
           "strategy"
         ],
-        "title": "SamplingParams"
+        "title": "SamplingParams",
+        "description": "Sampling parameters."
       },
       "SamplingStrategy": {
         "oneOf": [
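For reference, a minimal SamplingParams instance matching the schema above could look like the following. This is only a sketch: the shape of the strategy object (a "type: greedy" tag for GreedySamplingStrategy) and the concrete values are assumptions, since this hunk does not show the SamplingStrategy variants.

# Hypothetical SamplingParams instance; the "type: greedy" discriminator
# for GreedySamplingStrategy is assumed and is not shown in this diff.
strategy:
  type: greedy            # assumed GreedySamplingStrategy shape
max_tokens: 512           # prompt tokens + max_tokens must fit the model's context length
repetition_penalty: 1.0   # the schema default
stop:                     # up to 4 sequences; the returned text excludes the matched sequence
  - "Observation:"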
docs/_static/llama-stack-spec.yaml (vendored, 13 changes)
@@ -2787,20 +2787,33 @@ components:
       properties:
         strategy:
           $ref: '#/components/schemas/SamplingStrategy'
+          description: The sampling strategy.
         max_tokens:
           type: integer
           default: 0
+          description: >-
+            The maximum number of tokens that can be generated in the completion.
+            The token count of your prompt plus max_tokens cannot exceed the model's
+            context length.
         repetition_penalty:
           type: number
           default: 1.0
+          description: >-
+            Number between -2.0 and 2.0. Positive values penalize new tokens based
+            on whether they appear in the text so far, increasing the model's likelihood
+            to talk about new topics.
         stop:
           type: array
           items:
             type: string
+          description: >-
+            Up to 4 sequences where the API will stop generating further tokens. The
+            returned text will not contain the stop sequence.
       additionalProperties: false
       required:
         - strategy
       title: SamplingParams
+      description: Sampling parameters.
     SamplingStrategy:
       oneOf:
         - $ref: '#/components/schemas/GreedySamplingStrategy'