feat: Add max_output_tokens to Responses API

OpenAI Responses and Completions have a max_output_tokens field. It is currently missing from both the create request and the response object in the Responses API. This PR adds the field to both. Fixes: #3562 Signed-off-by: Abhishek Bongale <abhishekbongale@outlook.com>
2025-12-04 02:03:44 +00:00 · 2025-11-03 10:25:04 +00:00 · 2025-11-03 10:25:04 +00:00 · 97b345b3f8
commit 97b345b3f8
parent 939a2db58f
9 changed files with 59 additions and 0 deletions
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -6164,6 +6164,10 @@ components:
          type: string
          description: >-
            (Optional) System message inserted into the model's context
+        max_output_tokens:
+          type: integer
+          description: >-
+            (Optional) Upper bound for response tokens generation.
        input:
          type: array
          items:
@ -6522,6 +6526,10 @@ components:
            (Optional) Additional fields to include in the response.
        max_infer_iters:
          type: integer
+        max_output_tokens:
+          type: integer
+          description: >-
+            (Optional) Maximum tokens generated in a response.
      additionalProperties: false
      required:
        - input
@ -6603,6 +6611,10 @@ components:
          type: string
          description: >-
            (Optional) System message inserted into the model's context
+        max_output_tokens:
+          type: integer
+          description: >-
+            (Optional) Upper bound for response tokens generation.
      additionalProperties: false
      required:
        - created_at
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@ -6880,6 +6880,10 @@ components:
          type: string
          description: >-
            (Optional) System message inserted into the model's context
+        max_output_tokens:
+          type: integer
+          description: >-
+            (Optional) Upper bound for response tokens generation.
        input:
          type: array
          items:
@ -7238,6 +7242,10 @@ components:
            (Optional) Additional fields to include in the response.
        max_infer_iters:
          type: integer
+        max_output_tokens:
+          type: integer
+          description: >-
+            (Optional) Maximum tokens generated in a response.
      additionalProperties: false
      required:
        - input
@ -7319,6 +7327,10 @@ components:
          type: string
          description: >-
            (Optional) System message inserted into the model's context
+        max_output_tokens:
+          type: integer
+          description: >-
+            (Optional) Upper bound for response tokens generation.
      additionalProperties: false
      required:
        - created_at