mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-14 07:52:37 +00:00
feat: Add max_output_tokens to Response API
Responses and Completions have a max_output_tokens field. It is currently missing from the create request and the response object in the Responses API. This PR adds it to both. Fixes: #3562 Signed-off-by: Abhishek Bongale <abhishekbongale@outlook.com>
This commit is contained in:
parent
92219fd8fb
commit
bb58da22a1
14 changed files with 127 additions and 20 deletions
11
docs/static/llama-stack-spec.html
vendored
11
docs/static/llama-stack-spec.html
vendored
|
|
@ -7503,6 +7503,10 @@
|
|||
"type": "string",
|
||||
"description": "(Optional) Truncation strategy applied to the response"
|
||||
},
|
||||
"max_output_tokens": {
|
||||
"type": "integer",
|
||||
"description": "(Optional) Upper bound for response tokens generation"
|
||||
},
|
||||
"input": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
|
@ -8009,6 +8013,9 @@
|
|||
},
|
||||
"max_infer_iters": {
|
||||
"type": "integer"
|
||||
},
|
||||
"max_output_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
@ -8078,6 +8085,10 @@
|
|||
"truncation": {
|
||||
"type": "string",
|
||||
"description": "(Optional) Truncation strategy applied to the response"
|
||||
},
|
||||
"max_output_tokens": {
|
||||
"type": "integer",
|
||||
"description": "(Optional) Upper bound for response tokens generation"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue