mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
fix: Avoid BadRequestError due to invalid max_tokens (#3667)
This patch ensures that if `max_tokens` is not defined, it is set to None instead of 0 when calling openai_chat_completion. This way, providers (like Gemini) that cannot handle `max_tokens = 0` will not fail. Issue: #3666
This commit is contained in:
parent
00d8414597
commit
f18b5eb537
171 changed files with 12728 additions and 8 deletions
1
docs/static/deprecated-llama-stack-spec.html
vendored
1
docs/static/deprecated-llama-stack-spec.html
vendored
|
|
@ -3901,7 +3901,6 @@
|
|||
},
|
||||
"max_tokens": {
|
||||
"type": "integer",
|
||||
"default": 0,
|
||||
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
|
||||
},
|
||||
"repetition_penalty": {
|
||||
|
|
|
|||
1
docs/static/deprecated-llama-stack-spec.yaml
vendored
1
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
|
@ -2862,7 +2862,6 @@ components:
|
|||
description: The sampling strategy.
|
||||
max_tokens:
|
||||
type: integer
|
||||
default: 0
|
||||
description: >-
|
||||
The maximum number of tokens that can be generated in the completion.
|
||||
The token count of your prompt plus max_tokens cannot exceed the model's
|
||||
|
|
|
|||
|
|
@ -2376,7 +2376,6 @@
|
|||
},
|
||||
"max_tokens": {
|
||||
"type": "integer",
|
||||
"default": 0,
|
||||
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
|
||||
},
|
||||
"repetition_penalty": {
|
||||
|
|
|
|||
|
|
@ -1695,7 +1695,6 @@ components:
|
|||
description: The sampling strategy.
|
||||
max_tokens:
|
||||
type: integer
|
||||
default: 0
|
||||
description: >-
|
||||
The maximum number of tokens that can be generated in the completion.
|
||||
The token count of your prompt plus max_tokens cannot exceed the model's
|
||||
|
|
|
|||
1
docs/static/stainless-llama-stack-spec.html
vendored
1
docs/static/stainless-llama-stack-spec.html
vendored
|
|
@ -15452,7 +15452,6 @@
|
|||
},
|
||||
"max_tokens": {
|
||||
"type": "integer",
|
||||
"default": 0,
|
||||
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
|
||||
},
|
||||
"repetition_penalty": {
|
||||
|
|
|
|||
1
docs/static/stainless-llama-stack-spec.yaml
vendored
1
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -11600,7 +11600,6 @@ components:
|
|||
description: The sampling strategy.
|
||||
max_tokens:
|
||||
type: integer
|
||||
default: 0
|
||||
description: >-
|
||||
The maximum number of tokens that can be generated in the completion.
|
||||
The token count of your prompt plus max_tokens cannot exceed the model's
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue