mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-12 04:00:42 +00:00
updated with vllm based values
rh-pre-commit.version: 2.3.2 rh-pre-commit.check-secrets: ENABLED
This commit is contained in:
parent
a701f68bd7
commit
17e74251e2
11 changed files with 551 additions and 102 deletions
|
|
@@ -98,7 +98,7 @@ class VLLMInferenceAdapter(OpenAIMixin):
|
|||
params = params.model_copy()
|
||||
|
||||
# Apply vLLM-specific defaults
|
||||
- if params.max_tokens is None and self.config.max_tokens:
|
||||
+ if (params.max_tokens is None or params.max_tokens == 0) and self.config.max_tokens:
|
||||
params.max_tokens = self.config.max_tokens
|
||||
|
||||
# This is to be consistent with OpenAI API and support vLLM <= v0.6.3
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue