Updated with vLLM-based values

rh-pre-commit.version: 2.3.2
rh-pre-commit.check-secrets: ENABLED
This commit is contained in:
Antony Sallas 2025-10-22 18:20:32 +08:00
parent a701f68bd7
commit 17e74251e2
11 changed files with 551 additions and 102 deletions

View file

@@ -98,7 +98,7 @@ class VLLMInferenceAdapter(OpenAIMixin):
params = params.model_copy()
# Apply vLLM-specific defaults
if params.max_tokens is None and self.config.max_tokens:
if (params.max_tokens is None or params.max_tokens == 0) and self.config.max_tokens:
params.max_tokens = self.config.max_tokens
# This is to be consistent with OpenAI API and support vLLM <= v0.6.3