Updated with vLLM-based values

rh-pre-commit.version: 2.3.2
rh-pre-commit.check-secrets: ENABLED
2025-12-12 04:00:42 +00:00 · 2025-10-22 18:20:32 +08:00 · 2025-10-22 18:20:32 +08:00 · 17e74251e2
commit 17e74251e2
parent a701f68bd7
11 changed files with 551 additions and 102 deletions
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -98,7 +98,7 @@ class VLLMInferenceAdapter(OpenAIMixin):
        params = params.model_copy()

        # Apply vLLM-specific defaults
-        if params.max_tokens is None and self.config.max_tokens:
+        if (params.max_tokens is None or params.max_tokens == 0) and self.config.max_tokens:
            params.max_tokens = self.config.max_tokens

        # This is to be consistent with OpenAI API and support vLLM <= v0.6.3