Merge branch 'main' into add-watsonx-inference-adapter

2025-12-30 01:24:16 +00:00 · 2025-04-17 10:43:38 +05:30 · 2025-04-17 10:43:38 +05:30 · 34a3f1a749
commit 34a3f1a749
parent 35dd1c27d3 b44f84ce18
12 changed files with 237 additions and 18 deletions
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -374,7 +374,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
            options["max_tokens"] = self.config.max_tokens

        input_dict: dict[str, Any] = {}
-        if isinstance(request, ChatCompletionRequest) and request.tools is not None:
+        # Only include the 'tools' param when the request actually has tools; sending an empty list to vLLM can break things.
+        if isinstance(request, ChatCompletionRequest) and request.tools:
            input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)}

        if isinstance(request, ChatCompletionRequest):