From c64f0d5888b89394ad6a8f390781ab37dd3f0bc6 Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Wed, 26 Feb 2025 15:25:47 -0500
Subject: [PATCH] fix: Get builtin tool calling working in remote-vllm (#1236)

# What does this PR do?

This PR makes a couple of changes required to get the test
`tests/client-sdk/agents/test_agents.py::test_builtin_tool_web_search`
passing on the remote-vllm provider.

First, we adjust agent_instance to also pass in the description and
parameters of builtin tools. We need these so we can pass each tool's
expected parameters into vLLM. The meta-reference implementations may
not have needed them for builtin tools, since they can take advantage
of the Llama-model-specific support for certain builtin tools. With
vLLM, however, our server-side chat templates for tool calling treat
all tools the same and don't distinguish Llama builtin tools from
custom tools. So, we need to pass the full set of parameter
definitions and the list of required parameters for builtin tools as
well.

Next, we adjust the vLLM streaming chat completion code to fix some
edge cases where it was returning an extra ChatCompletionResponseEvent
containing an empty ToolCall whose call_id, tool_name, and arguments
properties were all empty strings. This bug surfaced after the above
fix: following a successful tool invocation, we were sending extra
chunks back to the client carrying these empty ToolCalls.

## Test Plan

With these changes, the following test that previously failed now
passes:

```
VLLM_URL="http://localhost:8000/v1" \
INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" \
LLAMA_STACK_CONFIG=remote-vllm \
python -m pytest -v \
tests/client-sdk/agents/test_agents.py::test_builtin_tool_web_search \
--inference-model "meta-llama/Llama-3.2-3B-Instruct"
```

Additionally, I ran the remote-vllm client-sdk and provider inference
tests as shown below to ensure they all still pass with this change:

```
VLLM_URL="http://localhost:8000/v1" \
INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" \
LLAMA_STACK_CONFIG=remote-vllm \
python -m pytest -v \
tests/client-sdk/inference/test_text_inference.py \
--inference-model "meta-llama/Llama-3.2-3B-Instruct"
```

```
VLLM_URL="http://localhost:8000/v1" \
python -m pytest -s -v \
llama_stack/providers/tests/inference/test_text_inference.py \
--providers "inference=vllm_remote"
```

[//]: # (## Documentation)

Signed-off-by: Ben Browning
---
 .../inline/agents/meta_reference/agent_instance.py | 14 +++++++++++++-
 .../providers/remote/inference/vllm/vllm.py        |  4 ++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index 8c10c6baf..4a1421245 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -909,7 +909,19 @@ class ChatAgent(ShieldRunnerMixin):
                 if tool_def_map.get(built_in_type, None):
                     raise ValueError(f"Tool {built_in_type} already exists")
 
-                tool_def_map[built_in_type] = ToolDefinition(tool_name=built_in_type)
+                tool_def_map[built_in_type] = ToolDefinition(
+                    tool_name=built_in_type,
+                    description=tool_def.description,
+                    parameters={
+                        param.name: ToolParamDefinition(
+                            param_type=param.parameter_type,
+                            description=param.description,
+                            required=param.required,
+                            default=param.default,
+                        )
+                        for param in tool_def.parameters
+                    },
+                )
                 tool_to_group[built_in_type] = tool_def.toolgroup_id
                 continue
 
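As an aside, here is a minimal, self-contained sketch of what the expanded builtin-tool definition in the hunk above carries into the inference provider. The dataclasses below are simplified stand-ins for the Llama Stack types (the real `ToolDefinition` and `ToolParamDefinition` have more fields, and `built_in_type` is a `BuiltinTool` enum rather than a string), and the sample `web_search` listing is made up for illustration; only the field names come from the patch.

```
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional


# Simplified stand-ins for the Llama Stack types referenced in the hunk above.
@dataclass
class ToolParamDefinition:
    param_type: str
    description: Optional[str] = None
    required: bool = True
    default: Optional[Any] = None


@dataclass
class ToolDefinition:
    tool_name: str  # the real code passes a BuiltinTool enum member here
    description: Optional[str] = None
    parameters: Dict[str, ToolParamDefinition] = field(default_factory=dict)


@dataclass
class ListedToolParam:
    name: str
    parameter_type: str
    description: str
    required: bool = True
    default: Optional[Any] = None


@dataclass
class ListedToolDef:
    identifier: str
    description: str
    parameters: List[ListedToolParam]


def expand_builtin_tool(tool_def: ListedToolDef, built_in_type: str) -> ToolDefinition:
    # Mirror the patched logic: keep the builtin tool's description and full
    # parameter schema instead of passing only its name.
    return ToolDefinition(
        tool_name=built_in_type,
        description=tool_def.description,
        parameters={
            param.name: ToolParamDefinition(
                param_type=param.parameter_type,
                description=param.description,
                required=param.required,
                default=param.default,
            )
            for param in tool_def.parameters
        },
    )


# Hypothetical listing of the web_search builtin, for illustration only.
web_search = ListedToolDef(
    identifier="web_search",
    description="Search the web",
    parameters=[ListedToolParam(name="query", parameter_type="string", description="The query to search for")],
)
print(expand_builtin_tool(web_search, "brave_search"))
```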
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index b9422d85d..967a3e44d 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -169,7 +169,7 @@ async def _process_vllm_chat_completion_stream_response(
                 args = {} if not args_str else json.loads(args_str)
             except Exception as e:
                 log.warning(f"Failed to parse tool call buffer arguments: {args_str} \nError: {e}")
-            if args is not None:
+            if args:
                 yield ChatCompletionResponseStreamChunk(
                     event=ChatCompletionResponseEvent(
                         event_type=event_type,
@@ -183,7 +183,7 @@
                         ),
                     )
                 )
-            else:
+            elif args_str:
                 yield ChatCompletionResponseStreamChunk(
                     event=ChatCompletionResponseEvent(
                         event_type=ChatCompletionResponseEventType.progress,
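To make the streaming change concrete, here is a small, self-contained sketch (not the provider's actual code) of the branch logic the two hunks above implement once the stream reports a finish reason: emit a succeeded tool call only when the buffered arguments parse to a non-empty dict, emit a failed-parse progress event only when there was a non-empty argument buffer, and emit nothing extra when the buffer is empty, which is the case that previously produced the chunk with an empty ToolCall.

```
import json
from typing import Optional


def classify_finished_tool_call(args_str: str) -> Optional[str]:
    """Simplified sketch of the patched branch logic, not the provider code.

    Returns "succeeded" when the buffered arguments parse to a non-empty dict,
    "failed" when there was buffered text that could not be used, and None when
    the buffer was empty -- the case that used to yield an empty ToolCall chunk.
    """
    args = None
    try:
        args = {} if not args_str else json.loads(args_str)
    except Exception:
        pass  # the real code logs a warning here

    if args:  # was `if args is not None`, which also matched the empty {} case
        return "succeeded"
    elif args_str:  # was a bare `else`, which also matched an empty buffer
        return "failed"
    return None  # empty buffer: no extra chunk should be sent at all


assert classify_finished_tool_call('{"query": "latest llama model"}') == "succeeded"
assert classify_finished_tool_call("not valid json") == "failed"
assert classify_finished_tool_call("") is None  # previously produced an empty ToolCall
```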