fix: remote vLLM tool execution now works when the last chunk contains the call arguments (#2112)

# What does this PR do?
Closes #2111.
Fixes a bug that caused Llama Stack to return a bare `<tool_call>` and
complete the turn without actually executing the tool. This happened when
the remote vLLM server delivered the tool call's arguments in the final
streamed chunk, the same chunk that carries the finish reason. See the
issue description for more detail.
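
The root cause is the branch ordering in the streaming handler: when the final chunk carries both the tool-call delta and the finish reason, the old `if finish_reason: ... elif delta.tool_calls: ...` structure finalized the turn before the arguments were ever buffered. A minimal, self-contained sketch of the two orderings (the plain dicts and the `get_weather` payload are illustrative stand-ins, not the provider's actual types):

```python
# Illustrative final streamed chunk: the tool-call arguments and the finish
# reason arrive together. Keys mimic choice.delta.tool_calls / choice.finish_reason.
final_chunk = {
    "delta_tool_calls": [{"function": {"name": "get_weather", "arguments": '{"city": "Tokyo"}'}}],
    "finish_reason": "tool_calls",
}

def old_ordering(chunk: dict, buf: str) -> str:
    # Old code: finish_reason wins, so the elif that buffers the arguments
    # never runs for a chunk that carries both fields.
    if chunk["finish_reason"]:
        return buf  # turn is finalized with an empty buffer -> bare <tool_call>
    elif chunk["delta_tool_calls"]:
        buf += chunk["delta_tool_calls"][0]["function"]["arguments"]
    return buf

def new_ordering(chunk: dict, buf: str) -> str:
    # Fixed code: buffer the tool-call delta first in its own `if`,
    # then handle finish_reason.
    if chunk["delta_tool_calls"]:
        buf += chunk["delta_tool_calls"][0]["function"]["arguments"]
    if chunk["finish_reason"]:
        return buf  # turn is finalized with the arguments included
    return buf

assert old_ordering(final_chunk, "") == ""                   # arguments lost
assert new_ordering(final_chunk, "") == '{"city": "Tokyo"}'  # arguments kept
```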

## Test Plan
1) Ran existing unit tests
2) Added a dedicated test verifying correct behavior in this edge case (a sketch of such a test follows this list)
3) Ran the code snapshot from #2111
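
For reference, a sketch of what such an edge-case test can look like, assuming pytest-asyncio and that the helper is importable as `llama_stack.providers.remote.inference.vllm.vllm._process_vllm_chat_completion_stream_response`; the tool name, arguments, and assertions are illustrative and not copied from the test added in this PR:

```python
import pytest
from openai.types.chat.chat_completion_chunk import (
    ChatCompletionChunk,
    Choice,
    ChoiceDelta,
    ChoiceDeltaToolCall,
    ChoiceDeltaToolCallFunction,
)

# Assumed import path for the streaming helper under test.
from llama_stack.providers.remote.inference.vllm.vllm import (
    _process_vllm_chat_completion_stream_response,
)


def _chunk(idx: int, delta: ChoiceDelta, finish_reason: str | None) -> ChatCompletionChunk:
    """Build a minimal OpenAI-style streaming chunk."""
    return ChatCompletionChunk(
        id=f"chunk-{idx}",
        created=idx,
        model="mock-model",
        object="chat.completion.chunk",
        choices=[Choice(index=0, delta=delta, finish_reason=finish_reason)],
    )


@pytest.mark.asyncio
async def test_tool_call_args_arrive_in_last_chunk():
    """Arguments are delivered only in the chunk that also carries finish_reason."""

    async def mock_stream():
        # First chunk: tool name and call id, no arguments yet.
        yield _chunk(
            1,
            ChoiceDelta(
                tool_calls=[
                    ChoiceDeltaToolCall(
                        index=0,
                        id="call_1",
                        function=ChoiceDeltaToolCallFunction(name="get_weather", arguments=None),
                    )
                ]
            ),
            None,
        )
        # Last chunk: the arguments arrive together with the finish reason.
        yield _chunk(
            2,
            ChoiceDelta(
                tool_calls=[
                    ChoiceDeltaToolCall(
                        index=0,
                        function=ChoiceDeltaToolCallFunction(arguments='{"city": "Tokyo"}'),
                    )
                ]
            ),
            "tool_calls",
        )

    events = [c.event async for c in _process_vllm_chat_completion_stream_response(mock_stream())]

    # Illustrative assertion: some emitted event should carry the parsed tool call
    # rather than the turn completing with nothing to execute.
    assert any(
        getattr(getattr(e.delta, "tool_call", None), "tool_name", None) == "get_weather"
        for e in events
    )
```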
Ilya Kolchinsky 2025-05-14 11:38:00 +02:00 committed by GitHub
parent 1de0dfaab5
commit 43d4447ff0
2 changed files with 87 additions and 7 deletions

@@ -168,6 +168,12 @@ async def _process_vllm_chat_completion_stream_response(
             log.warning("vLLM failed to generation any completions - check the vLLM server logs for an error.")
             continue
         choice = chunk.choices[0]
+        if choice.delta.tool_calls:
+            tool_call = convert_tool_call(choice.delta.tool_calls[0])
+            tool_call_buf.tool_name += str(tool_call.tool_name)
+            tool_call_buf.call_id += tool_call.call_id
+            # TODO: remove str() when dict type for 'arguments' is no longer allowed
+            tool_call_buf.arguments += str(tool_call.arguments)
         if choice.finish_reason:
             args_str = tool_call_buf.arguments
             args = None
@@ -208,13 +214,7 @@ async def _process_vllm_chat_completion_stream_response(
                         stop_reason=_convert_to_vllm_finish_reason(choice.finish_reason),
                     )
                 )
-        elif choice.delta.tool_calls:
-            tool_call = convert_tool_call(choice.delta.tool_calls[0])
-            tool_call_buf.tool_name += str(tool_call.tool_name)
-            tool_call_buf.call_id += tool_call.call_id
-            # TODO: remove str() when dict type for 'arguments' is no longer allowed
-            tool_call_buf.arguments += str(tool_call.arguments)
-        else:
+        elif not choice.delta.tool_calls:
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
                     event_type=event_type,