forked from phoenix-oss/llama-stack-mirror
fix: Fixed an "out of token budget" error when attempting a tool call via remote vLLM provider (#2114)
# What does this PR do?

Closes #2113. Closes #1783.

Fixes a bug in handling the end of a tool execution request stream where no `finish_reason` is provided by the model.

## Test Plan

1. Ran existing unit tests
2. Added a dedicated test verifying correct behavior in this edge case
3. Ran the code snapshot from #2113

[//]: # (## Documentation)
This commit is contained in:
parent
268725868e
commit
5052c3cbf3
2 changed files with 184 additions and 35 deletions
|
@ -374,3 +374,105 @@ async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_
|
|||
assert chunks[-2].event.delta.type == "tool_call"
|
||||
assert chunks[-2].event.delta.tool_call.tool_name == mock_tool_name
|
||||
assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_process_vllm_chat_completion_stream_response_no_finish_reason():
    """
    Verify that a tool-call stream which ends without a finish reason is still completed.

    The mocked stream carries a single chunk that requests a tool call and then simply ends; the
    chunk's `finish_reason` is None. The stream processor is expected to treat the end of the
    stream as a valid end of message: a tool-call event followed by a `complete` event.
    """
    mock_tool_name = "mock_tool"
    mock_tool_arguments = {"arg1": 0, "arg2": 100}
    mock_tool_arguments_str = '"{\\"arg1\\": 0, \\"arg2\\": 100}"'

    # The only tool call in the stream; its arguments are passed as a string, not as a dict.
    tool_call_delta = {
        "index": 0,
        "id": "mock_id",
        "type": "function",
        "function": {
            "name": mock_tool_name,
            "arguments": mock_tool_arguments_str,
        },
    }
    # `finish_reason` is deliberately None -- this is the edge case under test.
    choice = {
        "delta": {
            "content": None,
            "tool_calls": [tool_call_delta],
        },
        "finish_reason": None,
        "logprobs": None,
        "index": 0,
    }

    async def mock_stream():
        # A one-chunk stream: nothing follows the tool call request.
        yield OpenAIChatCompletionChunk(
            id="chunk-1",
            created=1,
            model="foo",
            object="chat.completion.chunk",
            choices=[choice],
        )

    chunks = [event async for event in _process_vllm_chat_completion_stream_response(mock_stream())]

    assert len(chunks) == 2
    tool_call_event, final_event = chunks[-2].event, chunks[-1].event
    assert final_event.event_type == ChatCompletionResponseEventType.complete
    assert tool_call_event.delta.type == "tool_call"
    assert tool_call_event.delta.tool_call.tool_name == mock_tool_name
    assert tool_call_event.delta.tool_call.arguments == mock_tool_arguments
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_process_vllm_chat_completion_stream_response_tool_without_args():
    """
    Verify that a tool call streamed with no arguments is handled as a regular tool call.

    The mocked stream carries a single chunk whose tool call has an empty arguments string (and a
    `finish_reason` of None). The stream processor is expected to emit a tool-call event whose
    arguments are an empty dict, followed by a `complete` event.
    """
    mock_tool_name = "mock_tool"

    # The only tool call in the stream; the empty arguments string is the edge case under test.
    tool_call_delta = {
        "index": 0,
        "id": "mock_id",
        "type": "function",
        "function": {
            "name": mock_tool_name,
            "arguments": "",
        },
    }
    choice = {
        "delta": {
            "content": None,
            "tool_calls": [tool_call_delta],
        },
        "finish_reason": None,
        "logprobs": None,
        "index": 0,
    }

    async def mock_stream():
        # A one-chunk stream: nothing follows the tool call request.
        yield OpenAIChatCompletionChunk(
            id="chunk-1",
            created=1,
            model="foo",
            object="chat.completion.chunk",
            choices=[choice],
        )

    chunks = [event async for event in _process_vllm_chat_completion_stream_response(mock_stream())]

    assert len(chunks) == 2
    tool_call_event, final_event = chunks[-2].event, chunks[-1].event
    assert final_event.event_type == ChatCompletionResponseEventType.complete
    assert tool_call_event.delta.type == "tool_call"
    assert tool_call_event.delta.tool_call.tool_name == mock_tool_name
    assert tool_call_event.delta.tool_call.arguments == {}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue