Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-02 00:34:44 +00:00)

Commit: 63cce5673a
Parent: 89ed374fe3
Message: Resolving merge conflicts.
2 changed files with 86 additions and 7 deletions
@@ -234,6 +234,12 @@ async def _process_vllm_chat_completion_stream_response(
             log.warning("vLLM failed to generation any completions - check the vLLM server logs for an error.")
             return
         choice = chunk.choices[0]
+        if choice.delta.tool_calls:
+            tool_call = convert_tool_call(choice.delta.tool_calls[0])
+            tool_call_buf.tool_name += str(tool_call.tool_name)
+            tool_call_buf.call_id += tool_call.call_id
+            # TODO: remove str() when dict type for 'arguments' is no longer allowed
+            tool_call_buf.arguments += str(tool_call.arguments)
         if choice.finish_reason:
             chunks = _process_vllm_chat_completion_end_of_stream(
                 finish_reason=choice.finish_reason,
@@ -244,13 +250,7 @@ async def _process_vllm_chat_completion_stream_response(
             for c in chunks:
                 yield c
             end_of_stream_processed = True
-        elif choice.delta.tool_calls:
-            tool_call = convert_tool_call(choice.delta.tool_calls[0])
-            tool_call_buf.tool_name += str(tool_call.tool_name)
-            tool_call_buf.call_id += tool_call.call_id
-            # TODO: remove str() when dict type for 'arguments' is no longer allowed
-            tool_call_buf.arguments += str(tool_call.arguments)
-        else:
+        elif not choice.delta.tool_calls:
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
                     event_type=event_type,
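Taken together, the two hunks above move the accumulation of tool-call deltas into `tool_call_buf` ahead of the `finish_reason` check. Previously, a chunk that carried both a tool-call delta and the finish reason hit the `if choice.finish_reason:` branch first, so its arguments never reached the buffer that the end-of-stream handler flushes. Below is a minimal, self-contained sketch of this accumulate-then-flush pattern; the `ToolCallBuf` dataclass and the dict-shaped chunks are simplified stand-ins for illustration, not the adapter's actual types:

```python
from dataclasses import dataclass


@dataclass
class ToolCallBuf:
    # Simplified stand-in for the adapter's tool-call buffer (hypothetical, for illustration).
    tool_name: str = ""
    call_id: str = ""
    arguments: str = ""


def process_chunks(chunks):
    """Accumulate tool-call deltas first, then flush once a finish reason arrives."""
    buf = ToolCallBuf()
    events = []
    for chunk in chunks:
        delta = chunk.get("tool_call_delta")
        # Accumulate *before* looking at finish_reason, so a delta riding on the
        # final chunk is still captured (the case the diff above fixes).
        if delta:
            buf.tool_name += delta.get("name") or ""
            buf.call_id += delta.get("id") or ""
            buf.arguments += delta.get("arguments") or ""
        if chunk.get("finish_reason"):
            events.append(("tool_call", buf))
            events.append(("complete", chunk["finish_reason"]))
        elif not delta:
            events.append(("text", chunk.get("content") or ""))
    return events


# The final chunk carries both the arguments and the finish reason.
stream = [
    {"tool_call_delta": {"id": "call-1", "name": "mock_tool", "arguments": None}},
    {"tool_call_delta": {"arguments": '{"arg1": 0, "arg2": 100}'}, "finish_reason": "tool_calls"},
]
assert process_chunks(stream)[0][1].arguments == '{"arg1": 0, "arg2": 100}'
```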
@@ -297,6 +297,85 @@ async def test_get_params_empty_tools(vllm_inference_adapter):
     assert "tools" not in params
 
 
+@pytest.mark.asyncio
+async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_chunk():
+    """
+    Tests the edge case where the model returns the arguments for the tool call in the same chunk that
+    contains the finish reason (i.e., the last one).
+    We want to make sure the tool call is executed in this case, and the parameters are passed correctly.
+    """
+
+    mock_tool_name = "mock_tool"
+    mock_tool_arguments = {"arg1": 0, "arg2": 100}
+    mock_tool_arguments_str = json.dumps(mock_tool_arguments)
+
+    async def mock_stream():
+        mock_chunks = [
+            OpenAIChatCompletionChunk(
+                id="chunk-1",
+                created=1,
+                model="foo",
+                object="chat.completion.chunk",
+                choices=[
+                    {
+                        "delta": {
+                            "content": None,
+                            "tool_calls": [
+                                {
+                                    "index": 0,
+                                    "id": "mock_id",
+                                    "type": "function",
+                                    "function": {
+                                        "name": mock_tool_name,
+                                        "arguments": None,
+                                    },
+                                }
+                            ],
+                        },
+                        "finish_reason": None,
+                        "logprobs": None,
+                        "index": 0,
+                    }
+                ],
+            ),
+            OpenAIChatCompletionChunk(
+                id="chunk-1",
+                created=1,
+                model="foo",
+                object="chat.completion.chunk",
+                choices=[
+                    {
+                        "delta": {
+                            "content": None,
+                            "tool_calls": [
+                                {
+                                    "index": 0,
+                                    "id": None,
+                                    "function": {
+                                        "name": None,
+                                        "arguments": mock_tool_arguments_str,
+                                    },
+                                }
+                            ],
+                        },
+                        "finish_reason": "tool_calls",
+                        "logprobs": None,
+                        "index": 0,
+                    }
+                ],
+            ),
+        ]
+        for chunk in mock_chunks:
+            yield chunk
+
+    chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())]
+    assert len(chunks) == 2
+    assert chunks[-1].event.event_type == ChatCompletionResponseEventType.complete
+    assert chunks[-2].event.delta.type == "tool_call"
+    assert chunks[-2].event.delta.tool_call.tool_name == mock_tool_name
+    assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments
+
+
 @pytest.mark.asyncio
 async def test_process_vllm_chat_completion_stream_response_no_finish_reason():
     """
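Note that the final assertion compares `tool_call.arguments` against the original dict, while the mock stream only ever delivers the arguments as the JSON string `mock_tool_arguments_str`; the end-of-stream processing therefore has to parse the accumulated string back into structured arguments. A rough sketch of that parsing step, assuming a plain `json.loads` with a pass-through fallback (the helper name is illustrative, not the adapter's actual function):

```python
import json


def parse_buffered_arguments(raw: str):
    """Turn an accumulated tool-call argument string back into a dict.

    Falls back to the raw string if the model produced something that is not valid JSON.
    """
    try:
        return json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        return raw


# Mirrors the values used by the test above.
assert parse_buffered_arguments('{"arg1": 0, "arg2": 100}') == {"arg1": 0, "arg2": 100}
assert parse_buffered_arguments("not json") == "not json"
```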