From 89ed374fe3601ea6ba29f6824c7a88fe88f19ea1 Mon Sep 17 00:00:00 2001
From: ilya-kolchinsky <ilya.kolchinsky@gmail.com>
Date: Mon, 12 May 2025 20:55:24 +0200
Subject: [PATCH] CR fixes: rename stop_reason, simplify tool-call arg parsing

---
 .../providers/remote/inference/vllm/vllm.py     | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 30fcf1674..049eb4fcf 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -167,23 +167,16 @@ def _process_vllm_chat_completion_end_of_stream(
     chunks = []
 
     if finish_reason is not None:
-        actual_finish_reason = _convert_to_vllm_finish_reason(finish_reason)
+        stop_reason = _convert_to_vllm_finish_reason(finish_reason)
     else:
-        actual_finish_reason = StopReason.end_of_message
+        stop_reason = StopReason.end_of_message
 
     if tool_call_buf.tool_name:
         # at least one tool call request is received
 
         args_str = tool_call_buf.arguments or "{}"
-        args = {}
-        args_parsed_successfully = True
         try:
             args = json.loads(args_str)
-        except Exception as e:
-            args_parsed_successfully = False
-            log.warning(f"Failed to parse tool call buffer arguments: {args_str} \nError: {e}")
-
-        if args_parsed_successfully:
             chunks.append(
                 ChatCompletionResponseStreamChunk(
                     event=ChatCompletionResponseEvent(
@@ -200,7 +193,9 @@ def _process_vllm_chat_completion_end_of_stream(
                     )
                 )
             )
-        else:
+        except Exception as e:
+            log.warning(f"Failed to parse tool call buffer arguments: {args_str} \nError: {e}")
+
             chunks.append(
                 ChatCompletionResponseStreamChunk(
                     event=ChatCompletionResponseEvent(
@@ -219,7 +214,7 @@ def _process_vllm_chat_completion_end_of_stream(
                 event_type=ChatCompletionResponseEventType.complete,
                 delta=TextDelta(text=last_chunk_content or ""),
                 logprobs=None,
-                stop_reason=actual_finish_reason,
+                stop_reason=stop_reason,
             )
         )
     )