From 34366f0b0108d0b9d09818439e060eac422f2f70 Mon Sep 17 00:00:00 2001
From: Yuan Tang
Date: Mon, 10 Feb 2025 21:54:36 -0500
Subject: [PATCH] Make utils non-public

Signed-off-by: Yuan Tang
---
 .../providers/remote/inference/vllm/vllm.py     | 14 ++++++++------
 .../providers/utils/inference/openai_compat.py  |  2 +-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 924aa0e7d..8618abccf 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -69,7 +69,7 @@ def build_model_aliases():
     ]
 
 
-def convert_to_vllm_tool_calls_in_response(
+def _convert_to_vllm_tool_calls_in_response(
     tool_calls,
 ) -> List[ToolCall]:
     if not tool_calls:
@@ -89,7 +89,7 @@ def convert_to_vllm_tool_calls_in_response(
     ]
 
 
-def convert_to_vllm_tools_in_request(tools: List[ToolDefinition]) -> List[dict]:
+def _convert_to_vllm_tools_in_request(tools: List[ToolDefinition]) -> List[dict]:
     if tools is None:
         return tools
@@ -128,7 +128,7 @@ def convert_to_vllm_tools_in_request(tools: List[ToolDefinition]) -> List[dict]:
     return None
 
 
-def convert_to_vllm_finish_reason(finish_reason: str) -> StopReason:
+def _convert_to_vllm_finish_reason(finish_reason: str) -> StopReason:
     return {
         "stop": StopReason.end_of_turn,
         "length": StopReason.out_of_tokens,
@@ -214,8 +214,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         result = ChatCompletionResponse(
             completion_message=CompletionMessage(
                 content=choice.message.content or "",
-                stop_reason=convert_to_vllm_finish_reason(choice.finish_reason),
-                tool_calls=convert_to_vllm_tool_calls_in_response(choice.message.tool_calls),
+                stop_reason=_convert_to_vllm_finish_reason(choice.finish_reason),
+                tool_calls=_convert_to_vllm_tool_calls_in_response(choice.message.tool_calls),
             ),
             logprobs=None,
         )
@@ -269,7 +269,9 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         if "max_tokens" not in options:
             options["max_tokens"] = self.config.max_tokens
 
-        input_dict = {"tools": convert_to_vllm_tools_in_request(request.tools)}
+        input_dict = {}
+        if isinstance(request, ChatCompletionRequest) and request.tools is not None:
+            input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)}
 
         if isinstance(request, ChatCompletionRequest):
             input_dict["messages"] = [await convert_message_to_openai_dict(m, download=True) for m in request.messages]
diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index 1388d14f2..ac9d18312 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -174,7 +174,7 @@ def process_chat_completion_response(
 ) -> ChatCompletionResponse:
     choice = response.choices[0]
 
-    # TODO: This does not work well with tool calls (at least for vLLM remote)
+    # TODO: This does not work well with tool calls for vLLM remote provider
     raw_message = formatter.decode_assistant_message_from_content(
         text_from_choice(choice), get_stop_reason(choice.finish_reason)
     )
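
Besides the renames, the patch changes behavior in the request-building path: tools are only converted when the request is a ChatCompletionRequest that actually carries tools. The following is a minimal, self-contained sketch of that guard, not llama-stack code; the dataclasses, the pass-through converter, and the build_input_dict() helper are simplified, hypothetical stand-ins for the real types and logic.

# Sketch of the guard added in this patch (hypothetical stand-in types).
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class ChatCompletionRequest:
    # Stand-in for llama_stack's ChatCompletionRequest; only the fields used here.
    messages: List[dict]
    tools: Optional[List[dict]] = None


@dataclass
class CompletionRequest:
    # Stand-in for llama_stack's CompletionRequest; note it has no `tools` attribute.
    content: str


def _convert_to_vllm_tools_in_request(tools: List[dict]) -> List[dict]:
    # Placeholder for the real conversion logic; passes tools through unchanged.
    return tools


def build_input_dict(request) -> dict:
    # Mirrors the patched behavior: the old unconditional
    # {"tools": convert_to_vllm_tools_in_request(request.tools)} would raise
    # AttributeError for a plain CompletionRequest, which has no `tools`.
    input_dict = {}
    if isinstance(request, ChatCompletionRequest) and request.tools is not None:
        input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)}
    return input_dict


if __name__ == "__main__":
    chat = ChatCompletionRequest(
        messages=[{"role": "user", "content": "hi"}],
        tools=[{"tool_name": "get_weather"}],
    )
    plain = CompletionRequest(content="hi")
    print(build_input_dict(chat))   # {'tools': [{'tool_name': 'get_weather'}]}
    print(build_input_dict(plain))  # {}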