Make utils non-public

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
Yuan Tang 2025-02-10 21:54:36 -05:00
parent b2a86532a2
commit 34366f0b01
2 changed files with 9 additions and 7 deletions


@@ -69,7 +69,7 @@ def build_model_aliases():
     ]


-def convert_to_vllm_tool_calls_in_response(
+def _convert_to_vllm_tool_calls_in_response(
     tool_calls,
 ) -> List[ToolCall]:
     if not tool_calls:
@@ -89,7 +89,7 @@ def convert_to_vllm_tool_calls_in_response(
     ]


-def convert_to_vllm_tools_in_request(tools: List[ToolDefinition]) -> List[dict]:
+def _convert_to_vllm_tools_in_request(tools: List[ToolDefinition]) -> List[dict]:
     if tools is None:
         return tools
@@ -128,7 +128,7 @@ def convert_to_vllm_tools_in_request(tools: List[ToolDefinition]) -> List[dict]:
     return None


-def convert_to_vllm_finish_reason(finish_reason: str) -> StopReason:
+def _convert_to_vllm_finish_reason(finish_reason: str) -> StopReason:
     return {
         "stop": StopReason.end_of_turn,
         "length": StopReason.out_of_tokens,
@@ -214,8 +214,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         result = ChatCompletionResponse(
             completion_message=CompletionMessage(
                 content=choice.message.content or "",
-                stop_reason=convert_to_vllm_finish_reason(choice.finish_reason),
-                tool_calls=convert_to_vllm_tool_calls_in_response(choice.message.tool_calls),
+                stop_reason=_convert_to_vllm_finish_reason(choice.finish_reason),
+                tool_calls=_convert_to_vllm_tool_calls_in_response(choice.message.tool_calls),
             ),
             logprobs=None,
         )
@@ -269,7 +269,9 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         if "max_tokens" not in options:
             options["max_tokens"] = self.config.max_tokens

-        input_dict = {"tools": convert_to_vllm_tools_in_request(request.tools)}
+        input_dict = {}
+        if isinstance(request, ChatCompletionRequest) and request.tools is not None:
+            input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)}

         if isinstance(request, ChatCompletionRequest):
             input_dict["messages"] = [await convert_message_to_openai_dict(m, download=True) for m in request.messages]


@@ -174,7 +174,7 @@ def process_chat_completion_response(
 ) -> ChatCompletionResponse:
     choice = response.choices[0]

-    # TODO: This does not work well with tool calls (at least for vLLM remote)
+    # TODO: This does not work well with tool calls for vLLM remote provider
     raw_message = formatter.decode_assistant_message_from_content(
         text_from_choice(choice), get_stop_reason(choice.finish_reason)
     )
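
The updated TODO records that decoding the assistant message from raw text does not recover tool calls for the vLLM remote provider, which is why the adapter in the first file converts the OpenAI-style tool_calls field directly via the now-private _convert_to_vllm_tool_calls_in_response. A rough sketch of such a conversion is below; the tool_call attribute shape is assumed from the OpenAI-compatible schema and ToolCall is a simplified stand-in, not the adapter's exact implementation.

# Hedged sketch: build ToolCall objects from OpenAI-style tool_calls entries.
import json
from dataclasses import dataclass
from typing import Any, Dict, List


@dataclass
class ToolCall:
    call_id: str
    tool_name: str
    arguments: Dict[str, Any]


def convert_tool_calls(tool_calls) -> List[ToolCall]:
    if not tool_calls:
        return []
    return [
        ToolCall(
            call_id=call.id,
            tool_name=call.function.name,
            # Arguments arrive as a JSON-encoded string in the OpenAI schema.
            arguments=json.loads(call.function.arguments),
        )
        for call in tool_calls
    ]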