From 15f69e75ffaf07c79edf1cdcef1c31d0b67bbc3d Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 28 Feb 2025 11:25:23 -0800 Subject: [PATCH] fix: replace eval with json decoding for format_adapter (#1328) # What does this PR do? - Using `eval` to decode dataset columns is a security risk (it executes arbitrary code), so the format adapter now decodes them with `json.loads` instead. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan - see https://github.com/meta-llama/llama-stack/pull/1327 cc @SLR722: the corresponding dataset will need to be updated so that these columns hold JSON strings, e.g. via ```python def update_to_json_str(): dataset = datasets.load_dataset(...) processed_dataset = dataset[split].map( lambda x: { "column": json.dumps(eval(x["column"])) } ) processed_dataset.push_to_hub(...) ``` [//]: # (## Documentation) --- .../post_training/torchtune/datasets/format_adapter.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py index 884977803..6b607f1c7 100644 --- a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +++ b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py @@ -10,16 +10,19 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+import json from typing import Any, Mapping from llama_stack.providers.utils.common.data_schema_validator import ColumnName -def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Mapping[str, Any]: +def llama_stack_instruct_to_torchtune_instruct( + sample: Mapping[str, Any], +) -> Mapping[str, Any]: assert ColumnName.chat_completion_input.value in sample and ColumnName.expected_answer.value in sample, ( "Invalid input row" ) - input_messages = eval(str(sample[ColumnName.chat_completion_input.value])) + input_messages = json.loads(sample[ColumnName.chat_completion_input.value]) assert len(input_messages) == 1, "llama stack intruct dataset format only supports 1 user message" input_message = input_messages[0] @@ -37,7 +40,7 @@ def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Map def llama_stack_chat_to_torchtune_chat(sample: Mapping[str, Any]) -> Mapping[str, Any]: assert ColumnName.dialog.value in sample, "Invalid input row" role_map = {"user": "human", "assistant": "gpt"} - dialog = eval(str(sample[ColumnName.dialog.value])) + dialog = json.loads(sample[ColumnName.dialog.value]) assert len(dialog) > 1, "dialog must have at least 2 messagse" roles = []