From 15f69e75ffaf07c79edf1cdcef1c31d0b67bbc3d Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Fri, 28 Feb 2025 11:25:23 -0800
Subject: [PATCH] fix: replace eval with json decoding for format_adapter
 (#1328)

# What does this PR do?
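- Replaces `eval` with `json.loads` when decoding dataset columns in `format_adapter.py`, because calling `eval` on dataset content is a security risk.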

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

- see https://github.com/meta-llama/llama-stack/pull/1327

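cc @SLR722: because the columns are now decoded with `json.loads`, we will need to re-encode the corresponding dataset columns as JSON strings, e.g. via a one-off script such as: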

```python
def update_to_json_str():
    """One-off migration: re-encode a stringified dataset column as JSON.

    Loads a dataset, rewrites the "column" field of every row in the chosen
    split from its old `eval`-able string form to a JSON string, and pushes
    the processed split back to the hub.

    The `...` arguments and the `split` name are placeholders; fill them in
    for the dataset being migrated.
    """
    dataset = datasets.load_dataset(...)
    processed_dataset = dataset[split].map(
        lambda x: {
            # NOTE(security): `eval` executes arbitrary code, so only run this
            # on a dataset you trust. If the stored values are plain Python
            # literals, `ast.literal_eval` is presumably a safer drop-in.
            "column": json.dumps(eval(x["column"])),
        }
    )
    processed_dataset.push_to_hub(...)
```
[//]: # (## Documentation)
---
 .../post_training/torchtune/datasets/format_adapter.py   | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
index 884977803..6b607f1c7 100644
--- a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
+++ b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
@@ -10,16 +10,19 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import json
 from typing import Any, Mapping
 
 from llama_stack.providers.utils.common.data_schema_validator import ColumnName
 
 
-def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Mapping[str, Any]:
+def llama_stack_instruct_to_torchtune_instruct(
+    sample: Mapping[str, Any],
+) -> Mapping[str, Any]:
     assert ColumnName.chat_completion_input.value in sample and ColumnName.expected_answer.value in sample, (
         "Invalid input row"
     )
-    input_messages = eval(str(sample[ColumnName.chat_completion_input.value]))
+    input_messages = json.loads(sample[ColumnName.chat_completion_input.value])
 
     assert len(input_messages) == 1, "llama stack intruct dataset format only supports 1 user message"
     input_message = input_messages[0]
@@ -37,7 +40,7 @@ def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Map
 def llama_stack_chat_to_torchtune_chat(sample: Mapping[str, Any]) -> Mapping[str, Any]:
     assert ColumnName.dialog.value in sample, "Invalid input row"
     role_map = {"user": "human", "assistant": "gpt"}
-    dialog = eval(str(sample[ColumnName.dialog.value]))
+    dialog = json.loads(sample[ColumnName.dialog.value])
 
     assert len(dialog) > 1, "dialog must have at least 2 messagse"
     roles = []