From 5acbe76ed7dfc073e92e5eb1ce3de6b7a0cba600 Mon Sep 17 00:00:00 2001
From: Yang Yang
Date: Tue, 18 Mar 2025 15:29:13 -0700
Subject: [PATCH] avoid tensor memory error

---
 .../inline/inference/meta_reference/parallel_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
index 738f9ddcd..e8767c2ff 100644
--- a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
+++ b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
@@ -10,6 +10,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import copy
 import json
 import logging
 import multiprocessing
@@ -213,7 +214,7 @@ def maybe_parse_message(maybe_json: Optional[str]) -> Optional[ProcessingMessage
 
 def parse_message(json_str: str) -> ProcessingMessage:
     data = json.loads(json_str)
-    return ProcessingMessageWrapper(**data).payload
+    return copy.deepcopy(ProcessingMessageWrapper(**data).payload)
 
 
 def worker_process_entrypoint(