From 462cdb051caca82830f570d1677f6ebcf85e2643 Mon Sep 17 00:00:00 2001
From: Swapna Lekkala
Date: Tue, 21 Oct 2025 12:13:03 -0700
Subject: [PATCH] remove setting interop threads

---
 llama_stack/providers/utils/inference/embedding_mixin.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/llama_stack/providers/utils/inference/embedding_mixin.py
index eafa91cff..c959b9c19 100644
--- a/llama_stack/providers/utils/inference/embedding_mixin.py
+++ b/llama_stack/providers/utils/inference/embedding_mixin.py
@@ -93,11 +93,7 @@ class SentenceTransformerEmbeddingMixin:
         # PyTorch's OpenMP kernels can segfault on macOS when spawned from background
         # threads with the default parallel settings, so force a single-threaded CPU run.
         log.debug(f"Constraining torch threads on {platform_name} to a single worker")
-        try:
-            torch.set_num_threads(1)
-            torch.set_num_interop_threads(1)
-        except Exception:
-            log.debug(f"Failed to adjust torch thread counts on {platform_name}", exc_info=True)
+        torch.set_num_threads(1)
         return SentenceTransformer(model, trust_remote_code=True)
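
Note: the commit message does not state a rationale, but a plausible one (an
assumption on my part) is PyTorch's documented restriction that
torch.set_num_interop_threads() may only be called once, and only before any
inter-op parallel work has started; a later or repeated call raises a
RuntimeError, which the old try/except silently swallowed. By contrast,
torch.set_num_threads() can be called at any time, so it no longer needs the
guard. A minimal sketch of the behavior difference:

    import torch

    # Allowed once, and only before inter-op parallel work begins.
    torch.set_num_interop_threads(1)

    try:
        # A second call (or any call after inter-op work has started)
        # raises RuntimeError rather than being a no-op.
        torch.set_num_interop_threads(1)
    except RuntimeError as err:
        print(f"cannot adjust inter-op threads: {err}")

    # Caps intra-op threads; safe to call repeatedly, so no guard is needed.
    torch.set_num_threads(1)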