From 462cdb051caca82830f570d1677f6ebcf85e2643 Mon Sep 17 00:00:00 2001
From: Swapna Lekkala
Date: Tue, 21 Oct 2025 12:13:03 -0700
Subject: [PATCH] remove setting interop threads

---
 llama_stack/providers/utils/inference/embedding_mixin.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/llama_stack/providers/utils/inference/embedding_mixin.py
index eafa91cff..c959b9c19 100644
--- a/llama_stack/providers/utils/inference/embedding_mixin.py
+++ b/llama_stack/providers/utils/inference/embedding_mixin.py
@@ -93,11 +93,7 @@ class SentenceTransformerEmbeddingMixin:
         # PyTorch's OpenMP kernels can segfault on macOS when spawned from background
         # threads with the default parallel settings, so force a single-threaded CPU run.
         log.debug(f"Constraining torch threads on {platform_name} to a single worker")
-        try:
-            torch.set_num_threads(1)
-            torch.set_num_interop_threads(1)
-        except Exception:
-            log.debug(f"Failed to adjust torch thread counts on {platform_name}", exc_info=True)
+        torch.set_num_threads(1)
         return SentenceTransformer(model, trust_remote_code=True)
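
Note: the commit message does not state a rationale, but a plausible one (an
assumption on my part) is PyTorch's documented restriction that
torch.set_num_interop_threads() may only be called once, and only before any
inter-op parallel work has started; a later or repeated call raises a
RuntimeError, which the old try/except silently swallowed. By contrast,
torch.set_num_threads() can be called at any time, so it no longer needs the
guard. A minimal sketch of the behavior difference:

    import torch

    # Allowed once, and only before inter-op parallel work begins.
    torch.set_num_interop_threads(1)

    try:
        # A second call (or any call after inter-op work has started)
        # raises RuntimeError rather than being a no-op.
        torch.set_num_interop_threads(1)
    except RuntimeError as err:
        print(f"cannot adjust inter-op threads: {err}")

    # Caps intra-op threads; safe to call repeatedly, so no guard is needed.
    torch.set_num_threads(1)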