diff --git a/litellm/llms/cohere/embed/handler.py b/litellm/llms/cohere/embed/handler.py
index 95cbec225..5b224c375 100644
--- a/litellm/llms/cohere/embed/handler.py
+++ b/litellm/llms/cohere/embed/handler.py
@@ -11,7 +11,11 @@ import requests  # type: ignore
 
 import litellm
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    get_async_httpx_client,
+)
 from litellm.types.llms.bedrock import CohereEmbeddingRequest
 from litellm.utils import Choices, Message, ModelResponse, Usage
 
@@ -71,7 +75,10 @@ async def async_embedding(
     )
     ## COMPLETION CALL
     if client is None:
-        client = AsyncHTTPHandler(concurrent_limit=1, timeout=timeout)
+        client = get_async_httpx_client(
+            llm_provider=litellm.LlmProviders.COHERE,
+            params={"timeout": timeout},
+        )
 
     try:
         response = await client.post(api_base, headers=headers, data=json.dumps(data))
diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py
index 11d052191..fd418103e 100644
--- a/litellm/llms/fine_tuning_apis/vertex_ai.py
+++ b/litellm/llms/fine_tuning_apis/vertex_ai.py
@@ -5,9 +5,14 @@ from typing import Any, Coroutine, Literal, Optional, Union
 import httpx
 from openai.types.fine_tuning.fine_tuning_job import FineTuningJob, Hyperparameters
 
+import litellm
 from litellm._logging import verbose_logger
 from litellm.llms.base import BaseLLM
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    get_async_httpx_client,
+)
 from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
     VertexLLM,
 )
@@ -26,8 +31,9 @@ class VertexFineTuningAPI(VertexLLM):
 
     def __init__(self) -> None:
         super().__init__()
-        self.async_handler = AsyncHTTPHandler(
-            timeout=httpx.Timeout(timeout=600.0, connect=5.0)
+        self.async_handler = get_async_httpx_client(
+            llm_provider=litellm.LlmProviders.VERTEX_AI,
+            params={"timeout": 600.0},
         )
 
     def convert_response_created_at(self, response: ResponseTuningJob):
diff --git a/litellm/llms/watsonx/completion/handler.py b/litellm/llms/watsonx/completion/handler.py
index fda25ba0f..9618f6342 100644
--- a/litellm/llms/watsonx/completion/handler.py
+++ b/litellm/llms/watsonx/completion/handler.py
@@ -24,7 +24,10 @@ import httpx  # type: ignore
 import requests  # type: ignore
 
 import litellm
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    get_async_httpx_client,
+)
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.watsonx import WatsonXAIEndpoint
 from litellm.utils import EmbeddingResponse, ModelResponse, Usage, map_finish_reason
@@ -710,10 +713,13 @@ class RequestManager:
         if stream:
             request_params["stream"] = stream
         try:
-            self.async_handler = AsyncHTTPHandler(
-                timeout=httpx.Timeout(
-                    timeout=request_params.pop("timeout", 600.0), connect=5.0
-                ),
+            self.async_handler = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders.WATSONX,
+                params={
+                    "timeout": httpx.Timeout(
+                        timeout=request_params.pop("timeout", 600.0), connect=5.0
+                    ),
+                },
             )
             if "json" in request_params:
                 request_params["data"] = json.dumps(request_params.pop("json", {}))
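
Reviewer note: all three hunks converge on the same pattern — replace inline `AsyncHTTPHandler(...)` construction with the `get_async_httpx_client(llm_provider=..., params=...)` factory, which presumably centralizes per-provider client setup (the factory's body is not shown in this diff). Below is a minimal sketch of the new call pattern; the factory signature and `client.post(...)` usage are taken from the hunks above, while the endpoint URL, headers, and payload are illustrative placeholders, not values from this PR.

```python
# Sketch of the call pattern this PR converges on. The factory signature and
# client.post(...) usage come from the hunks above; the URL, headers, and
# payload are placeholders.
import asyncio
import json

import litellm
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client


async def embed_once() -> None:
    # Provider-scoped client from the shared factory, replacing an inline
    # AsyncHTTPHandler(concurrent_limit=1, timeout=timeout) per call site.
    client = get_async_httpx_client(
        llm_provider=litellm.LlmProviders.COHERE,
        params={"timeout": 600.0},
    )
    response = await client.post(
        "https://api.cohere.ai/v1/embed",  # placeholder endpoint
        headers={"Authorization": "Bearer <COHERE_API_KEY>"},  # placeholder auth
        data=json.dumps({"model": "embed-english-v3.0", "texts": ["hello world"]}),
    )
    print(response.status_code)


if __name__ == "__main__":
    asyncio.run(embed_once())
```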