diff --git a/litellm/caching.py b/litellm/caching.py
index 7f67ee455..2c0a5a2da 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -1242,8 +1242,8 @@ class QdrantSemanticCache(BaseCache):
         import os
 
         from litellm.llms.custom_httpx.http_handler import (
-            _get_async_httpx_client,
             _get_httpx_client,
+            get_async_httpx_client,
         )
 
         if collection_name is None:
@@ -1290,7 +1290,7 @@ class QdrantSemanticCache(BaseCache):
         self.headers = headers
 
         self.sync_client = _get_httpx_client()
-        self.async_client = _get_async_httpx_client()
+        self.async_client = get_async_httpx_client()
 
         if quantization_config is None:
             print_verbose(
diff --git a/litellm/llms/anthropic/chat.py b/litellm/llms/anthropic/chat.py
index 18e530bb7..a5cee1715 100644
--- a/litellm/llms/anthropic/chat.py
+++ b/litellm/llms/anthropic/chat.py
@@ -25,8 +25,8 @@ from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.types.llms.anthropic import (
     AnthopicMessagesAssistantMessageParam,
@@ -918,7 +918,7 @@ class AnthropicChatCompletion(BaseLLM):
         headers={},
         client=None,
     ) -> Union[ModelResponse, CustomStreamWrapper]:
-        async_handler = _get_async_httpx_client()
+        async_handler = get_async_httpx_client()
 
         try:
             response = await async_handler.post(
diff --git a/litellm/llms/bedrock/chat.py b/litellm/llms/bedrock/chat.py
index ee09797ba..80a546bd5 100644
--- a/litellm/llms/bedrock/chat.py
+++ b/litellm/llms/bedrock/chat.py
@@ -35,8 +35,8 @@ from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
@@ -209,7 +209,7 @@ async def make_call(
 ):
     try:
         if client is None:
-            client = _get_async_httpx_client()  # Create a new client if none provided
+            client = get_async_httpx_client()  # Create a new client if none provided
 
         response = await client.post(
             api_base,
@@ -1041,7 +1041,7 @@ class BedrockLLM(BaseAWSLLM):
                 if isinstance(timeout, float) or isinstance(timeout, int):
                     timeout = httpx.Timeout(timeout)
                 _params["timeout"] = timeout
-            client = _get_async_httpx_client(_params)  # type: ignore
+            client = get_async_httpx_client(_params)  # type: ignore
         else:
             client = client  # type: ignore
 
@@ -1498,7 +1498,7 @@ class BedrockConverseLLM(BaseAWSLLM):
                 if isinstance(timeout, float) or isinstance(timeout, int):
                     timeout = httpx.Timeout(timeout)
                 _params["timeout"] = timeout
-            client = _get_async_httpx_client(_params)  # type: ignore
+            client = get_async_httpx_client(_params)  # type: ignore
         else:
             client = client  # type: ignore
 
diff --git a/litellm/llms/bedrock/embed/embedding.py b/litellm/llms/bedrock/embed/embedding.py
index a7a6c173c..ddc8f2162 100644
--- a/litellm/llms/bedrock/embed/embedding.py
+++ b/litellm/llms/bedrock/embed/embedding.py
@@ -15,8 +15,8 @@ from litellm.llms.cohere.embed import embedding as cohere_embedding
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.secret_managers.main import get_secret
 from litellm.types.llms.bedrock import AmazonEmbeddingRequest, CohereEmbeddingRequest
@@ -130,7 +130,7 @@ class BedrockEmbedding(BaseAWSLLM):
                 if isinstance(timeout, float) or isinstance(timeout, int):
                     timeout = httpx.Timeout(timeout)
                 _params["timeout"] = timeout
-            client = _get_async_httpx_client(_params)  # type: ignore
+            client = get_async_httpx_client(_params)  # type: ignore
         else:
             client = client
diff --git a/litellm/llms/cohere/rerank.py b/litellm/llms/cohere/rerank.py
index 97cd7e399..137624ade 100644
--- a/litellm/llms/cohere/rerank.py
+++ b/litellm/llms/cohere/rerank.py
@@ -11,8 +11,8 @@ from pydantic import BaseModel
 
 from litellm.llms.base import BaseLLM
 from litellm.llms.custom_httpx.http_handler import (
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.rerank_api.types import RerankRequest, RerankResponse
 
@@ -65,7 +65,7 @@ class CohereRerank(BaseLLM):
         api_key: str,
         api_base: str,
     ) -> RerankResponse:
-        client = _get_async_httpx_client()
+        client = get_async_httpx_client()
 
         response = await client.post(
             api_base,
diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py
index 2f07ee2f7..babab8940 100644
--- a/litellm/llms/custom_httpx/http_handler.py
+++ b/litellm/llms/custom_httpx/http_handler.py
@@ -378,7 +378,7 @@ class HTTPHandler:
         pass
 
 
-def _get_async_httpx_client(params: Optional[dict] = None) -> AsyncHTTPHandler:
+def get_async_httpx_client(params: Optional[dict] = None) -> AsyncHTTPHandler:
     """
     Retrieves the async HTTP client from the cache
     If not present, creates a new client
diff --git a/litellm/llms/sagemaker/sagemaker.py b/litellm/llms/sagemaker/sagemaker.py
index a7b36134b..733826aee 100644
--- a/litellm/llms/sagemaker/sagemaker.py
+++ b/litellm/llms/sagemaker/sagemaker.py
@@ -19,8 +19,8 @@ from litellm.litellm_core_utils.asyncify import asyncify
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.types.llms.openai import (
     ChatCompletionToolCallChunk,
@@ -566,7 +566,7 @@ class SagemakerLLM(BaseAWSLLM):
         try:
             if client is None:
                 client = (
-                    _get_async_httpx_client()
+                    get_async_httpx_client()
                 )  # Create a new client if none provided
             response = await client.post(
                 api_base,
@@ -673,7 +673,7 @@ class SagemakerLLM(BaseAWSLLM):
         model_id: Optional[str],
     ):
         timeout = 300.0
-        async_handler = _get_async_httpx_client()
+        async_handler = get_async_httpx_client()
 
         async_transform_prompt = asyncify(self._transform_prompt)
 
diff --git a/litellm/llms/togetherai/rerank.py b/litellm/llms/togetherai/rerank.py
index 5d905071c..17e4ce7e5 100644
--- a/litellm/llms/togetherai/rerank.py
+++ b/litellm/llms/togetherai/rerank.py
@@ -11,8 +11,8 @@ from pydantic import BaseModel
 
 from litellm.llms.base import BaseLLM
 from litellm.llms.custom_httpx.http_handler import (
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.rerank_api.types import RerankRequest, RerankResponse
 
@@ -77,7 +77,7 @@ class TogetherAIRerank(BaseLLM):
         request_data_dict: Dict[str, Any],
         api_key: str,
     ) -> RerankResponse:
-        client = _get_async_httpx_client()  # Use async client
+        client = get_async_httpx_client()  # Use async client
 
         response = await client.post(
             "https://api.together.xyz/v1/rerank",
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py
index abe8b29ed..d2456fd1f 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py
@@ -22,7 +22,7 @@ from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
-    _get_async_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.llms.prompt_templates.factory import (
     convert_url_to_base64,
@@ -1293,7 +1293,7 @@ class VertexLLM(BaseLLM):
             _async_client_params = {}
             if timeout:
                 _async_client_params["timeout"] = timeout
-            client = _get_async_httpx_client(params=_async_client_params)
+            client = get_async_httpx_client(params=_async_client_params)
         ## LOGGING
         logging_obj.pre_call(
             input=messages,
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/text_to_speech/text_to_speech_handler.py b/litellm/llms/vertex_ai_and_google_ai_studio/text_to_speech/text_to_speech_handler.py
index bc2424ecc..dbb38a715 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/text_to_speech/text_to_speech_handler.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/text_to_speech/text_to_speech_handler.py
@@ -9,8 +9,8 @@ from litellm.llms.base import BaseLLM
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
 from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
@@ -178,7 +178,7 @@ class VertexTextToSpeechAPI(VertexLLM):
     ) -> HttpxBinaryResponseContent:
         import base64
 
-        async_handler = _get_async_httpx_client()
+        async_handler = get_async_httpx_client()
 
         response = await async_handler.post(
             url=url,
diff --git a/litellm/proxy/auth/oauth2_check.py b/litellm/proxy/auth/oauth2_check.py
index ed5a3e26b..f2e175632 100644
--- a/litellm/proxy/auth/oauth2_check.py
+++ b/litellm/proxy/auth/oauth2_check.py
@@ -20,7 +20,7 @@ async def check_oauth2_token(token: str) -> UserAPIKeyAuth:
     import httpx
 
     from litellm._logging import verbose_proxy_logger
-    from litellm.llms.custom_httpx.http_handler import _get_async_httpx_client
+    from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
     from litellm.proxy._types import CommonProxyErrors
     from litellm.proxy.proxy_server import premium_user
 
@@ -40,7 +40,7 @@ async def check_oauth2_token(token: str) -> UserAPIKeyAuth:
     if not token_info_endpoint:
         raise ValueError("OAUTH_TOKEN_INFO_ENDPOINT environment variable is not set")
 
-    client = _get_async_httpx_client()
+    client = get_async_httpx_client()
     headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
 
     try:
diff --git a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py
index eee26bd42..f4e1a345e 100644
--- a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py
@@ -33,7 +33,7 @@ from litellm.litellm_core_utils.logging_utils import (
 from litellm.llms.base_aws_llm import BaseAWSLLM
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
-    _get_async_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
@@ -55,7 +55,7 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM):
         guardrailVersion: Optional[str] = None,
         **kwargs,
     ):
-        self.async_handler = _get_async_httpx_client()
+        self.async_handler = get_async_httpx_client()
         self.guardrailIdentifier = guardrailIdentifier
         self.guardrailVersion = guardrailVersion
diff --git a/litellm/proxy/guardrails/guardrail_hooks/presidio.py b/litellm/proxy/guardrails/guardrail_hooks/presidio.py
index 857704bf2..db22c1872 100644
--- a/litellm/proxy/guardrails/guardrail_hooks/presidio.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/presidio.py
@@ -22,7 +22,7 @@ import litellm  # noqa: E401
 from litellm._logging import verbose_proxy_logger
 from litellm.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
-from litellm.llms.custom_httpx.http_handler import _get_async_httpx_client
+from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.utils import (
     EmbeddingResponse,
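
The diff above only renames `_get_async_httpx_client` to the public `get_async_httpx_client` at every call site; the function's signature and behavior (return a cached `AsyncHTTPHandler`, creating one if absent, with an optional `params` dict) are unchanged. A minimal usage sketch of the renamed helper follows. It assumes a litellm build containing this change; the endpoint URL, payload, and the `json=`/`headers=` keyword arguments passed to `.post()` are illustrative assumptions, not taken from this diff.

```python
import asyncio

from litellm.llms.custom_httpx.http_handler import get_async_httpx_client


async def main() -> None:
    # Same pattern as the call sites above: no arguments, or an optional
    # params dict (e.g. {"timeout": ...}) forwarded to the handler.
    client = get_async_httpx_client()

    # The handler exposes an awaitable .post(), as used by the callers in the diff.
    # URL and payload below are placeholders for illustration only.
    response = await client.post(
        "https://example.com/v1/endpoint",
        json={"query": "hello"},
        headers={"Content-Type": "application/json"},
    )
    print(response.status_code)


if __name__ == "__main__":
    asyncio.run(main())
```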