diff --git a/litellm/caching.py b/litellm/caching.py
index 7f67ee455..2c0a5a2da 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -1242,8 +1242,8 @@ class QdrantSemanticCache(BaseCache):
         import os
 
         from litellm.llms.custom_httpx.http_handler import (
-            _get_async_httpx_client,
             _get_httpx_client,
+            get_async_httpx_client,
         )
 
         if collection_name is None:
@@ -1290,7 +1290,7 @@ class QdrantSemanticCache(BaseCache):
         self.headers = headers
 
         self.sync_client = _get_httpx_client()
-        self.async_client = _get_async_httpx_client()
+        self.async_client = get_async_httpx_client()
 
         if quantization_config is None:
             print_verbose(
diff --git a/litellm/llms/anthropic/chat.py b/litellm/llms/anthropic/chat.py
index 18e530bb7..a5cee1715 100644
--- a/litellm/llms/anthropic/chat.py
+++ b/litellm/llms/anthropic/chat.py
@@ -25,8 +25,8 @@ from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.types.llms.anthropic import (
     AnthopicMessagesAssistantMessageParam,
@@ -918,7 +918,7 @@ class AnthropicChatCompletion(BaseLLM):
         headers={},
         client=None,
     ) -> Union[ModelResponse, CustomStreamWrapper]:
-        async_handler = _get_async_httpx_client()
+        async_handler = get_async_httpx_client()
 
         try:
             response = await async_handler.post(
diff --git a/litellm/llms/bedrock/chat.py b/litellm/llms/bedrock/chat.py
index ee09797ba..80a546bd5 100644
--- a/litellm/llms/bedrock/chat.py
+++ b/litellm/llms/bedrock/chat.py
@@ -35,8 +35,8 @@ from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
@@ -209,7 +209,7 @@ async def make_call(
 ):
     try:
         if client is None:
-            client = _get_async_httpx_client()  # Create a new client if none provided
+            client = get_async_httpx_client()  # Create a new client if none provided
 
         response = await client.post(
             api_base,
@@ -1041,7 +1041,7 @@ class BedrockLLM(BaseAWSLLM):
                 if isinstance(timeout, float) or isinstance(timeout, int):
                     timeout = httpx.Timeout(timeout)
                 _params["timeout"] = timeout
-            client = _get_async_httpx_client(_params)  # type: ignore
+            client = get_async_httpx_client(_params)  # type: ignore
         else:
             client = client  # type: ignore
 
@@ -1498,7 +1498,7 @@ class BedrockConverseLLM(BaseAWSLLM):
                 if isinstance(timeout, float) or isinstance(timeout, int):
                     timeout = httpx.Timeout(timeout)
                 _params["timeout"] = timeout
-            client = _get_async_httpx_client(_params)  # type: ignore
+            client = get_async_httpx_client(_params)  # type: ignore
         else:
             client = client  # type: ignore
 
diff --git a/litellm/llms/bedrock/embed/embedding.py b/litellm/llms/bedrock/embed/embedding.py
index a7a6c173c..ddc8f2162 100644
--- a/litellm/llms/bedrock/embed/embedding.py
+++ b/litellm/llms/bedrock/embed/embedding.py
@@ -15,8 +15,8 @@ from litellm.llms.cohere.embed import embedding as cohere_embedding
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.secret_managers.main import get_secret
 from litellm.types.llms.bedrock import AmazonEmbeddingRequest, CohereEmbeddingRequest
@@ -130,7 +130,7 @@ class BedrockEmbedding(BaseAWSLLM):
                 if isinstance(timeout, float) or isinstance(timeout, int):
                     timeout = httpx.Timeout(timeout)
                 _params["timeout"] = timeout
-            client = _get_async_httpx_client(_params)  # type: ignore
+            client = get_async_httpx_client(_params)  # type: ignore
         else:
             client = client
diff --git a/litellm/llms/cohere/rerank.py b/litellm/llms/cohere/rerank.py
index 97cd7e399..137624ade 100644
--- a/litellm/llms/cohere/rerank.py
+++ b/litellm/llms/cohere/rerank.py
@@ -11,8 +11,8 @@ from pydantic import BaseModel
 
 from litellm.llms.base import BaseLLM
 from litellm.llms.custom_httpx.http_handler import (
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.rerank_api.types import RerankRequest, RerankResponse
 
@@ -65,7 +65,7 @@ class CohereRerank(BaseLLM):
         api_key: str,
         api_base: str,
     ) -> RerankResponse:
-        client = _get_async_httpx_client()
+        client = get_async_httpx_client()
 
         response = await client.post(
             api_base,
diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py
index 2f07ee2f7..babab8940 100644
--- a/litellm/llms/custom_httpx/http_handler.py
+++ b/litellm/llms/custom_httpx/http_handler.py
@@ -378,7 +378,7 @@ class HTTPHandler:
         pass
 
 
-def _get_async_httpx_client(params: Optional[dict] = None) -> AsyncHTTPHandler:
+def get_async_httpx_client(params: Optional[dict] = None) -> AsyncHTTPHandler:
     """
     Retrieves the async HTTP client from the cache
     If not present, creates a new client
diff --git a/litellm/llms/sagemaker/sagemaker.py b/litellm/llms/sagemaker/sagemaker.py
index a7b36134b..733826aee 100644
--- a/litellm/llms/sagemaker/sagemaker.py
+++ b/litellm/llms/sagemaker/sagemaker.py
@@ -19,8 +19,8 @@ from litellm.litellm_core_utils.asyncify import asyncify
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.types.llms.openai import (
     ChatCompletionToolCallChunk,
@@ -566,7 +566,7 @@ class SagemakerLLM(BaseAWSLLM):
         try:
             if client is None:
                 client = (
-                    _get_async_httpx_client()
+                    get_async_httpx_client()
                 )  # Create a new client if none provided
             response = await client.post(
                 api_base,
@@ -673,7 +673,7 @@ class SagemakerLLM(BaseAWSLLM):
         model_id: Optional[str],
     ):
         timeout = 300.0
-        async_handler = _get_async_httpx_client()
+        async_handler = get_async_httpx_client()
 
         async_transform_prompt = asyncify(self._transform_prompt)
 
diff --git a/litellm/llms/togetherai/rerank.py b/litellm/llms/togetherai/rerank.py
index 5d905071c..17e4ce7e5 100644
--- a/litellm/llms/togetherai/rerank.py
+++ b/litellm/llms/togetherai/rerank.py
@@ -11,8 +11,8 @@ from pydantic import BaseModel
 
 from litellm.llms.base import BaseLLM
 from litellm.llms.custom_httpx.http_handler import (
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.rerank_api.types import RerankRequest, RerankResponse
 
@@ -77,7 +77,7 @@ class TogetherAIRerank(BaseLLM):
         request_data_dict: Dict[str, Any],
         api_key: str,
     ) -> RerankResponse:
-        client = _get_async_httpx_client()  # Use async client
+        client = get_async_httpx_client()  # Use async client
 
         response = await client.post(
             "https://api.together.xyz/v1/rerank",
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py
index abe8b29ed..d2456fd1f 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py
@@ -22,7 +22,7 @@ from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
-    _get_async_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.llms.prompt_templates.factory import (
     convert_url_to_base64,
@@ -1293,7 +1293,7 @@ class VertexLLM(BaseLLM):
             _async_client_params = {}
             if timeout:
                 _async_client_params["timeout"] = timeout
-            client = _get_async_httpx_client(params=_async_client_params)
+            client = get_async_httpx_client(params=_async_client_params)
         ## LOGGING
         logging_obj.pre_call(
             input=messages,
diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/text_to_speech/text_to_speech_handler.py b/litellm/llms/vertex_ai_and_google_ai_studio/text_to_speech/text_to_speech_handler.py
index bc2424ecc..dbb38a715 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/text_to_speech/text_to_speech_handler.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/text_to_speech/text_to_speech_handler.py
@@ -9,8 +9,8 @@ from litellm.llms.base import BaseLLM
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
-    _get_async_httpx_client,
     _get_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.llms.OpenAI.openai import HttpxBinaryResponseContent
 from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
@@ -178,7 +178,7 @@ class VertexTextToSpeechAPI(VertexLLM):
     ) -> HttpxBinaryResponseContent:
         import base64
 
-        async_handler = _get_async_httpx_client()
+        async_handler = get_async_httpx_client()
 
         response = await async_handler.post(
             url=url,
diff --git a/litellm/proxy/auth/oauth2_check.py b/litellm/proxy/auth/oauth2_check.py
index ed5a3e26b..f2e175632 100644
--- a/litellm/proxy/auth/oauth2_check.py
+++ b/litellm/proxy/auth/oauth2_check.py
@@ -20,7 +20,7 @@ async def check_oauth2_token(token: str) -> UserAPIKeyAuth:
     import httpx
 
     from litellm._logging import verbose_proxy_logger
-    from litellm.llms.custom_httpx.http_handler import _get_async_httpx_client
+    from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
     from litellm.proxy._types import CommonProxyErrors
     from litellm.proxy.proxy_server import premium_user
 
@@ -40,7 +40,7 @@ async def check_oauth2_token(token: str) -> UserAPIKeyAuth:
     if not token_info_endpoint:
         raise ValueError("OAUTH_TOKEN_INFO_ENDPOINT environment variable is not set")
 
-    client = _get_async_httpx_client()
+    client = get_async_httpx_client()
     headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
 
     try:
diff --git a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py
index eee26bd42..f4e1a345e 100644
--- a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py
@@ -33,7 +33,7 @@ from litellm.litellm_core_utils.logging_utils import (
 from litellm.llms.base_aws_llm import BaseAWSLLM
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
-    _get_async_httpx_client,
+    get_async_httpx_client,
 )
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
@@ -55,7 +55,7 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM):
         guardrailVersion: Optional[str] = None,
         **kwargs,
     ):
-        self.async_handler = _get_async_httpx_client()
+        self.async_handler = get_async_httpx_client()
         self.guardrailIdentifier = guardrailIdentifier
         self.guardrailVersion = guardrailVersion
diff --git a/litellm/proxy/guardrails/guardrail_hooks/presidio.py b/litellm/proxy/guardrails/guardrail_hooks/presidio.py
index 857704bf2..db22c1872 100644
--- a/litellm/proxy/guardrails/guardrail_hooks/presidio.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/presidio.py
@@ -22,7 +22,7 @@ import litellm  # noqa: E401
 from litellm._logging import verbose_proxy_logger
 from litellm.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
-from litellm.llms.custom_httpx.http_handler import _get_async_httpx_client
+from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.utils import (
     EmbeddingResponse,
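
The diff above only renames `_get_async_httpx_client` to the public `get_async_httpx_client` at every call site; the function's signature and behavior (return a cached `AsyncHTTPHandler`, creating one if absent, with an optional `params` dict) are unchanged. A minimal usage sketch of the renamed helper follows. It assumes a litellm build containing this change; the endpoint URL, payload, and the `json=`/`headers=` keyword arguments passed to `.post()` are illustrative assumptions, not taken from this diff.

```python
import asyncio

from litellm.llms.custom_httpx.http_handler import get_async_httpx_client


async def main() -> None:
    # Same pattern as the call sites above: no arguments, or an optional
    # params dict (e.g. {"timeout": ...}) forwarded to the handler.
    client = get_async_httpx_client()

    # The handler exposes an awaitable .post(), as used by the callers in the diff.
    # URL and payload below are placeholders for illustration only.
    response = await client.post(
        "https://example.com/v1/endpoint",
        json={"query": "hello"},
        headers={"Content-Type": "application/json"},
    )
    print(response.status_code)


if __name__ == "__main__":
    asyncio.run(main())
```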