From 7b28c0f3fb330758dbc36e8b30abc84d1ed6121c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 22 Nov 2024 17:15:47 -0800
Subject: [PATCH 1/8] use correct location for types

---
 litellm/llms/custom_httpx/http_handler.py |  3 +--
 litellm/types/llms/custom_http.py         | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)
 create mode 100644 litellm/types/llms/custom_http.py

diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py
index f1b78ea63..f5c4f694d 100644
--- a/litellm/llms/custom_httpx/http_handler.py
+++ b/litellm/llms/custom_httpx/http_handler.py
@@ -8,8 +8,7 @@ from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport
 
 import litellm
 from litellm.caching import InMemoryCache
-
-from .types import httpxSpecialProvider
+from litellm.types.llms.custom_http import *
 
 if TYPE_CHECKING:
     from litellm import LlmProviders
diff --git a/litellm/types/llms/custom_http.py b/litellm/types/llms/custom_http.py
new file mode 100644
index 000000000..f43daff2a
--- /dev/null
+++ b/litellm/types/llms/custom_http.py
@@ -0,0 +1,20 @@
+from enum import Enum
+
+import litellm
+
+
+class httpxSpecialProvider(str, Enum):
+    """
+    Httpx Clients can be created for these litellm internal providers
+
+    Example:
+    - langsmith logging would need a custom async httpx client
+    - pass through endpoint would need a custom async httpx client
+    """
+
+    LoggingCallback = "logging_callback"
+    GuardrailCallback = "guardrail_callback"
+    Caching = "caching"
+    Oauth2Check = "oauth2_check"
+    SecretManager = "secret_manager"
+    PassThroughEndpoint = "pass_through_endpoint"

From 74979a6b5c2061c58fd277a8a5cbbd62b0811cd3 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 22 Nov 2024 17:15:58 -0800
Subject: [PATCH 2/8] fix types location

---
 litellm/llms/custom_httpx/types.py | 11 -----------
 1 file changed, 11 deletions(-)
 delete mode 100644 litellm/llms/custom_httpx/types.py

diff --git a/litellm/llms/custom_httpx/types.py b/litellm/llms/custom_httpx/types.py
deleted file mode 100644
index 8e6ad0eda..000000000
--- a/litellm/llms/custom_httpx/types.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from enum import Enum
-
-import litellm
-
-
-class httpxSpecialProvider(str, Enum):
-    LoggingCallback = "logging_callback"
-    GuardrailCallback = "guardrail_callback"
-    Caching = "caching"
-    Oauth2Check = "oauth2_check"
-    SecretManager = "secret_manager"
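Note on patches 1/8 and 2/8: the enum is moved, not changed (PassThroughEndpoint is the only new member). Because httpxSpecialProvider subclasses both str and Enum, its members behave as ordinary strings wherever a provider name is expected. A minimal standalone sketch of that behavior, with the member list abridged to two values:

from enum import Enum


class httpxSpecialProvider(str, Enum):
    LoggingCallback = "logging_callback"
    PassThroughEndpoint = "pass_through_endpoint"


# Members compare equal to their string values and are real str instances,
# so they concatenate directly, e.g. into a per-provider cache key.
assert httpxSpecialProvider.PassThroughEndpoint == "pass_through_endpoint"
assert isinstance(httpxSpecialProvider.LoggingCallback, str)
print("async_httpx_client_" + httpxSpecialProvider.LoggingCallback)

The wildcard import in http_handler.py keeps existing references to httpxSpecialProvider working without touching each call site, at the usual cost of an unscoped namespace.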
From ba74427cee8b524c0329bbf423f1ab5d6432b737 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 22 Nov 2024 17:16:31 -0800
Subject: [PATCH 3/8] perf improvement for pass through endpoints

---
 .../pass_through_endpoints/pass_through_endpoints.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
index f60fd0166..0fd174440 100644
--- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
+++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py
@@ -22,6 +22,7 @@ import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
 from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
     ModelResponseIterator,
 )
@@ -35,6 +36,7 @@ from litellm.proxy._types import (
 )
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
 from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.custom_http import httpxSpecialProvider
 
 from .streaming_handler import PassThroughStreamingHandler
 from .success_handler import PassThroughEndpointLogging
@@ -363,8 +365,11 @@ async def pass_through_request(  # noqa: PLR0915
         data=_parsed_body,
         call_type="pass_through_endpoint",
     )
-
-    async_client = httpx.AsyncClient(timeout=600)
+    async_client_obj = get_async_httpx_client(
+        llm_provider=httpxSpecialProvider.PassThroughEndpoint,
+        params={"timeout": 600},
+    )
+    async_client = async_client_obj.client
 
     litellm_call_id = str(uuid.uuid4())

From e305c4e76bfb10c59f7bb018326df1ccb4143102 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 22 Nov 2024 17:19:40 -0800
Subject: [PATCH 4/8] update lint check

---
 .../ensure_async_clients_test.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/code_coverage_tests/ensure_async_clients_test.py b/tests/code_coverage_tests/ensure_async_clients_test.py
index a509e5509..a4203d881 100644
--- a/tests/code_coverage_tests/ensure_async_clients_test.py
+++ b/tests/code_coverage_tests/ensure_async_clients_test.py
@@ -43,6 +43,19 @@ def check_for_async_http_handler(file_path):
                 raise ValueError(
                     f"found violation in file {file_path} line: {node.lineno}. Please use `get_async_httpx_client` instead. {warning_msg}"
                 )
+            # Check for attribute calls like httpx.AsyncClient()
+            elif isinstance(node.func, ast.Attribute):
+                full_name = ""
+                current = node.func
+                while isinstance(current, ast.Attribute):
+                    full_name = "." + current.attr + full_name
+                    current = current.value
+                if isinstance(current, ast.Name):
+                    full_name = current.id + full_name
+                if full_name.lower() in [name.lower() for name in target_names]:
+                    raise ValueError(
+                        f"found violation in file {file_path} line: {node.lineno}. Please use `get_async_httpx_client` instead. {warning_msg}"
+                    )
     return violations
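Note on patches 3/8 and 4/8: the substitution replaces a per-request httpx.AsyncClient(timeout=600) with a client fetched from get_async_httpx_client, and the lint check is widened so the old pattern cannot come back. The real helper lives in litellm/llms/custom_httpx/http_handler.py (its hunk in patch 1 shows it importing InMemoryCache) and is not reproduced in this series; the sketch below only illustrates the reuse idea behind it, with illustrative names, not the actual implementation:

from typing import Dict, Optional

import httpx

# Process-level cache of clients, keyed per provider and timeout.
# The real helper caches more client params; names here are assumptions.
_client_cache: Dict[str, httpx.AsyncClient] = {}


def get_cached_async_client(
    llm_provider: str, timeout: Optional[float] = None
) -> httpx.AsyncClient:
    cache_key = f"async_httpx_client_{llm_provider}_{timeout}"
    if cache_key not in _client_cache:
        # Constructed once; subsequent requests reuse pooled connections.
        _client_cache[cache_key] = httpx.AsyncClient(timeout=timeout)
    return _client_cache[cache_key]

Reusing one client means connection-pool and TLS setup happens once per process instead of once per request, which is the latency cost that warning_msg in the lint test estimates at roughly +500ms per request.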
From fb0299ef07fb3f24ea291f017482fc621aef0330 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 22 Nov 2024 17:22:41 -0800
Subject: [PATCH 5/8] fix import

---
 litellm/secret_managers/aws_secret_manager_v2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/secret_managers/aws_secret_manager_v2.py b/litellm/secret_managers/aws_secret_manager_v2.py
index 69add6f23..32653f57d 100644
--- a/litellm/secret_managers/aws_secret_manager_v2.py
+++ b/litellm/secret_managers/aws_secret_manager_v2.py
@@ -31,8 +31,8 @@ from litellm.llms.custom_httpx.http_handler import (
     _get_httpx_client,
     get_async_httpx_client,
 )
-from litellm.llms.custom_httpx.types import httpxSpecialProvider
 from litellm.proxy._types import KeyManagementSystem
+from litellm.types.llms.custom_http import httpxSpecialProvider
 
 
 class AWSSecretsManagerV2(BaseAWSLLM):

From 49464df216d136c22e8c877bfcb72fad253f1569 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 22 Nov 2024 17:33:22 -0800
Subject: [PATCH 6/8] fix ensure async clients test

---
 tests/code_coverage_tests/ensure_async_clients_test.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/code_coverage_tests/ensure_async_clients_test.py b/tests/code_coverage_tests/ensure_async_clients_test.py
index a4203d881..0565de9b3 100644
--- a/tests/code_coverage_tests/ensure_async_clients_test.py
+++ b/tests/code_coverage_tests/ensure_async_clients_test.py
@@ -5,9 +5,19 @@ ALLOWED_FILES = [
     # local files
     "../../litellm/__init__.py",
     "../../litellm/llms/custom_httpx/http_handler.py",
+    "../../litellm/router_utils/client_initalization_utils.py",
+    "../../litellm/llms/custom_httpx/http_handler.py",
+    "../../litellm/llms/huggingface_restapi.py",
+    "../../litellm/llms/base.py",
+    "../../litellm/llms/custom_httpx/httpx_handler.py",
     # when running on ci/cd
     "./litellm/__init__.py",
     "./litellm/llms/custom_httpx/http_handler.py",
+    "./litellm/router_utils/client_initalization_utils.py",
+    "./litellm/llms/custom_httpx/http_handler.py",
+    "./litellm/llms/huggingface_restapi.py",
+    "./litellm/llms/base.py",
+    "./litellm/llms/custom_httpx/httpx_handler.py",
 ]
 
 warning_msg = "this is a serious violation that can impact latency. Creating Async clients per request can add +500ms per request"
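Note on patches 5/8 and 6/8: patch 5 is the follow-through on the type move (the old litellm.llms.custom_httpx.types module was deleted in patch 2), and patch 6 allow-lists the files that still construct clients directly so the stricter check from patch 4 passes. Two review observations: the added entry for litellm/llms/custom_httpx/http_handler.py duplicates a line already in the list (harmless for a membership test, but droppable), and the spelling client_initalization_utils.py presumably mirrors the module's actual on-disk file name, so it should not be "corrected" here. To see concretely what the new ast.Attribute branch from patch 4 catches, here is a standalone walk with the same logic as the addition:

import ast

source = "client = httpx.AsyncClient(timeout=600)"
call = ast.parse(source).body[0].value  # the ast.Call node

# Rebuild the dotted call name from the Attribute chain, right to left.
full_name = ""
current = call.func
while isinstance(current, ast.Attribute):
    full_name = "." + current.attr + full_name
    current = current.value
if isinstance(current, ast.Name):
    full_name = current.id + full_name

print(full_name)  # -> httpx.AsyncClient

Dotted calls such as httpx.AsyncClient(...) parse as a Call whose func is an ast.Attribute chain rather than a bare ast.Name, which is why the name has to be reassembled before comparing against target_names.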
From 04dec96f79aff8f08048d92eb2eccf4fdf87ed5a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 22 Nov 2024 17:33:39 -0800
Subject: [PATCH 7/8] fix azure.py health check

---
 litellm/llms/AzureOpenAI/azure.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/litellm/llms/AzureOpenAI/azure.py b/litellm/llms/AzureOpenAI/azure.py
index f6a1790b6..24303ef2f 100644
--- a/litellm/llms/AzureOpenAI/azure.py
+++ b/litellm/llms/AzureOpenAI/azure.py
@@ -1528,7 +1528,8 @@ class AzureChatCompletion(BaseLLM):
         prompt: Optional[str] = None,
     ) -> dict:
         client_session = (
-            litellm.aclient_session or httpx.AsyncClient()
+            litellm.aclient_session
+            or get_async_httpx_client(llm_provider=litellm.LlmProviders.AZURE).client
         )  # handle dall-e-2 calls
         if "gateway.ai.cloudflare.com" in api_base:

From eabe323eaa90ad412868474370d694a16f8ceec0 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 22 Nov 2024 17:34:08 -0800
Subject: [PATCH 8/8] fix ollama

---
 litellm/llms/ollama.py      | 6 +++++-
 litellm/llms/ollama_chat.py | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py
index 896b93be5..e9dd2b53f 100644
--- a/litellm/llms/ollama.py
+++ b/litellm/llms/ollama.py
@@ -14,6 +14,7 @@ import requests  # type: ignore
 
 import litellm
 from litellm import verbose_logger
+from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.utils import ModelInfo, ProviderField, StreamingChoices
 
@@ -456,7 +457,10 @@ def ollama_completion_stream(url, data, logging_obj):
 
 async def ollama_async_streaming(url, data, model_response, encoding, logging_obj):
     try:
-        client = httpx.AsyncClient()
+        _async_http_client = get_async_httpx_client(
+            llm_provider=litellm.LlmProviders.OLLAMA
+        )
+        client = _async_http_client.client
         async with client.stream(
             url=f"{url}", json=data, method="POST", timeout=litellm.request_timeout
         ) as response:
diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py
index 536f766e0..ce0df139d 100644
--- a/litellm/llms/ollama_chat.py
+++ b/litellm/llms/ollama_chat.py
@@ -13,6 +13,7 @@ from pydantic import BaseModel
 
 import litellm
 from litellm import verbose_logger
+from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
 from litellm.types.llms.ollama import OllamaToolCall, OllamaToolCallFunction
 from litellm.types.llms.openai import ChatCompletionAssistantToolCall
 from litellm.types.utils import StreamingChoices
 
@@ -445,7 +446,10 @@ async def ollama_async_streaming(
     url, api_key, data, model_response, encoding, logging_obj
 ):
     try:
-        client = httpx.AsyncClient()
+        _async_http_client = get_async_httpx_client(
+            llm_provider=litellm.LlmProviders.OLLAMA
+        )
+        client = _async_http_client.client
         _request = {
             "url": f"{url}",
             "json": data,
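Note on patches 7/8 and 8/8: the same substitution is applied to the remaining direct constructions, in the Azure health check and in both Ollama streaming paths. In each case the wrapper returned by get_async_httpx_client exposes the underlying httpx.AsyncClient through its .client attribute, so the downstream code (client.stream(...), client_session) is untouched. A self-contained sketch of the resulting call pattern; the module-level client stands in for what the helper would return, and the URL and payload in the commented example are purely illustrative:

import asyncio

import httpx

# One long-lived client for the process, mirroring what the patched
# ollama_async_streaming now obtains via get_async_httpx_client(...).client.
shared_client = httpx.AsyncClient(timeout=600)


async def stream_once(url: str, payload: dict) -> None:
    # stream() reuses the shared client's pooled connections across calls.
    async with shared_client.stream("POST", url, json=payload) as response:
        async for line in response.aiter_lines():
            print(line)


# Illustrative call against a local Ollama-style server:
# asyncio.run(stream_once("http://localhost:11434/api/generate",
#                         {"model": "llama3", "prompt": "hi"}))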