(fix) httpx handler - bind to ipv4 for httpx handler (#6785)

* bind to ipv4 on httpx handler

* add force_ipv4

* use helper for _create_async_transport

* fix circular import

* document force_ipv4

* test_async_http_handler_force_ipv4
Ishaan Jaff, 2024-11-18 12:22:51 -08:00 (committed by GitHub)
parent b854f6c07b
commit f43768d617
4 changed files with 87 additions and 1 deletion


@@ -625,7 +625,9 @@ litellm_settings:
redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.), from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"] # default tags for Langfuse Logging
# Networking settings
request_timeout: 10 # (int) llm request timeout in seconds. Raises a Timeout error if the call takes longer than 10s. Sets litellm.request_timeout
force_ipv4: boolean # If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API
set_verbose: boolean # sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION
json_logs: boolean # if true, logs will be in json format
@@ -727,6 +729,7 @@ general_settings:
| json_logs | boolean | If true, logs will be in json format. If you need to store the logs as JSON, just set `litellm.json_logs = True`. We currently log the raw POST request from litellm as JSON. [Further docs](./debugging) |
| default_fallbacks | array of strings | List of fallback models to use if a specific model group is misconfigured / bad. [Further docs](./reliability#default-fallbacks) |
| request_timeout | integer | The timeout for requests in seconds. If not set, the default value is `6000 seconds`. [For reference OpenAI Python SDK defaults to `600 seconds`.](https://github.com/openai/openai-python/blob/main/src/openai/_constants.py) |
| force_ipv4 | boolean | If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API |
| content_policy_fallbacks | array of objects | Fallbacks to use when a ContentPolicyViolationError is encountered. [Further docs](./reliability#content-policy-fallbacks) |
| context_window_fallbacks | array of objects | Fallbacks to use when a ContextWindowExceededError is encountered. [Further docs](./reliability#context-window-fallbacks) |
| cache | boolean | If true, enables caching. [Further docs](./caching) |
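
Putting the new networking knobs together, a minimal `config.yaml` excerpt would look like this (values are illustrative):

```yaml
litellm_settings:
  # Networking settings
  request_timeout: 10 # raise a Timeout error if an LLM call takes longer than 10s
  force_ipv4: true # force ipv4 for all LLM requests (workaround for httpx ConnectionError on ipv6)
```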


@@ -284,11 +284,18 @@ max_end_user_budget: Optional[float] = None
priority_reservation: Optional[Dict[str, float]] = None
#### RELIABILITY ####
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
#### Networking settings ####
request_timeout: float = 6000 # time in seconds
force_ipv4: bool = (
False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
)
module_level_aclient = AsyncHTTPHandler(
timeout=request_timeout, client_alias="module level aclient"
)
module_level_client = HTTPHandler(timeout=request_timeout)
#### RETRIES ####
num_retries: Optional[int] = None # per model endpoint
max_fallbacks: Optional[int] = None
default_fallbacks: Optional[List] = None
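
For SDK users (no proxy), the same behavior comes from flipping this module-level flag before making calls. A minimal sketch; the model name is a placeholder, and it assumes the matching API key is set in the environment:

```python
import litellm

# Force every subsequent LLM request onto an IPv4-bound transport.
# The transport is chosen when an HTTP client is constructed, so set
# this early (right after import), before any requests are made.
litellm.force_ipv4 = True

response = litellm.completion(
    model="claude-3-5-sonnet-20240620",  # placeholder model
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```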


@@ -4,7 +4,7 @@ import traceback
from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Optional, Union
import httpx
from httpx import USE_CLIENT_DEFAULT
from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport
import litellm
@@ -60,8 +60,10 @@ class AsyncHTTPHandler:
if timeout is None:
timeout = _DEFAULT_TIMEOUT
# Create a client with a connection pool
transport = self._create_async_transport()
return httpx.AsyncClient(
transport=transport,
event_hooks=event_hooks,
timeout=timeout,
limits=httpx.Limits(
@@ -297,6 +299,18 @@ class AsyncHTTPHandler:
except Exception:
pass
def _create_async_transport(self) -> Optional[AsyncHTTPTransport]:
"""
Create an async transport with IPv4 only if litellm.force_ipv4 is True.
Otherwise, return None.
Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them
"""
if litellm.force_ipv4:
return AsyncHTTPTransport(local_address="0.0.0.0")
else:
return None
class HTTPHandler:
def __init__(
@@ -316,8 +330,11 @@ class HTTPHandler:
cert = os.getenv("SSL_CERTIFICATE", litellm.ssl_certificate)
if client is None:
transport = self._create_sync_transport()
# Create a client with a connection pool
self.client = httpx.Client(
transport=transport,
timeout=timeout,
limits=httpx.Limits(
max_connections=concurrent_limit,
@@ -427,6 +444,18 @@ class HTTPHandler:
except Exception:
pass
def _create_sync_transport(self) -> Optional[HTTPTransport]:
"""
Create an HTTP transport with IPv4 only if litellm.force_ipv4 is True.
Otherwise, return None.
Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them
"""
if litellm.force_ipv4:
return HTTPTransport(local_address="0.0.0.0")
else:
return None
def get_async_httpx_client(
llm_provider: Union[LlmProviders, httpxSpecialProvider],
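
Both helpers lean on the same httpx feature: `local_address="0.0.0.0"` binds the local side of every outgoing connection to the IPv4 wildcard address, so the client can never dial an AAAA (IPv6) record. A standalone sketch of that mechanism, independent of litellm (the URL is just an example):

```python
import asyncio

import httpx


async def main() -> None:
    # Binding outgoing sockets to 0.0.0.0 restricts the client to IPv4,
    # even when DNS returns both A and AAAA records for the host.
    transport = httpx.AsyncHTTPTransport(local_address="0.0.0.0")
    async with httpx.AsyncClient(transport=transport) as client:
        resp = await client.get("https://www.example.com")
        print(resp.status_code)


asyncio.run(main())
```

Returning `None` when the flag is off is deliberate: `httpx.Client`/`httpx.AsyncClient` treat `transport=None` as "use the default transport", so behavior is unchanged for everyone who doesn't opt in.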


@@ -855,6 +855,7 @@ def test_async_http_handler(mock_async_client):
mock_async_client.assert_called_with(
cert="/client.pem",
transport=None,
event_hooks=event_hooks,
headers=headers,
limits=httpx.Limits(
@@ -866,6 +867,52 @@ def test_async_http_handler(mock_async_client):
)
@mock.patch("httpx.AsyncClient")
@mock.patch.dict(os.environ, {}, clear=True)
def test_async_http_handler_force_ipv4(mock_async_client):
"""
Test AsyncHTTPHandler when litellm.force_ipv4 is True
This is a prod test - we need to ensure that httpx always uses ipv4 when litellm.force_ipv4 is True
"""
import httpx
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
# Set force_ipv4 to True
litellm.force_ipv4 = True
try:
timeout = 120
event_hooks = {"request": [lambda r: r]}
concurrent_limit = 2
AsyncHTTPHandler(timeout, event_hooks, concurrent_limit)
# Get the call arguments
call_args = mock_async_client.call_args[1]
############# IMPORTANT ASSERTION #################
# Assert transport exists and is configured correctly for using ipv4
assert isinstance(call_args["transport"], httpx.AsyncHTTPTransport)
print(call_args["transport"])
assert call_args["transport"]._pool._local_address == "0.0.0.0"
####################################
# Assert other parameters match
assert call_args["event_hooks"] == event_hooks
assert call_args["headers"] == headers
assert isinstance(call_args["limits"], httpx.Limits)
assert call_args["limits"].max_connections == concurrent_limit
assert call_args["limits"].max_keepalive_connections == concurrent_limit
assert call_args["timeout"] == timeout
assert call_args["verify"] is True
assert call_args["cert"] is None
finally:
# Reset force_ipv4 to default
litellm.force_ipv4 = False
@pytest.mark.parametrize(
"model, expected_bool", [("gpt-3.5-turbo", False), ("gpt-4o-audio-preview", True)]
)
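
Outside pytest, the transport selection can be spot-checked by calling the new helper directly. A quick debugging sketch (it touches the private `_create_async_transport` method added above, so don't rely on it in production code):

```python
import httpx
import litellm
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler

litellm.force_ipv4 = True
try:
    handler = AsyncHTTPHandler(timeout=120)
    # With force_ipv4 on, the helper returns an IPv4-bound transport;
    # with it off, it returns None and httpx uses its defaults.
    assert isinstance(handler._create_async_transport(), httpx.AsyncHTTPTransport)
finally:
    litellm.force_ipv4 = False  # restore the default
```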