(fix) httpx handler - bind to ipv4 for httpx handler (#6785)

* bind to ipv4 on httpx handler

* add force_ipv4

* use helper for _create_async_transport

* fix circular import

* document force_ipv4

* test_async_http_handler_force_ipv4
Ishaan Jaff, 2024-11-18 12:22:51 -08:00 (committed by GitHub)
parent b854f6c07b
commit f43768d617
4 changed files with 87 additions and 1 deletion


@@ -625,7 +625,9 @@ litellm_settings:
redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.), from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity", "proxy_base_url"] # default tags for Langfuse Logging
# Networking settings
request_timeout: 10 # (int) llm request timeout in seconds. Raises a Timeout error if the call takes longer than 10s. Sets litellm.request_timeout
force_ipv4: boolean # If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API
set_verbose: boolean # sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION
json_logs: boolean # if true, logs will be in json format
@@ -727,6 +729,7 @@ general_settings:
| json_logs | boolean | If true, logs will be in json format. If you need to store the logs as JSON, just set `litellm.json_logs = True`. We currently log the raw POST request from litellm as JSON. [Further docs](./debugging) |
| default_fallbacks | array of strings | List of fallback models to use if a specific model group is misconfigured / bad. [Further docs](./reliability#default-fallbacks) |
| request_timeout | integer | The timeout for requests in seconds. If not set, the default value is `6000 seconds`. [For reference OpenAI Python SDK defaults to `600 seconds`.](https://github.com/openai/openai-python/blob/main/src/openai/_constants.py) |
| force_ipv4 | boolean | If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API |
| content_policy_fallbacks | array of objects | Fallbacks to use when a ContentPolicyViolationError is encountered. [Further docs](./reliability#content-policy-fallbacks) |
| context_window_fallbacks | array of objects | Fallbacks to use when a ContextWindowExceededError is encountered. [Further docs](./reliability#context-window-fallbacks) |
| cache | boolean | If true, enables caching. [Further docs](./caching) |
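
Putting the new networking knobs together, a minimal `config.yaml` excerpt would look like this (values are illustrative):

```yaml
litellm_settings:
  # Networking settings
  request_timeout: 10 # raise a Timeout error if an LLM call takes longer than 10s
  force_ipv4: true # force ipv4 for all LLM requests (workaround for httpx ConnectionError on ipv6)
```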


@@ -284,11 +284,18 @@ max_end_user_budget: Optional[float] = None
priority_reservation: Optional[Dict[str, float]] = None
#### RELIABILITY ####
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
#### Networking settings ####
request_timeout: float = 6000 # time in seconds
force_ipv4: bool = (
False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
)
module_level_aclient = AsyncHTTPHandler(
timeout=request_timeout, client_alias="module level aclient"
)
module_level_client = HTTPHandler(timeout=request_timeout)
#### RETRIES ####
num_retries: Optional[int] = None # per model endpoint
max_fallbacks: Optional[int] = None
default_fallbacks: Optional[List] = None
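
For SDK users (no proxy), the same behavior comes from flipping this module-level flag before making calls. A minimal sketch; the model name is a placeholder, and it assumes the matching API key is set in the environment:

```python
import litellm

# Force every subsequent LLM request onto an IPv4-bound transport.
# The transport is chosen when an HTTP client is constructed, so set
# this early (right after import), before any requests are made.
litellm.force_ipv4 = True

response = litellm.completion(
    model="claude-3-5-sonnet-20240620",  # placeholder model
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```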


@@ -4,7 +4,7 @@ import traceback
from typing import TYPE_CHECKING, Any, Callable, List, Mapping, Optional, Union
import httpx
from httpx import USE_CLIENT_DEFAULT
from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport
import litellm
@@ -60,8 +60,10 @@ class AsyncHTTPHandler:
if timeout is None:
timeout = _DEFAULT_TIMEOUT
# Create a client with a connection pool
transport = self._create_async_transport()
return httpx.AsyncClient(
transport=transport,
event_hooks=event_hooks,
timeout=timeout,
limits=httpx.Limits(
@@ -297,6 +299,18 @@ class AsyncHTTPHandler:
except Exception:
pass
def _create_async_transport(self) -> Optional[AsyncHTTPTransport]:
"""
Create an async transport with IPv4 only if litellm.force_ipv4 is True.
Otherwise, return None.
Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them
"""
if litellm.force_ipv4:
return AsyncHTTPTransport(local_address="0.0.0.0")
else:
return None
class HTTPHandler:
def __init__(
@@ -316,8 +330,11 @@ class HTTPHandler:
cert = os.getenv("SSL_CERTIFICATE", litellm.ssl_certificate)
if client is None:
transport = self._create_sync_transport()
# Create a client with a connection pool
self.client = httpx.Client(
transport=transport,
timeout=timeout,
limits=httpx.Limits(
max_connections=concurrent_limit,
@@ -427,6 +444,18 @@ class HTTPHandler:
except Exception:
pass
def _create_sync_transport(self) -> Optional[HTTPTransport]:
"""
Create an HTTP transport with IPv4 only if litellm.force_ipv4 is True.
Otherwise, return None.
Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them
"""
if litellm.force_ipv4:
return HTTPTransport(local_address="0.0.0.0")
else:
return None
def get_async_httpx_client(
llm_provider: Union[LlmProviders, httpxSpecialProvider],
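
Both helpers lean on the same httpx feature: `local_address="0.0.0.0"` binds the local side of every outgoing connection to the IPv4 wildcard address, so the client can never dial an AAAA (IPv6) record. A standalone sketch of that mechanism, independent of litellm (the URL is just an example):

```python
import asyncio

import httpx


async def main() -> None:
    # Binding outgoing sockets to 0.0.0.0 restricts the client to IPv4,
    # even when DNS returns both A and AAAA records for the host.
    transport = httpx.AsyncHTTPTransport(local_address="0.0.0.0")
    async with httpx.AsyncClient(transport=transport) as client:
        resp = await client.get("https://www.example.com")
        print(resp.status_code)


asyncio.run(main())
```

Returning `None` when the flag is off is deliberate: `httpx.Client`/`httpx.AsyncClient` treat `transport=None` as "use the default transport", so behavior is unchanged for everyone who doesn't opt in.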


@@ -855,6 +855,7 @@ def test_async_http_handler(mock_async_client):
mock_async_client.assert_called_with(
cert="/client.pem",
transport=None,
event_hooks=event_hooks,
headers=headers,
limits=httpx.Limits(
@@ -866,6 +867,52 @@ def test_async_http_handler(mock_async_client):
)
@mock.patch("httpx.AsyncClient")
@mock.patch.dict(os.environ, {}, clear=True)
def test_async_http_handler_force_ipv4(mock_async_client):
"""
Test AsyncHTTPHandler when litellm.force_ipv4 is True
This is a prod test - we need to ensure that httpx always uses ipv4 when litellm.force_ipv4 is True
"""
import httpx
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
# Set force_ipv4 to True
litellm.force_ipv4 = True
try:
timeout = 120
event_hooks = {"request": [lambda r: r]}
concurrent_limit = 2
AsyncHTTPHandler(timeout, event_hooks, concurrent_limit)
# Get the call arguments
call_args = mock_async_client.call_args[1]
############# IMPORTANT ASSERTION #################
# Assert transport exists and is configured correctly for using ipv4
assert isinstance(call_args["transport"], httpx.AsyncHTTPTransport)
print(call_args["transport"])
assert call_args["transport"]._pool._local_address == "0.0.0.0"
####################################
# Assert other parameters match
assert call_args["event_hooks"] == event_hooks
assert call_args["headers"] == headers
assert isinstance(call_args["limits"], httpx.Limits)
assert call_args["limits"].max_connections == concurrent_limit
assert call_args["limits"].max_keepalive_connections == concurrent_limit
assert call_args["timeout"] == timeout
assert call_args["verify"] is True
assert call_args["cert"] is None
finally:
# Reset force_ipv4 to default
litellm.force_ipv4 = False
@pytest.mark.parametrize(
"model, expected_bool", [("gpt-3.5-turbo", False), ("gpt-4o-audio-preview", True)]
)
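
Outside pytest, the transport selection can be spot-checked by calling the new helper directly. A quick debugging sketch (it touches the private `_create_async_transport` method added above, so don't rely on it in production code):

```python
import httpx
import litellm
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler

litellm.force_ipv4 = True
try:
    handler = AsyncHTTPHandler(timeout=120)
    # With force_ipv4 on, the helper returns an IPv4-bound transport;
    # with it off, it returns None and httpx uses its defaults.
    assert isinstance(handler._create_async_transport(), httpx.AsyncHTTPTransport)
finally:
    litellm.force_ipv4 = False  # restore the default
```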