(fix) add linting check to ban creating AsyncHTTPHandler during LLM calling (#6855)

* fix triton

* fix TEXT_COMPLETION_CODESTRAL

* fix REPLICATE

* fix CLARIFAI

* fix HUGGINGFACE

* add test_no_async_http_handler_usage

* fix PREDIBASE

* fix anthropic use get_async_httpx_client

* fix vertex fine tuning

* fix dbricks get_async_httpx_client

* fix get_async_httpx_client vertex

* fix get_async_httpx_client

* fix get_async_httpx_client

* fix make_async_azure_httpx_request

* fix check_for_async_http_handler

* test: cleanup mistral model

* add check for AsyncClient

* fix check_for_async_http_handler

* fix get_async_httpx_client

* fix tests using in_memory_llm_clients_cache

* fix langfuse import

* fix import

---------

Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>
This commit is contained in:
Ishaan Jaff 2024-11-21 19:03:02 -08:00 committed by GitHub
parent 71ebf47cef
commit 920f4c9f82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 288 additions and 62 deletions

View file

@ -7,6 +7,7 @@ import httpx
from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport
import litellm
from litellm.caching import InMemoryCache
from .types import httpxSpecialProvider
@ -26,6 +27,7 @@ headers = {
# https://www.python-httpx.org/advanced/timeouts
_DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0)
_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600 # 1 hour, re-use the same httpx client for 1 hour
class AsyncHTTPHandler:
@ -476,8 +478,9 @@ def get_async_httpx_client(
pass
_cache_key_name = "async_httpx_client" + _params_key_name + llm_provider
if _cache_key_name in litellm.in_memory_llm_clients_cache:
return litellm.in_memory_llm_clients_cache[_cache_key_name]
_cached_client = litellm.in_memory_llm_clients_cache.get_cache(_cache_key_name)
if _cached_client:
return _cached_client
if params is not None:
_new_client = AsyncHTTPHandler(**params)
@ -485,7 +488,11 @@ def get_async_httpx_client(
_new_client = AsyncHTTPHandler(
timeout=httpx.Timeout(timeout=600.0, connect=5.0)
)
litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
litellm.in_memory_llm_clients_cache.set_cache(
key=_cache_key_name,
value=_new_client,
ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
)
return _new_client
@ -505,13 +512,18 @@ def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
pass
_cache_key_name = "httpx_client" + _params_key_name
if _cache_key_name in litellm.in_memory_llm_clients_cache:
return litellm.in_memory_llm_clients_cache[_cache_key_name]
_cached_client = litellm.in_memory_llm_clients_cache.get_cache(_cache_key_name)
if _cached_client:
return _cached_client
if params is not None:
_new_client = HTTPHandler(**params)
else:
_new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
litellm.in_memory_llm_clients_cache.set_cache(
key=_cache_key_name,
value=_new_client,
ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
)
return _new_client