From e63ea48894a958a4d66b9c9ad7137269f6f66f1c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 21 Nov 2024 11:18:07 -0800
Subject: [PATCH] fix get_async_httpx_client

---
 litellm/__init__.py                       |  2 +-
 litellm/llms/OpenAI/openai.py             | 12 +++++++---
 litellm/llms/custom_httpx/http_handler.py | 24 ++++++++++++++-----
 .../vertex_ai_non_gemini.py               |  9 +++++--
 4 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/litellm/__init__.py b/litellm/__init__.py
index 9a8c56a56..c978b24ee 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -133,7 +133,7 @@ use_client: bool = False
 ssl_verify: Union[str, bool] = True
 ssl_certificate: Optional[str] = None
 disable_streaming_logging: bool = False
-in_memory_llm_clients_cache: dict = {}
+in_memory_llm_clients_cache: InMemoryCache = InMemoryCache()
 safe_memory_mode: bool = False
 enable_azure_ad_token_refresh: Optional[bool] = False
 ### DEFAULT AZURE API VERSION ###
diff --git a/litellm/llms/OpenAI/openai.py b/litellm/llms/OpenAI/openai.py
index 7d701d26c..057340b51 100644
--- a/litellm/llms/OpenAI/openai.py
+++ b/litellm/llms/OpenAI/openai.py
@@ -18,6 +18,7 @@ import litellm
 from litellm import LlmProviders
 from litellm._logging import verbose_logger
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENTS
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.utils import ProviderField
 from litellm.utils import (
@@ -562,8 +563,9 @@ class OpenAIChatCompletion(BaseLLM):

         _cache_key = f"hashed_api_key={hashed_api_key},api_base={api_base},timeout={timeout},max_retries={max_retries},organization={organization},is_async={is_async}"

-        if _cache_key in litellm.in_memory_llm_clients_cache:
-            return litellm.in_memory_llm_clients_cache[_cache_key]
+        _cached_client = litellm.in_memory_llm_clients_cache.get_cache(_cache_key)
+        if _cached_client:
+            return _cached_client
         if is_async:
             _new_client: Union[OpenAI, AsyncOpenAI] = AsyncOpenAI(
                 api_key=api_key,
@@ -584,7 +586,11 @@
             )

         ## SAVE CACHE KEY
-        litellm.in_memory_llm_clients_cache[_cache_key] = _new_client
+        litellm.in_memory_llm_clients_cache.set_cache(
+            key=_cache_key,
+            value=_new_client,
+            ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
+        )
         return _new_client

     else:
diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py
index 020af7e90..f1b78ea63 100644
--- a/litellm/llms/custom_httpx/http_handler.py
+++ b/litellm/llms/custom_httpx/http_handler.py
@@ -7,6 +7,7 @@ import httpx
 from httpx import USE_CLIENT_DEFAULT, AsyncHTTPTransport, HTTPTransport

 import litellm
+from litellm.caching import InMemoryCache

 from .types import httpxSpecialProvider

@@ -26,6 +27,7 @@ headers = {

 # https://www.python-httpx.org/advanced/timeouts
 _DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0)
+_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600  # 1 hour, re-use the same httpx client for 1 hour


 class AsyncHTTPHandler:
@@ -476,8 +478,9 @@ def get_async_httpx_client(
             pass

     _cache_key_name = "async_httpx_client" + _params_key_name + llm_provider
-    if _cache_key_name in litellm.in_memory_llm_clients_cache:
-        return litellm.in_memory_llm_clients_cache[_cache_key_name]
+    _cached_client = litellm.in_memory_llm_clients_cache.get_cache(_cache_key_name)
+    if _cached_client:
+        return _cached_client

     if params is not None:
         _new_client = AsyncHTTPHandler(**params)
@@ -484,7 +487,11 @@ def get_async_httpx_client(
     else:
         _new_client = AsyncHTTPHandler(
             timeout=httpx.Timeout(timeout=600.0, connect=5.0)
         )
-    litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
+    litellm.in_memory_llm_clients_cache.set_cache(
+        key=_cache_key_name,
+        value=_new_client,
+        ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
+    )
     return _new_client

@@ -505,13 +512,18 @@ def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
             pass

     _cache_key_name = "httpx_client" + _params_key_name
-    if _cache_key_name in litellm.in_memory_llm_clients_cache:
-        return litellm.in_memory_llm_clients_cache[_cache_key_name]
+    _cached_client = litellm.in_memory_llm_clients_cache.get_cache(_cache_key_name)
+    if _cached_client:
+        return _cached_client

     if params is not None:
         _new_client = HTTPHandler(**params)
     else:
         _new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))
-    litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
+    litellm.in_memory_llm_clients_cache.set_cache(
+        key=_cache_key_name,
+        value=_new_client,
+        ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
+    )
     return _new_client

diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py
index 80295ec40..829bf6528 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_non_gemini.py
@@ -14,6 +14,7 @@ from pydantic import BaseModel
 import litellm
 from litellm._logging import verbose_logger
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
+from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENTS
 from litellm.llms.prompt_templates.factory import (
     convert_to_anthropic_image_obj,
     convert_to_gemini_tool_call_invoke,
@@ -93,11 +94,15 @@ def _get_client_cache_key(


 def _get_client_from_cache(client_cache_key: str):
-    return litellm.in_memory_llm_clients_cache.get(client_cache_key, None)
+    return litellm.in_memory_llm_clients_cache.get_cache(client_cache_key)


 def _set_client_in_cache(client_cache_key: str, vertex_llm_model: Any):
-    litellm.in_memory_llm_clients_cache[client_cache_key] = vertex_llm_model
+    litellm.in_memory_llm_clients_cache.set_cache(
+        key=client_cache_key,
+        value=vertex_llm_model,
+        ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
+    )


 def completion(  # noqa: PLR0915
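Note on the pattern: all three call sites in this patch converge on the same get-or-create flow, "check a TTL cache, build the client on a miss, store it with a TTL". Below is a minimal, self-contained sketch of that flow. SimpleTTLCache, get_async_httpx_client_demo, and the cache-key format are hypothetical names for illustration only; litellm's real cache is litellm.caching.InMemoryCache, and the real factory returns litellm's AsyncHTTPHandler rather than a bare httpx.AsyncClient.

import time
from typing import Any, Dict, Optional, Tuple

import httpx


class SimpleTTLCache:
    """Illustrative stand-in for litellm.caching.InMemoryCache (not the real class)."""

    def __init__(self) -> None:
        # key -> (value, absolute expiry timestamp in seconds)
        self._store: Dict[str, Tuple[Any, float]] = {}

    def get_cache(self, key: str) -> Optional[Any]:
        entry = self._store.get(key)
        if entry is None:
            return None
        value, expires_at = entry
        if time.time() >= expires_at:
            # TTL elapsed: evict so the caller builds a fresh client.
            self._store.pop(key, None)
            return None
        return value

    def set_cache(self, key: str, value: Any, ttl: float) -> None:
        self._store[key] = (value, time.time() + ttl)


_DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600  # same 1-hour TTL the patch introduces
_clients = SimpleTTLCache()


def get_async_httpx_client_demo(llm_provider: str) -> httpx.AsyncClient:
    # Get-or-create: reuse the cached client for this provider until the TTL lapses.
    _cache_key_name = "async_httpx_client" + llm_provider
    _cached_client = _clients.get_cache(_cache_key_name)
    if _cached_client:
        return _cached_client
    _new_client = httpx.AsyncClient(
        timeout=httpx.Timeout(timeout=600.0, connect=5.0)
    )
    _clients.set_cache(
        key=_cache_key_name,
        value=_new_client,
        ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
    )
    return _new_client

Compared with the unbounded dict the patch replaces, expiring entries caps how long pooled connections are reused and keeps the cache from growing without bound when keys embed serialized client params.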