(fix) add linting check to ban creating AsyncHTTPHandler during LLM calling (#6855)

* fix triton

* fix TEXT_COMPLETION_CODESTRAL

* fix REPLICATE

* fix CLARIFAI

* fix HUGGINGFACE

* add test_no_async_http_handler_usage (see the sketch after this list)

* fix PREDIBASE

* fix anthropic use get_async_httpx_client

* fix vertex fine tuning

* fix dbricks get_async_httpx_client

* fix get_async_httpx_client vertex

* fix get_async_httpx_client

* fix get_async_httpx_client

* fix make_async_azure_httpx_request

* fix check_for_async_http_handler

* test: cleanup mistral model

* add check for AsyncClient

* fix check_for_async_http_handler

* fix get_async_httpx_client

* fix tests using in_memory_llm_clients_cache

* fix langfuse import

* fix import
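
The check itself is not visible in the hunks included below. As a rough sketch only (check_for_async_http_handler and test_no_async_http_handler_usage are the names taken from the bullets above, but the AST-based body, the scanned directory, and the banned-name set are assumptions, not this commit's actual implementation), such a guard could look like:

    import ast
    from pathlib import Path

    # Names whose direct construction should be banned in provider code
    # (AsyncClient per the "add check for AsyncClient" bullet above).
    BANNED_CONSTRUCTORS = {"AsyncHTTPHandler", "AsyncClient"}

    def check_for_async_http_handler(base_dir: str = "litellm/llms") -> list:
        """Scan provider modules and collect direct async-client constructions."""
        violations = []
        for path in Path(base_dir).rglob("*.py"):
            # A real check would allowlist http_handler.py itself, where the
            # wrapper class is defined.
            if path.name == "http_handler.py":
                continue
            tree = ast.parse(path.read_text())
            for node in ast.walk(tree):
                if not isinstance(node, ast.Call):
                    continue
                func = node.func
                name = func.attr if isinstance(func, ast.Attribute) else getattr(func, "id", "")
                if name in BANNED_CONSTRUCTORS:
                    violations.append(f"{path}:{node.lineno} constructs {name} directly")
        return violations

    def test_no_async_http_handler_usage():
        assert check_for_async_http_handler() == [], "use get_async_httpx_client instead"

Wired into the test suite, any provider module that instantiates AsyncHTTPHandler or httpx.AsyncClient directly fails until it is migrated to get_async_httpx_client.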

---------

Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>
commit 920f4c9f82 (parent: 71ebf47cef)
Author: Ishaan Jaff
Date:   2024-11-21 19:03:02 -08:00 (committed by GitHub)
26 changed files with 288 additions and 62 deletions

@@ -9,7 +9,10 @@ import httpx  # type: ignore
 import requests  # type: ignore
 import litellm
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    get_async_httpx_client,
+)
 from litellm.utils import CustomStreamWrapper, ModelResponse, Usage
 from .prompt_templates.factory import custom_prompt, prompt_factory
@@ -325,7 +328,7 @@ def handle_prediction_response_streaming(prediction_url, api_token, print_verbos
 async def async_handle_prediction_response_streaming(
     prediction_url, api_token, print_verbose
 ):
-    http_handler = AsyncHTTPHandler(concurrent_limit=1)
+    http_handler = get_async_httpx_client(llm_provider=litellm.LlmProviders.REPLICATE)
     previous_output = ""
     output_string = ""
@@ -560,7 +563,9 @@ async def async_completion(
     logging_obj,
     print_verbose,
 ) -> Union[ModelResponse, CustomStreamWrapper]:
-    http_handler = AsyncHTTPHandler(concurrent_limit=1)
+    http_handler = get_async_httpx_client(
+        llm_provider=litellm.LlmProviders.REPLICATE,
+    )
     prediction_url = await async_start_prediction(
         version_id,
         input_data,
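
The REPLICATE hunks above show the pattern this commit applies across providers: replace per-call AsyncHTTPHandler(...) construction with a shared client obtained from get_async_httpx_client. A minimal usage sketch follows, assuming the handler exposes httpx-style get/post methods and that the returned client is the one cached in in_memory_llm_clients_cache (per the bullets above); _poll_prediction is a hypothetical caller, not code from this commit:

    import litellm
    from litellm.llms.custom_httpx.http_handler import get_async_httpx_client

    async def _poll_prediction(prediction_url: str, headers: dict):
        # Ask for the provider-scoped, reusable client instead of building a
        # fresh AsyncHTTPHandler (and a new connection pool) per request.
        http_handler = get_async_httpx_client(
            llm_provider=litellm.LlmProviders.REPLICATE,
        )
        return await http_handler.get(url=prediction_url, headers=headers)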