(fix) add linting check to ban creating AsyncHTTPHandler during LLM calling (#6855)

* fix triton

* fix TEXT_COMPLETION_CODESTRAL

* fix REPLICATE

* fix CLARIFAI

* fix HUGGINGFACE

* add test_no_async_http_handler_usage

* fix PREDIBASE

* fix anthropic to use get_async_httpx_client

* fix vertex fine tuning

* fix dbricks get_async_httpx_client

* fix get_async_httpx_client vertex

* fix get_async_httpx_client

* fix get_async_httpx_client

* fix make_async_azure_httpx_request

* fix check_for_async_http_handler

* test: cleanup mistral model

* add check for AsyncClient

* fix check_for_async_http_handler

* fix get_async_httpx_client

* fix tests using in_memory_llm_clients_cache

* fix langfuse import

* fix import

---------

Co-authored-by: Krrish Dholakia <krrishdholakia@gmail.com>
Ishaan Jaff 2024-11-21 19:03:02 -08:00, committed by GitHub
parent 7c595e770c
commit aa6b133557
26 changed files with 288 additions and 62 deletions
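
The enforcement check named in the commit messages above (check_for_async_http_handler / test_no_async_http_handler_usage) is not part of the diff excerpt below. A minimal sketch of the idea, assuming an AST scan over the provider sources; litellm's actual implementation may differ:

# Hypothetical sketch of the banned-usage check, not litellm's real code.
import ast
from pathlib import Path

# Direct construction of these clients inside LLM calling code is banned;
# callers must go through get_async_httpx_client instead.
BANNED_CONSTRUCTORS = {"AsyncHTTPHandler", "AsyncClient"}

def check_for_async_http_handler(base_dir: str = "litellm/llms") -> list:
    """Return (file, line) pairs where a banned client is constructed."""
    violations = []
    for path in Path(base_dir).rglob("*.py"):
        if "custom_httpx" in path.parts:
            continue  # assumed allowlist: the factory module itself must build clients
        tree = ast.parse(path.read_text(), filename=str(path))
        for node in ast.walk(tree):
            if isinstance(node, ast.Call):
                # Matches both AsyncHTTPHandler(...) and httpx.AsyncClient(...)
                name = getattr(node.func, "id", None) or getattr(node.func, "attr", None)
                if name in BANNED_CONSTRUCTORS:
                    violations.append((str(path), node.lineno))
    return violations

def test_no_async_http_handler_usage():
    violations = check_for_async_http_handler()
    assert not violations, f"Direct async client construction found: {violations}"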

@@ -263,7 +263,11 @@ def get_hf_task_for_model(model: str) -> Tuple[hf_tasks, str]:
     return "text-generation-inference", model  # default to tgi


-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    get_async_httpx_client,
+)


 def get_hf_task_embedding_for_model(
@@ -301,7 +305,9 @@ async def async_get_hf_task_embedding_for_model(
             task_type, hf_tasks_embeddings
         )
     )
-    http_client = AsyncHTTPHandler(concurrent_limit=1)
+    http_client = get_async_httpx_client(
+        llm_provider=litellm.LlmProviders.HUGGINGFACE,
+    )

     model_info = await http_client.get(url=api_base)
@@ -1067,7 +1073,9 @@ class Huggingface(BaseLLM):
         )
         ## COMPLETION CALL
         if client is None:
-            client = AsyncHTTPHandler(concurrent_limit=1)
+            client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders.HUGGINGFACE,
+            )

         response = await client.post(api_base, headers=headers, data=json.dumps(data))
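
The same swap recurs across the 26 changed files: each per-request AsyncHTTPHandler(concurrent_limit=1) becomes a get_async_httpx_client(...) lookup, which (per the in_memory_llm_clients_cache commit message above) returns a cached, shared client rather than opening a new connection pool on every call. A usage sketch of the new pattern, with api_base, headers, and data as stand-in arguments:

# Sketch of the pattern applied throughout this commit; the argument
# names are placeholders, not a specific provider's real signature.
import json
import litellm
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client

async def call_provider(api_base: str, headers: dict, data: dict):
    # Old (now banned): client = AsyncHTTPHandler(concurrent_limit=1)
    # built a fresh httpx.AsyncClient and connection pool per request.
    client = get_async_httpx_client(
        llm_provider=litellm.LlmProviders.HUGGINGFACE,
    )
    return await client.post(api_base, headers=headers, data=json.dumps(data))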