feat(huggingface_restapi.py): Support multiple hf embedding types + async hf embeddings

Closes https://github.com/BerriAI/litellm/issues/3261
This commit is contained in:
Krrish Dholakia 2024-07-30 13:32:03 -07:00
parent 3449e51796
commit 17ac0f0636
3 changed files with 332 additions and 59 deletions

View file

@@ -3114,6 +3114,7 @@ async def aembedding(*args, **kwargs) -> EmbeddingResponse:
or custom_llm_provider == "vertex_ai"
or custom_llm_provider == "databricks"
or custom_llm_provider == "watsonx"
or custom_llm_provider == "huggingface"
): # currently implemented aiohttp calls for just azure and openai, soon all.
# Await normally
init_response = await loop.run_in_executor(None, func_with_context)
@@ -3450,7 +3451,7 @@ def embedding(
or litellm.huggingface_key
or get_secret("HUGGINGFACE_API_KEY")
or litellm.api_key
)
) # type: ignore
response = huggingface.embedding(
model=model,
input=input,
@@ -3459,6 +3460,9 @@ def embedding(
api_base=api_base,
logging_obj=logging,
model_response=EmbeddingResponse(),
optional_params=optional_params,
client=client,
aembedding=aembedding,
)
elif custom_llm_provider == "bedrock":
response = bedrock.embedding(