feat(huggingface_restapi.py): Support multiple hf embedding types + async hf embeddings

Closes https://github.com/BerriAI/litellm/issues/3261
This commit is contained in:
Krrish Dholakia 2024-07-30 13:32:03 -07:00
parent 3449e51796
commit 17ac0f0636
3 changed files with 332 additions and 59 deletions

View file

@@ -3114,6 +3114,7 @@ async def aembedding(*args, **kwargs) -> EmbeddingResponse:
or custom_llm_provider == "vertex_ai"
or custom_llm_provider == "databricks"
or custom_llm_provider == "watsonx"
or custom_llm_provider == "huggingface"
): # currently implemented aiohttp calls for just azure and openai, soon all.
# Await normally
init_response = await loop.run_in_executor(None, func_with_context)
@@ -3450,7 +3451,7 @@ def embedding(
or litellm.huggingface_key
or get_secret("HUGGINGFACE_API_KEY")
or litellm.api_key
)
) # type: ignore
response = huggingface.embedding(
model=model,
input=input,
@@ -3459,6 +3460,9 @@ def embedding(
api_base=api_base,
logging_obj=logging,
model_response=EmbeddingResponse(),
optional_params=optional_params,
client=client,
aembedding=aembedding,
)
elif custom_llm_provider == "bedrock":
response = bedrock.embedding(