Add inference providers support for Hugging Face (#8258) (#9738) (#9773)

* Add inference providers support for Hugging Face (#8258)
* add first version of inference providers for huggingface
* temporarily skipping tests
* Add documentation
* Fix titles
* remove max_retries from params and clean up
* add suggestions
* use llm http handler
* update doc
* add suggestions
* run formatters
* add tests
* revert
* revert
* rename file
* set maxsize for lru cache
* fix embeddings
* fix inference url
* fix tests following breaking change in main
* use ChatCompletionRequest
* fix tests and lint
* [Hugging Face] Remove outdated chat completion tests and fix embedding tests (#9749)
* remove or fix tests
* fix link in doc
* fix(config_settings.md): document hf api key

---------

Co-authored-by: célina <hanouticelina@gmail.com>
2025-04-26 03:04:13 +00:00 · 2025-04-05 10:50:15 -07:00 · 2025-04-05 10:50:15 -07:00 · 34bdf36eab
commit 34bdf36eab
parent 0d503ad8ad
24 changed files with 2052 additions and 2456 deletions
--- a/tests/local_testing/test_embedding.py
+++ b/tests/local_testing/test_embedding.py
@@ -643,8 +643,8 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler


@pytest.mark.asyncio
-@patch("litellm.llms.huggingface.chat.handler.async_get_hf_task_embedding_for_model")
-@patch("litellm.llms.huggingface.chat.handler.get_hf_task_embedding_for_model")
+@patch("litellm.llms.huggingface.embedding.handler.async_get_hf_task_embedding_for_model")
+@patch("litellm.llms.huggingface.embedding.handler.get_hf_task_embedding_for_model")
@pytest.mark.parametrize("sync_mode", [True, False])
 async def test_hf_embedding_sentence_sim(
    mock_async_get_hf_task_embedding_for_model,