fix(hosted_vllm/transformation.py): return fake api key, if none given (#7301)

* fix(hosted_vllm/transformation.py): return a fake api key if none is given. Prevents an httpx error (see the sketch after this list)

Fixes https://github.com/BerriAI/litellm/issues/7291

* test: fix test

* fix(main.py): add hosted_vllm/ support for the embeddings endpoint (see the usage sketch after this list)

Closes https://github.com/BerriAI/litellm/issues/7290

* docs(vllm.md): add docs on vllm embeddings usage

* fix(__init__.py): fix sambanova model test

* fix(base_llm_unit_tests.py): skip the pydantic obj test if the model takes >5s to respond (see the sketch after this list)
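
For the hosted_vllm api-key fix, a minimal sketch of the idea, assuming a hypothetical helper name and placeholder value (the real change lives in hosted_vllm/transformation.py): when no key is configured for a hosted vLLM server, substitute a dummy string so the OpenAI-compatible client does not raise an httpx error on a missing key.

```python
from typing import Optional


def resolve_hosted_vllm_api_key(api_key: Optional[str]) -> str:
    # Hypothetical helper, not the actual litellm code: hosted vLLM servers
    # typically don't require auth, but the OpenAI-compatible client still
    # expects some key, so fall back to a placeholder instead of passing None.
    return api_key if api_key else "fake-api-key"
```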
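
For the embeddings support, a rough usage sketch mirroring the new test below; the model name and api_base are placeholders for whatever the local vLLM server actually serves.

```python
import litellm

# hosted_vllm/ routes the request to an OpenAI-compatible vLLM server.
# The base URL can also come from the HOSTED_VLLM_API_BASE env var.
response = litellm.embedding(
    model="hosted_vllm/jina-embeddings-v3",
    input=["Hello world"],
    api_base="http://localhost:8000",
)
print(response)
```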
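
For the slow-model skip, a hedged sketch of the pattern (the helper below is hypothetical, not the code in base_llm_unit_tests.py): time the call and skip the pydantic-object assertions instead of failing when the provider responds too slowly.

```python
import time

import pytest


def run_or_skip_if_slow(call, limit_s: float = 5.0):
    # Hypothetical helper: run the model call, and skip the test rather than
    # fail it when the provider takes longer than `limit_s` to respond.
    start = time.time()
    result = call()
    if time.time() - start > limit_s:
        pytest.skip(f"model took more than {limit_s}s; skipping pydantic object test")
    return result
```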

Krish Dholakia, 2024-12-18 18:41:53 -08:00 (committed by GitHub)
commit 6a45ee1ef7 (parent 246e3bafc8)
9 changed files with 189 additions and 6 deletions


@@ -1004,6 +1004,28 @@ async def test_hf_embedddings_with_optional_params(sync_mode):
assert json_data["parameters"]["top_k"] == 10

def test_hosted_vllm_embedding(monkeypatch):
    monkeypatch.setenv("HOSTED_VLLM_API_BASE", "http://localhost:8000")
    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    client = HTTPHandler()
    with patch.object(client, "post") as mock_post:
        try:
            embedding(
                model="hosted_vllm/jina-embeddings-v3",
                input=["Hello world"],
                client=client,
            )
        except Exception as e:
            print(e)

        mock_post.assert_called_once()

        json_data = json.loads(mock_post.call_args.kwargs["data"])
        assert json_data["input"] == ["Hello world"]
        assert json_data["model"] == "jina-embeddings-v3"


@pytest.mark.parametrize(
    "model",
    [