fix(hosted_vllm/transformation.py): return fake api key, if none given (#7301)

* fix(hosted_vllm/transformation.py): return a fake api key if none is given. Prevents an httpx error (see the sketch after this list)

Fixes https://github.com/BerriAI/litellm/issues/7291

* test: fix test

* fix(main.py): add hosted_vllm/ support for the embeddings endpoint (see the usage sketch after this list)

Closes https://github.com/BerriAI/litellm/issues/7290

* docs(vllm.md): add docs on vllm embeddings usage

* fix(__init__.py): fix sambanova model test

* fix(base_llm_unit_tests.py): skip the pydantic obj test if the model takes >5s to respond (see the sketch after this list)
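
For the hosted_vllm api-key fix, a minimal sketch of the idea, assuming a hypothetical helper name and placeholder value (the real change lives in hosted_vllm/transformation.py): when no key is configured for a hosted vLLM server, substitute a dummy string so the OpenAI-compatible client does not raise an httpx error on a missing key.

```python
from typing import Optional


def resolve_hosted_vllm_api_key(api_key: Optional[str]) -> str:
    # Hypothetical helper, not the actual litellm code: hosted vLLM servers
    # typically don't require auth, but the OpenAI-compatible client still
    # expects some key, so fall back to a placeholder instead of passing None.
    return api_key if api_key else "fake-api-key"
```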
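
For the embeddings support, a rough usage sketch mirroring the new test below; the model name and api_base are placeholders for whatever the local vLLM server actually serves.

```python
import litellm

# hosted_vllm/ routes the request to an OpenAI-compatible vLLM server.
# The base URL can also come from the HOSTED_VLLM_API_BASE env var.
response = litellm.embedding(
    model="hosted_vllm/jina-embeddings-v3",
    input=["Hello world"],
    api_base="http://localhost:8000",
)
print(response)
```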
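
For the slow-model skip, a hedged sketch of the pattern (the helper below is hypothetical, not the code in base_llm_unit_tests.py): time the call and skip the pydantic-object assertions instead of failing when the provider responds too slowly.

```python
import time

import pytest


def run_or_skip_if_slow(call, limit_s: float = 5.0):
    # Hypothetical helper: run the model call, and skip the test rather than
    # fail it when the provider takes longer than `limit_s` to respond.
    start = time.time()
    result = call()
    if time.time() - start > limit_s:
        pytest.skip(f"model took more than {limit_s}s; skipping pydantic object test")
    return result
```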

Krish Dholakia, 2024-12-18 18:41:53 -08:00 (committed by GitHub)
commit 6a45ee1ef7 (parent 246e3bafc8)
9 changed files with 189 additions and 6 deletions


@@ -1004,6 +1004,28 @@ async def test_hf_embedddings_with_optional_params(sync_mode):
assert json_data["parameters"]["top_k"] == 10

def test_hosted_vllm_embedding(monkeypatch):
    monkeypatch.setenv("HOSTED_VLLM_API_BASE", "http://localhost:8000")
    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    client = HTTPHandler()
    with patch.object(client, "post") as mock_post:
        try:
            embedding(
                model="hosted_vllm/jina-embeddings-v3",
                input=["Hello world"],
                client=client,
            )
        except Exception as e:
            print(e)

        mock_post.assert_called_once()

        json_data = json.loads(mock_post.call_args.kwargs["data"])
        assert json_data["input"] == ["Hello world"]
        assert json_data["model"] == "jina-embeddings-v3"


@pytest.mark.parametrize(
    "model",
    [