test: Only run embedding tests for remote::nvidia (#1317)

This fixes the release build failure 3796497240:

```
=================================== FAILURES ===================================
______ test_embedding_truncation_error[txt=8B:emb=MiniLM-long-text-None] _______
llama-stack/tests/client-sdk/inference/test_embedding.py:166: in test_embedding_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
______ test_embedding_truncation_error[txt=8B:emb=MiniLM-long-text-none] _______
llama-stack/tests/client-sdk/inference/test_embedding.py:166: in test_embedding_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
_______ test_embedding_truncation_error[txt=8B:emb=MiniLM-long-str-None] _______
llama-stack/tests/client-sdk/inference/test_embedding.py:166: in test_embedding_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
_______ test_embedding_truncation_error[txt=8B:emb=MiniLM-long-str-none] _______
llama-stack/tests/client-sdk/inference/test_embedding.py:166: in test_embedding_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
_________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-NONE] _________
llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
_________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-END] __________
llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-START] _________
llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
_________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-left] _________
llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-right] _________
llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
=========================== short test summary info ============================
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_truncation_error[txt=8B:emb=MiniLM-long-text-None] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_truncation_error[txt=8B:emb=MiniLM-long-text-none] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_truncation_error[txt=8B:emb=MiniLM-long-str-None] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_truncation_error[txt=8B:emb=MiniLM-long-str-none] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-NONE] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-END] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-START] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-left] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-right] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
= 9 failed, 48 passed, 2 skipped, 3 deselected, 3 xfailed, 1 xpassed, 121 warnings in 90.16s (0:01:30) =
Error: Process completed with exit code 1.
```

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
This commit is contained in:
Yuan Tang 2025-02-27 22:35:52 -05:00 committed by GitHub
parent a9f5c5bfca
commit 6824d23dc9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -75,6 +75,7 @@ DUMMY_IMAGE_URL = ImageContentItem(
image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image" image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
) )
DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image") DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
SUPPORTED_PROVIDERS = {"remote::nvidia"}
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -88,7 +89,9 @@ DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64st
"list[text]", "list[text]",
], ],
) )
def test_embedding_text(llama_stack_client, embedding_model_id, contents): def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents) response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
assert isinstance(response, EmbeddingsResponse) assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents) assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
@ -108,7 +111,9 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents):
], ],
) )
@pytest.mark.xfail(reason="Media is not supported") @pytest.mark.xfail(reason="Media is not supported")
def test_embedding_image(llama_stack_client, embedding_model_id, contents): def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents) response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
assert isinstance(response, EmbeddingsResponse) assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents) assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
@ -134,7 +139,11 @@ def test_embedding_image(llama_stack_client, embedding_model_id, contents):
"short", "short",
], ],
) )
def test_embedding_truncation(llama_stack_client, embedding_model_id, text_truncation, contents): def test_embedding_truncation(
llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings( response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=contents, text_truncation=text_truncation model_id=embedding_model_id, contents=contents, text_truncation=text_truncation
) )
@ -162,7 +171,11 @@ def test_embedding_truncation(llama_stack_client, embedding_model_id, text_trunc
"long-str", "long-str",
], ],
) )
def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text_truncation, contents): def test_embedding_truncation_error(
llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
with pytest.raises(BadRequestError) as excinfo: with pytest.raises(BadRequestError) as excinfo:
llama_stack_client.inference.embeddings( llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation
@ -170,7 +183,9 @@ def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text
@pytest.mark.xfail(reason="Only valid for model supporting dimension reduction") @pytest.mark.xfail(reason="Only valid for model supporting dimension reduction")
def test_embedding_output_dimension(llama_stack_client, embedding_model_id): def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING]) base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING])
test_response = llama_stack_client.inference.embeddings( test_response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32 model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32
@ -180,7 +195,9 @@ def test_embedding_output_dimension(llama_stack_client, embedding_model_id):
@pytest.mark.xfail(reason="Only valid for model supporting task type") @pytest.mark.xfail(reason="Only valid for model supporting task type")
def test_embedding_task_type(llama_stack_client, embedding_model_id): def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
query_embedding = llama_stack_client.inference.embeddings( query_embedding = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query" model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
) )
@ -199,7 +216,9 @@ def test_embedding_task_type(llama_stack_client, embedding_model_id):
"start", "start",
], ],
) )
def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation): def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings( response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
) )
@ -219,7 +238,11 @@ def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_
"right", "right",
], ],
) )
def test_embedding_text_truncation_error(llama_stack_client, embedding_model_id, text_truncation): def test_embedding_text_truncation_error(
llama_stack_client, embedding_model_id, text_truncation, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
with pytest.raises(BadRequestError) as excinfo: with pytest.raises(BadRequestError) as excinfo:
llama_stack_client.inference.embeddings( llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation