Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-08-12 04:50:39 +00:00
test: Only run embedding tests for remote::nvidia

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>

parent a9f5c5bfca
commit 33cfa2fb81

1 changed file with 31 additions and 8 deletions
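The change threads the suite's inference_provider_type fixture into every embedding test and marks the test as an expected failure for any provider other than remote::nvidia. The guard pattern is shown in isolation below as a minimal runnable sketch; the fixture stub is a hypothetical stand-in, since the real fixture comes from the suite's conftest (not part of this diff) and reports the provider type of the running llama-stack distribution:

import pytest

SUPPORTED_PROVIDERS = {"remote::nvidia"}

@pytest.fixture
def inference_provider_type():
    # Hypothetical stand-in: the real fixture resolves the provider
    # type of the distribution under test.
    return "remote::nvidia"

def test_embedding_smoke(inference_provider_type):
    # The guard added to each embedding test in this commit:
    if inference_provider_type not in SUPPORTED_PROVIDERS:
        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")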
@@ -75,6 +75,7 @@ DUMMY_IMAGE_URL = ImageContentItem(
     image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
 )
 DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
+SUPPORTED_PROVIDERS = {"remote::nvidia"}


 @pytest.mark.parametrize(
@@ -88,7 +89,9 @@ DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64st
         "list[text]",
     ],
 )
-def test_embedding_text(llama_stack_client, embedding_model_id, contents):
+def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
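The final assertion above counts one embedding per top-level string in contents and one per element of any nested list. A standalone illustration of that counting expression, runnable without a client:

# One bare string plus a two-element batch should yield 3 embeddings.
contents = ["single string", ["batch item 1", "batch item 2"]]
expected = sum(len(c) if isinstance(c, list) else 1 for c in contents)
assert expected == 3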
@@ -108,7 +111,9 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents):
     ],
 )
 @pytest.mark.xfail(reason="Media is not supported")
-def test_embedding_image(llama_stack_client, embedding_model_id, contents):
+def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
@@ -134,7 +139,11 @@ def test_embedding_image(llama_stack_client, embedding_model_id, contents):
         "short",
     ],
 )
-def test_embedding_truncation(llama_stack_client, embedding_model_id, text_truncation, contents):
+def test_embedding_truncation(
+    llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
+):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     response = llama_stack_client.inference.embeddings(
         model_id=embedding_model_id, contents=contents, text_truncation=text_truncation
     )
@@ -162,7 +171,11 @@ def test_embedding_truncation(llama_stack_client, embedding_model_id, text_trunc
         "long-str",
     ],
 )
-def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text_truncation, contents):
+def test_embedding_truncation_error(
+    llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
+):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     with pytest.raises(BadRequestError) as excinfo:
         llama_stack_client.inference.embeddings(
             model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation
@@ -170,7 +183,9 @@ def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text


 @pytest.mark.xfail(reason="Only valid for model supporting dimension reduction")
-def test_embedding_output_dimension(llama_stack_client, embedding_model_id):
+def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING])
     test_response = llama_stack_client.inference.embeddings(
         model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32
@@ -180,7 +195,9 @@ def test_embedding_output_dimension(llama_stack_client, embedding_model_id):


 @pytest.mark.xfail(reason="Only valid for model supporting task type")
-def test_embedding_task_type(llama_stack_client, embedding_model_id):
+def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     query_embedding = llama_stack_client.inference.embeddings(
         model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
     )
@@ -199,7 +216,9 @@ def test_embedding_task_type(llama_stack_client, embedding_model_id):
         "start",
     ],
 )
-def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation):
+def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation, inference_provider_type):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     response = llama_stack_client.inference.embeddings(
         model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
     )
@@ -219,7 +238,11 @@ def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_
         "right",
     ],
 )
-def test_embedding_text_truncation_error(llama_stack_client, embedding_model_id, text_truncation):
+def test_embedding_text_truncation_error(
+    llama_stack_client, embedding_model_id, text_truncation, inference_provider_type
+):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     with pytest.raises(BadRequestError) as excinfo:
         llama_stack_client.inference.embeddings(
             model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
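A note on the guard's semantics: calling pytest.xfail(...) imperatively inside a test body stops execution at that point and reports the test as xfailed, so unsupported providers surface as expected failures rather than hard failures or silent skips. A minimal demonstration, with the provider value being an illustrative assumption only:

import pytest

def test_xfail_guard_semantics():
    provider = "inline::sentence-transformers"  # hypothetical example value
    if provider not in {"remote::nvidia"}:
        pytest.xfail(f"{provider} doesn't support embedding model yet")
    raise AssertionError("unreachable: pytest.xfail() ended the test above")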