From 6824d23dc903ae16cfb4e4af299150f655a6133c Mon Sep 17 00:00:00 2001
From: Yuan Tang <terrytangyuan@gmail.com>
Date: Thu, 27 Feb 2025 22:35:52 -0500
Subject: [PATCH] test: Only run embedding tests for remote::nvidia (#1317)

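This fixes the release build failure in
https://github.com/meta-llama/llama-stack-ops/actions/runs/13580302250/job/37964972403
by marking the embedding tests as expected failures (xfail) for every
inference provider other than remote::nvidia. The failing run reported: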

```
=================================== FAILURES ===================================
______ test_embedding_truncation_error[txt=8B:emb=MiniLM-long-text-None] _______
llama-stack/tests/client-sdk/inference/test_embedding.py:166: in test_embedding_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
______ test_embedding_truncation_error[txt=8B:emb=MiniLM-long-text-none] _______
llama-stack/tests/client-sdk/inference/test_embedding.py:166: in test_embedding_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
_______ test_embedding_truncation_error[txt=8B:emb=MiniLM-long-str-None] _______
llama-stack/tests/client-sdk/inference/test_embedding.py:166: in test_embedding_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
_______ test_embedding_truncation_error[txt=8B:emb=MiniLM-long-str-none] _______
llama-stack/tests/client-sdk/inference/test_embedding.py:166: in test_embedding_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
_________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-NONE] _________
llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
_________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-END] __________
llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-START] _________
llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
_________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-left] _________
llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-right] _________
llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error
    with pytest.raises(BadRequestError) as excinfo:
E   Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
=========================== short test summary info ============================
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_truncation_error[txt=8B:emb=MiniLM-long-text-None] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_truncation_error[txt=8B:emb=MiniLM-long-text-none] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_truncation_error[txt=8B:emb=MiniLM-long-str-None] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_truncation_error[txt=8B:emb=MiniLM-long-str-none] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-NONE] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-END] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-START] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-left] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-right] - Failed: DID NOT RAISE <class 'llama_stack_client.BadRequestError'>
= 9 failed, 48 passed, 2 skipped, 3 deselected, 3 xfailed, 1 xpassed, 121 warnings in 90.16s (0:01:30) =
Error: Process completed with exit code 1.
```

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
---
 tests/client-sdk/inference/test_embedding.py | 39 ++++++++++++++++----
 1 file changed, 31 insertions(+), 8 deletions(-)
diff --git a/tests/client-sdk/inference/test_embedding.py b/tests/client-sdk/inference/test_embedding.py
index 46a901d62..c46a6517f 100644
--- a/tests/client-sdk/inference/test_embedding.py
+++ b/tests/client-sdk/inference/test_embedding.py
@@ -75,6 +75,7 @@ DUMMY_IMAGE_URL = ImageContentItem(
     image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
 )
 DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
+SUPPORTED_PROVIDERS = {"remote::nvidia"}
 
 
 @pytest.mark.parametrize(
@@ -88,7 +89,9 @@ DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64st
         "list[text]",
     ],
 )
-def test_embedding_text(llama_stack_client, embedding_model_id, contents):
+def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
@@ -108,7 +111,9 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents):
     ],
 )
 @pytest.mark.xfail(reason="Media is not supported")
-def test_embedding_image(llama_stack_client, embedding_model_id, contents):
+def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
@@ -134,7 +139,11 @@ def test_embedding_image(llama_stack_client, embedding_model_id, contents):
         "short",
     ],
 )
-def test_embedding_truncation(llama_stack_client, embedding_model_id, text_truncation, contents):
+def test_embedding_truncation(
+    llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
+):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     response = llama_stack_client.inference.embeddings(
         model_id=embedding_model_id, contents=contents, text_truncation=text_truncation
     )
@@ -162,7 +171,11 @@ def test_embedding_truncation(llama_stack_client, embedding_model_id, text_trunc
         "long-str",
     ],
 )
-def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text_truncation, contents):
+def test_embedding_truncation_error(
+    llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
+):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     with pytest.raises(BadRequestError) as excinfo:
         llama_stack_client.inference.embeddings(
             model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation
@@ -170,7 +183,9 @@ def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text
 
 
 @pytest.mark.xfail(reason="Only valid for model supporting dimension reduction")
-def test_embedding_output_dimension(llama_stack_client, embedding_model_id):
+def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING])
     test_response = llama_stack_client.inference.embeddings(
         model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32
@@ -180,7 +195,9 @@ def test_embedding_output_dimension(llama_stack_client, embedding_model_id):
 
 
 @pytest.mark.xfail(reason="Only valid for model supporting task type")
-def test_embedding_task_type(llama_stack_client, embedding_model_id):
+def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     query_embedding = llama_stack_client.inference.embeddings(
         model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
     )
@@ -199,7 +216,9 @@ def test_embedding_task_type(llama_stack_client, embedding_model_id):
         "start",
     ],
 )
-def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation):
+def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation, inference_provider_type):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     response = llama_stack_client.inference.embeddings(
         model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
     )
@@ -219,7 +238,11 @@ def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_
         "right",
     ],
 )
-def test_embedding_text_truncation_error(llama_stack_client, embedding_model_id, text_truncation):
+def test_embedding_text_truncation_error(
+    llama_stack_client, embedding_model_id, text_truncation, inference_provider_type
+):
+    if inference_provider_type not in SUPPORTED_PROVIDERS:
+        pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     with pytest.raises(BadRequestError) as excinfo:
         llama_stack_client.inference.embeddings(
             model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation