diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py
index 7052cfb57..ec4cba742 100644
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -7,7 +7,7 @@
 import warnings
 from collections.abc import AsyncIterator
 
-from openai import NOT_GIVEN, APIConnectionError, BadRequestError
+from openai import NOT_GIVEN, APIConnectionError
 
 from llama_stack.apis.common.content_types import (
     InterleavedContent,
@@ -197,15 +197,11 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper):
             }
             extra_body["input_type"] = task_type_options[task_type]
 
-        try:
-            response = await self.client.embeddings.create(
-                model=provider_model_id,
-                input=input,
-                extra_body=extra_body,
-            )
-        except BadRequestError as e:
-            raise ValueError(f"Failed to get embeddings: {e}") from e
-
+        response = await self.client.embeddings.create(
+            model=provider_model_id,
+            input=input,
+            extra_body=extra_body,
+        )
         #
         # OpenAI: CreateEmbeddingResponse(data=[Embedding(embedding=list[float], ...)], ...)
         # ->
diff --git a/tests/integration/inference/test_embedding.py b/tests/integration/inference/test_embedding.py
index 44fad7913..e592a6b14 100644
--- a/tests/integration/inference/test_embedding.py
+++ b/tests/integration/inference/test_embedding.py
@@ -55,6 +55,7 @@
 #
 
 import pytest
+from llama_stack_client import BadRequestError as LlamaStackBadRequestError
 from llama_stack_client.types import EmbeddingsResponse
 from llama_stack_client.types.shared.interleaved_content import (
     ImageContentItem,
@@ -62,6 +63,9 @@ from llama_stack_client.types.shared.interleaved_content import (
     ImageContentItemImageURL,
     TextContentItem,
 )
+from openai import BadRequestError as OpenAIBadRequestError
+
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
 
 DUMMY_STRING = "hello"
 DUMMY_STRING2 = "world"
@@ -204,8 +208,12 @@ def test_embedding_truncation_error(
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     # Using LlamaStackClient from llama_stack_client will raise llama_stack_client.BadRequestError
     # While using LlamaStackAsLibraryClient from llama_stack.distribution.library_client will raise the error that the backend raises
-    # Here we are using the LlamaStackAsLibraryClient, so the error raised is the same as what the backend raises
-    with pytest.raises(ValueError):
+    error_type = (
+        OpenAIBadRequestError
+        if isinstance(llama_stack_client, LlamaStackAsLibraryClient)
+        else LlamaStackBadRequestError
+    )
+    with pytest.raises(error_type):
         llama_stack_client.inference.embeddings(
             model_id=embedding_model_id,
             contents=[DUMMY_LONG_TEXT],
@@ -285,7 +293,8 @@ def test_embedding_text_truncation_error(
 ):
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
-    with pytest.raises(ValueError):
+    error_type = ValueError if isinstance(llama_stack_client, LlamaStackAsLibraryClient) else LlamaStackBadRequestError
+    with pytest.raises(error_type):
         llama_stack_client.inference.embeddings(
             model_id=embedding_model_id,
             contents=[DUMMY_STRING],
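
Note (not part of the patch): with the ValueError wrapper removed from the NVIDIA adapter, a request the backend rejects now surfaces as openai.BadRequestError when the stack runs in-process via LlamaStackAsLibraryClient, while the remote HTTP client still raises llama_stack_client.BadRequestError. The sketch below shows one way a test or caller might pick the expected exception type for either path; the helper name expected_embedding_error is illustrative and not taken from the patch.

    from llama_stack_client import BadRequestError as LlamaStackBadRequestError
    from openai import BadRequestError as OpenAIBadRequestError


    def expected_embedding_error(client) -> type[Exception]:
        """Pick the exception a rejected embeddings request is expected to raise.

        The in-process library client propagates the backend's own error
        (openai.BadRequestError for the NVIDIA adapter after this patch);
        the remote HTTP client wraps it in llama_stack_client.BadRequestError.
        """
        # Imported lazily so the sketch still loads where the server-side
        # package is absent; the import path mirrors the patch above.
        try:
            from llama_stack.core.library_client import LlamaStackAsLibraryClient
        except ImportError:
            return LlamaStackBadRequestError
        if isinstance(client, LlamaStackAsLibraryClient):
            return OpenAIBadRequestError
        return LlamaStackBadRequestError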