diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 7052cfb57..ec4cba742 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -7,7 +7,7 @@ import warnings from collections.abc import AsyncIterator -from openai import NOT_GIVEN, APIConnectionError, BadRequestError +from openai import NOT_GIVEN, APIConnectionError from llama_stack.apis.common.content_types import ( InterleavedContent, @@ -197,15 +197,11 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference, ModelRegistryHelper): } extra_body["input_type"] = task_type_options[task_type] - try: - response = await self.client.embeddings.create( - model=provider_model_id, - input=input, - extra_body=extra_body, - ) - except BadRequestError as e: - raise ValueError(f"Failed to get embeddings: {e}") from e - + response = await self.client.embeddings.create( + model=provider_model_id, + input=input, + extra_body=extra_body, + ) # # OpenAI: CreateEmbeddingResponse(data=[Embedding(embedding=list[float], ...)], ...) 
# -> diff --git a/tests/integration/inference/test_embedding.py b/tests/integration/inference/test_embedding.py index 075f927f7..e592a6b14 100644 --- a/tests/integration/inference/test_embedding.py +++ b/tests/integration/inference/test_embedding.py @@ -55,7 +55,7 @@ # import pytest -from llama_stack_client import BadRequestError +from llama_stack_client import BadRequestError as LlamaStackBadRequestError from llama_stack_client.types import EmbeddingsResponse from llama_stack_client.types.shared.interleaved_content import ( ImageContentItem, @@ -63,6 +63,9 @@ from llama_stack_client.types.shared.interleaved_content import ( ImageContentItemImageURL, TextContentItem, ) +from openai import BadRequestError as OpenAIBadRequestError + +from llama_stack.core.library_client import LlamaStackAsLibraryClient DUMMY_STRING = "hello" DUMMY_STRING2 = "world" @@ -203,7 +206,14 @@ def test_embedding_truncation_error( ): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") - with pytest.raises(BadRequestError): + # Using LlamaStackClient from llama_stack_client will raise llama_stack_client.BadRequestError + # While using LlamaStackAsLibraryClient from llama_stack.core.library_client will raise the error that the backend raises + error_type = ( + OpenAIBadRequestError + if isinstance(llama_stack_client, LlamaStackAsLibraryClient) + else LlamaStackBadRequestError + ) + with pytest.raises(error_type): llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], @@ -283,7 +293,8 @@ def test_embedding_text_truncation_error( ): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") - with pytest.raises(BadRequestError): + error_type = ValueError if isinstance(llama_stack_client, LlamaStackAsLibraryClient) else LlamaStackBadRequestError + with pytest.raises(error_type): 
llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_STRING],