diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index a505a1b93..56d13a09a 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -41,9 +41,11 @@ def get_distribution_template() -> DistributionTemplate: core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} default_models = [ ModelInput( - model_id=core_model_to_hf_repo[m.llama_model], + model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, provider_model_id=m.provider_model_id, provider_id="nvidia", + model_type=m.model_type, + metadata=m.metadata, ) for m in _MODEL_ENTRIES ] diff --git a/tests/client-sdk/inference/test_embedding.py b/tests/client-sdk/inference/test_embedding.py index 6b0ddd74d..9b807341e 100644 --- a/tests/client-sdk/inference/test_embedding.py +++ b/tests/client-sdk/inference/test_embedding.py @@ -45,16 +45,14 @@ # import pytest - from llama_stack_client.types import EmbeddingsResponse from llama_stack_client.types.shared.interleaved_content import ( - TextContentItem, + URL, ImageContentItem, ImageContentItemImage, - URL, + TextContentItem, ) - DUMMY_STRING = "hello" DUMMY_STRING2 = "world" DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text")