From 99b6925ad834618b42dd2b3fc704092e3fa63287 Mon Sep 17 00:00:00 2001
From: Matthew Farrellee
Date: Wed, 26 Feb 2025 23:18:34 -0600
Subject: [PATCH] feat: add nemo retriever text embedding models to nvidia inference provider (#1218)

# What does this PR do?

add the NeMo Retriever Embedding models from
https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
---
 .../remote_hosted_distro/nvidia.md            |  5 ++-
 .../remote/inference/nvidia/models.py         | 41 ++++++++++++++++++-
 llama_stack/templates/nvidia/run.yaml         | 27 ++++++++++--
 3 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md
index a1f70e450..20a10ba4d 100644
--- a/docs/source/distributions/remote_hosted_distro/nvidia.md
+++ b/docs/source/distributions/remote_hosted_distro/nvidia.md
@@ -36,7 +36,10 @@ The following models are available by default:
 - `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)`
 - `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)`
 - `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)`
-- `baai/bge-m3 (baai/bge-m3)`
+- `nvidia/llama-3.2-nv-embedqa-1b-v2 (nvidia/llama-3.2-nv-embedqa-1b-v2)`
+- `nvidia/nv-embedqa-e5-v5 (nvidia/nv-embedqa-e5-v5)`
+- `nvidia/nv-embedqa-mistral-7b-v2 (nvidia/nv-embedqa-mistral-7b-v2)`
+- `snowflake/arctic-embed-l (snowflake/arctic-embed-l)`
 
 ### Prerequisite: API Keys
 
diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py
index 4305f4c6f..a855566bc 100644
--- a/llama_stack/providers/remote/inference/nvidia/models.py
+++ b/llama_stack/providers/remote/inference/nvidia/models.py
@@ -48,12 +48,49 @@ _MODEL_ENTRIES = [
         "meta/llama-3.2-90b-vision-instruct",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
+    # NeMo Retriever Text Embedding models -
+    #
+    # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
+    #
+    # +-----------------------------------+--------+-----------+-----------+------------+
+    # | Model ID                          | Max    | Publisher | Embedding | Dynamic    |
+    # |                                   | Tokens |           | Dimension | Embeddings |
+    # +-----------------------------------+--------+-----------+-----------+------------+
+    # | nvidia/llama-3.2-nv-embedqa-1b-v2 | 8192   | NVIDIA    | 2048      | Yes        |
+    # | nvidia/nv-embedqa-e5-v5           | 512    | NVIDIA    | 1024      | No         |
+    # | nvidia/nv-embedqa-mistral-7b-v2   | 512    | NVIDIA    | 4096      | No         |
+    # | snowflake/arctic-embed-l          | 512    | Snowflake | 1024      | No         |
+    # +-----------------------------------+--------+-----------+-----------+------------+
     ProviderModelEntry(
-        provider_model_id="baai/bge-m3",
+        provider_model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",
+        model_type=ModelType.embedding,
+        metadata={
+            "embedding_dimension": 2048,
+            "context_length": 8192,
+        },
+    ),
+    ProviderModelEntry(
+        provider_model_id="nvidia/nv-embedqa-e5-v5",
         model_type=ModelType.embedding,
         metadata={
             "embedding_dimension": 1024,
-            "context_length": 8192,
+            "context_length": 512,
+        },
+    ),
+    ProviderModelEntry(
+        provider_model_id="nvidia/nv-embedqa-mistral-7b-v2",
+        model_type=ModelType.embedding,
+        metadata={
+            "embedding_dimension": 4096,
+            "context_length": 512,
+        },
+    ),
+    ProviderModelEntry(
+        provider_model_id="snowflake/arctic-embed-l",
+        model_type=ModelType.embedding,
+        metadata={
+            "embedding_dimension": 1024,
+            "context_length": 512,
         },
     ),
     # TODO(mf): how do we handle Nemotron models?
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
index 4c38ec24e..bfbad749a 100644
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -136,11 +136,32 @@ models:
   provider_model_id: meta/llama-3.2-90b-vision-instruct
   model_type: llm
 - metadata:
-    embedding_dimension: 1024
+    embedding_dimension: 2048
     context_length: 8192
-  model_id: baai/bge-m3
+  model_id: nvidia/llama-3.2-nv-embedqa-1b-v2
   provider_id: nvidia
-  provider_model_id: baai/bge-m3
+  provider_model_id: nvidia/llama-3.2-nv-embedqa-1b-v2
+  model_type: embedding
+- metadata:
+    embedding_dimension: 1024
+    context_length: 512
+  model_id: nvidia/nv-embedqa-e5-v5
+  provider_id: nvidia
+  provider_model_id: nvidia/nv-embedqa-e5-v5
+  model_type: embedding
+- metadata:
+    embedding_dimension: 4096
+    context_length: 512
+  model_id: nvidia/nv-embedqa-mistral-7b-v2
+  provider_id: nvidia
+  provider_model_id: nvidia/nv-embedqa-mistral-7b-v2
+  model_type: embedding
+- metadata:
+    embedding_dimension: 1024
+    context_length: 512
+  model_id: snowflake/arctic-embed-l
+  provider_id: nvidia
+  provider_model_id: snowflake/arctic-embed-l
   model_type: embedding
 shields: []
 vector_dbs: []
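
Once the `nvidia` distribution is running with this template, the new entries can be exercised end to end. The snippet below is a minimal sketch, not part of the patch itself: it assumes the `llama-stack-client` Python package, a stack served at `http://localhost:8321`, and the inference embeddings endpoint; only the model IDs and the 2048 dimension come from the entries registered above.

```python
from llama_stack_client import LlamaStackClient

# Server URL is an assumption; point it at wherever the nvidia distro is running.
client = LlamaStackClient(base_url="http://localhost:8321")

# Embed a query with one of the newly registered NeMo Retriever models.
response = client.inference.embeddings(
    model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",
    contents=["What is the support matrix for NeMo Retriever text embedding models?"],
)

# The 1b-v2 entry above advertises embedding_dimension: 2048.
print(len(response.embeddings[0]))
```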