feat: add nemo retriever text embedding models to nvidia inference provider (#1218)

# What does this PR do?

Add the NeMo Retriever text embedding models to the NVIDIA inference provider, as listed in the support matrix at
https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
This commit is contained in:
Matthew Farrellee 2025-02-26 23:18:34 -06:00 committed by GitHub
parent 23b65b6cee
commit 99b6925ad8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 67 additions and 6 deletions

View file

@ -36,7 +36,10 @@ The following models are available by default:
- `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)` - `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)`
- `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)` - `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)`
- `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)` - `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)`
- `baai/bge-m3 (baai/bge-m3)` - `nvidia/llama-3.2-nv-embedqa-1b-v2 (nvidia/llama-3.2-nv-embedqa-1b-v2)`
- `nvidia/nv-embedqa-e5-v5 (nvidia/nv-embedqa-e5-v5)`
- `nvidia/nv-embedqa-mistral-7b-v2 (nvidia/nv-embedqa-mistral-7b-v2)`
- `snowflake/arctic-embed-l (snowflake/arctic-embed-l)`
### Prerequisite: API Keys ### Prerequisite: API Keys

View file

@ -48,12 +48,49 @@ _MODEL_ENTRIES = [
"meta/llama-3.2-90b-vision-instruct", "meta/llama-3.2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value, CoreModelId.llama3_2_90b_vision_instruct.value,
), ),
# NeMo Retriever Text Embedding models -
#
# https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
#
# +-----------------------------------+--------+-----------+-----------+------------+
# | Model ID | Max | Publisher | Embedding | Dynamic |
# | | Tokens | | Dimension | Embeddings |
# +-----------------------------------+--------+-----------+-----------+------------+
# | nvidia/llama-3.2-nv-embedqa-1b-v2 | 8192 | NVIDIA | 2048 | Yes |
# | nvidia/nv-embedqa-e5-v5 | 512 | NVIDIA | 1024 | No |
# | nvidia/nv-embedqa-mistral-7b-v2 | 512 | NVIDIA | 4096 | No |
# | snowflake/arctic-embed-l | 512 | Snowflake | 1024 | No |
# +-----------------------------------+--------+-----------+-----------+------------+
ProviderModelEntry( ProviderModelEntry(
provider_model_id="baai/bge-m3", provider_model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 2048,
"context_length": 8192,
},
),
ProviderModelEntry(
provider_model_id="nvidia/nv-embedqa-e5-v5",
model_type=ModelType.embedding, model_type=ModelType.embedding,
metadata={ metadata={
"embedding_dimension": 1024, "embedding_dimension": 1024,
"context_length": 8192, "context_length": 512,
},
),
ProviderModelEntry(
provider_model_id="nvidia/nv-embedqa-mistral-7b-v2",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 4096,
"context_length": 512,
},
),
ProviderModelEntry(
provider_model_id="snowflake/arctic-embed-l",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 1024,
"context_length": 512,
}, },
), ),
# TODO(mf): how do we handle Nemotron models? # TODO(mf): how do we handle Nemotron models?

View file

@ -136,11 +136,32 @@ models:
provider_model_id: meta/llama-3.2-90b-vision-instruct provider_model_id: meta/llama-3.2-90b-vision-instruct
model_type: llm model_type: llm
- metadata: - metadata:
embedding_dimension: 1024 embedding_dimension: 2048
context_length: 8192 context_length: 8192
model_id: baai/bge-m3 model_id: nvidia/llama-3.2-nv-embedqa-1b-v2
provider_id: nvidia provider_id: nvidia
provider_model_id: baai/bge-m3 provider_model_id: nvidia/llama-3.2-nv-embedqa-1b-v2
model_type: embedding
- metadata:
embedding_dimension: 1024
context_length: 512
model_id: nvidia/nv-embedqa-e5-v5
provider_id: nvidia
provider_model_id: nvidia/nv-embedqa-e5-v5
model_type: embedding
- metadata:
embedding_dimension: 4096
context_length: 512
model_id: nvidia/nv-embedqa-mistral-7b-v2
provider_id: nvidia
provider_model_id: nvidia/nv-embedqa-mistral-7b-v2
model_type: embedding
- metadata:
embedding_dimension: 1024
context_length: 512
model_id: snowflake/arctic-embed-l
provider_id: nvidia
provider_model_id: snowflake/arctic-embed-l
model_type: embedding model_type: embedding
shields: [] shields: []
vector_dbs: [] vector_dbs: []