forked from phoenix-oss/llama-stack-mirror
feat: add nemo retriever text embedding models to nvidia inference provider (#1218)
# What does this PR do?
Adds the NeMo Retriever text embedding models listed in the support matrix at https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
This commit is contained in:
parent
23b65b6cee
commit
99b6925ad8
3 changed files with 67 additions and 6 deletions
|
@ -36,7 +36,10 @@ The following models are available by default:
|
||||||
- `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)`
|
- `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)`
|
||||||
- `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)`
|
- `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)`
|
||||||
- `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)`
|
- `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)`
|
||||||
- `baai/bge-m3 (baai/bge-m3)`
|
- `nvidia/llama-3.2-nv-embedqa-1b-v2 (nvidia/llama-3.2-nv-embedqa-1b-v2)`
|
||||||
|
- `nvidia/nv-embedqa-e5-v5 (nvidia/nv-embedqa-e5-v5)`
|
||||||
|
- `nvidia/nv-embedqa-mistral-7b-v2 (nvidia/nv-embedqa-mistral-7b-v2)`
|
||||||
|
- `snowflake/arctic-embed-l (snowflake/arctic-embed-l)`
|
||||||
|
|
||||||
|
|
||||||
### Prerequisite: API Keys
|
### Prerequisite: API Keys
|
||||||
|
|
|
@ -48,12 +48,49 @@ _MODEL_ENTRIES = [
|
||||||
"meta/llama-3.2-90b-vision-instruct",
|
"meta/llama-3.2-90b-vision-instruct",
|
||||||
CoreModelId.llama3_2_90b_vision_instruct.value,
|
CoreModelId.llama3_2_90b_vision_instruct.value,
|
||||||
),
|
),
|
||||||
|
# NeMo Retriever Text Embedding models -
|
||||||
|
#
|
||||||
|
# https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
|
||||||
|
#
|
||||||
|
# +-----------------------------------+--------+-----------+-----------+------------+
|
||||||
|
# | Model ID | Max | Publisher | Embedding | Dynamic |
|
||||||
|
# | | Tokens | | Dimension | Embeddings |
|
||||||
|
# +-----------------------------------+--------+-----------+-----------+------------+
|
||||||
|
# | nvidia/llama-3.2-nv-embedqa-1b-v2 | 8192 | NVIDIA | 2048 | Yes |
|
||||||
|
# | nvidia/nv-embedqa-e5-v5 | 512 | NVIDIA | 1024 | No |
|
||||||
|
# | nvidia/nv-embedqa-mistral-7b-v2 | 512 | NVIDIA | 4096 | No |
|
||||||
|
# | snowflake/arctic-embed-l | 512 | Snowflake | 1024 | No |
|
||||||
|
# +-----------------------------------+--------+-----------+-----------+------------+
|
||||||
ProviderModelEntry(
|
ProviderModelEntry(
|
||||||
provider_model_id="baai/bge-m3",
|
provider_model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",
|
||||||
|
model_type=ModelType.embedding,
|
||||||
|
metadata={
|
||||||
|
"embedding_dimension": 2048,
|
||||||
|
"context_length": 8192,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
ProviderModelEntry(
|
||||||
|
provider_model_id="nvidia/nv-embedqa-e5-v5",
|
||||||
model_type=ModelType.embedding,
|
model_type=ModelType.embedding,
|
||||||
metadata={
|
metadata={
|
||||||
"embedding_dimension": 1024,
|
"embedding_dimension": 1024,
|
||||||
"context_length": 8192,
|
"context_length": 512,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
ProviderModelEntry(
|
||||||
|
provider_model_id="nvidia/nv-embedqa-mistral-7b-v2",
|
||||||
|
model_type=ModelType.embedding,
|
||||||
|
metadata={
|
||||||
|
"embedding_dimension": 4096,
|
||||||
|
"context_length": 512,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
ProviderModelEntry(
|
||||||
|
provider_model_id="snowflake/arctic-embed-l",
|
||||||
|
model_type=ModelType.embedding,
|
||||||
|
metadata={
|
||||||
|
"embedding_dimension": 1024,
|
||||||
|
"context_length": 512,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
# TODO(mf): how do we handle Nemotron models?
|
# TODO(mf): how do we handle Nemotron models?
|
||||||
|
|
|
@ -136,11 +136,32 @@ models:
|
||||||
provider_model_id: meta/llama-3.2-90b-vision-instruct
|
provider_model_id: meta/llama-3.2-90b-vision-instruct
|
||||||
model_type: llm
|
model_type: llm
|
||||||
- metadata:
|
- metadata:
|
||||||
embedding_dimension: 1024
|
embedding_dimension: 2048
|
||||||
context_length: 8192
|
context_length: 8192
|
||||||
model_id: baai/bge-m3
|
model_id: nvidia/llama-3.2-nv-embedqa-1b-v2
|
||||||
provider_id: nvidia
|
provider_id: nvidia
|
||||||
provider_model_id: baai/bge-m3
|
provider_model_id: nvidia/llama-3.2-nv-embedqa-1b-v2
|
||||||
|
model_type: embedding
|
||||||
|
- metadata:
|
||||||
|
embedding_dimension: 1024
|
||||||
|
context_length: 512
|
||||||
|
model_id: nvidia/nv-embedqa-e5-v5
|
||||||
|
provider_id: nvidia
|
||||||
|
provider_model_id: nvidia/nv-embedqa-e5-v5
|
||||||
|
model_type: embedding
|
||||||
|
- metadata:
|
||||||
|
embedding_dimension: 4096
|
||||||
|
context_length: 512
|
||||||
|
model_id: nvidia/nv-embedqa-mistral-7b-v2
|
||||||
|
provider_id: nvidia
|
||||||
|
provider_model_id: nvidia/nv-embedqa-mistral-7b-v2
|
||||||
|
model_type: embedding
|
||||||
|
- metadata:
|
||||||
|
embedding_dimension: 1024
|
||||||
|
context_length: 512
|
||||||
|
model_id: snowflake/arctic-embed-l
|
||||||
|
provider_id: nvidia
|
||||||
|
provider_model_id: snowflake/arctic-embed-l
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields: []
|
shields: []
|
||||||
vector_dbs: []
|
vector_dbs: []
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue