feat: Add rerank API for NVIDIA Inference Provider (#3329)

# What does this PR do?
Add rerank API for NVIDIA Inference Provider.

<!-- If resolving an issue, uncomment and update the line below -->
Closes #3278 

## Test Plan
Unit test:
```
pytest tests/unit/providers/nvidia/test_rerank_inference.py
```

Integration test: 
```
pytest -s -v tests/integration/inference/test_rerank.py   --stack-config="inference=nvidia"   --rerank-model=nvidia/nvidia/nv-rerankqa-mistral-4b-v3   --env NVIDIA_API_KEY=""   --env NVIDIA_BASE_URL="https://integrate.api.nvidia.com"
```
This commit is contained in:
Jiayi Ni 2025-10-30 21:42:09 -07:00 committed by GitHub
parent c396de57a4
commit fa7699d2c3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 622 additions and 1 deletions

View file

@ -28,6 +28,7 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
Attributes:
url (str): A base url for accessing the NVIDIA NIM, e.g. http://localhost:8000
api_key (str): The access key for the hosted NIM endpoints
rerank_model_to_url (dict[str, str]): Mapping of rerank model identifiers to their API endpoints
There are two ways to access NVIDIA NIMs -
0. Hosted: Preview APIs hosted at https://integrate.api.nvidia.com
@ -55,6 +56,14 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false",
description="When set to false, the API version will not be appended to the base_url. By default, it is true.",
)
rerank_model_to_url: dict[str, str] = Field(
default_factory=lambda: {
"nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
"nvidia/nv-rerankqa-mistral-4b-v3": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking",
"nvidia/llama-3.2-nv-rerankqa-1b-v2": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking",
},
description="Mapping of rerank model identifiers to their API endpoints. ",
)
@classmethod
def sample_run_config(