From 8c9b7aa764a936b9617d5f8a82d74beb302ed835 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Fri, 5 Sep 2025 16:52:42 -0700 Subject: [PATCH] Add example documentation --- .../remote/inference/nvidia/NVIDIA.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index 625be6088..c683c7a68 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -188,3 +188,22 @@ vlm_response = client.chat.completions.create( print(f"VLM Response: {vlm_response.choices[0].message.content}") ``` + +### Rerank Example + +The following example shows how to rerank a list of candidate documents against a query using an NVIDIA reranking NIM. + +```python +rerank_response = client.inference.rerank( + model="nvidia/llama-3.2-nv-rerankqa-1b-v2", + query="Which planet is known as the Red Planet?", + items=[ + "Mars is often called the Red Planet because of the iron oxide on its surface.", + "Venus is the second planet from the Sun.", + "Jupiter is the largest planet in the Solar System.", + ], +) + +for i, result in enumerate(rerank_response.data): + print(f"{i+1}. [Index: {result.index}, Score: {result.relevance_score:.3f}]") +```