From 78375889ec474f9f1916a0e1163b7ef9b2b5eba5 Mon Sep 17 00:00:00 2001
From: Jiayi <jiayin@nvidia.com>
Date: Wed, 10 Sep 2025 11:39:39 -0700
Subject: [PATCH] docs: list rerank models in the Inference API description

---
 docs/docs/providers/inference/index.mdx | 2 +-
 llama_stack/apis/inference/inference.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx
index d9d30ab78..1cbeb12f0 100644
--- a/docs/docs/providers/inference/index.mdx
+++ b/docs/docs/providers/inference/index.mdx
@@ -18,6 +18,6 @@ Llama Stack Inference API for generating completions, chat completions, and embe
     This API provides the raw interface to the underlying models. Three kinds of models are supported:
     - LLM models: these models generate "raw" and "chat" (conversational) completions.
     - Embedding models: these models generate embeddings to be used for semantic search.
-    - Rerank models: these models rerank the documents by relevance.
+    - Rerank models: these models reorder the documents by relevance.
 
 This section contains documentation for all available providers for the **inference** API.
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index e452d8157..7bd9f5918 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -1159,9 +1159,10 @@ class InferenceProvider(Protocol):
 class Inference(InferenceProvider):
     """Llama Stack Inference API for generating completions, chat completions, and embeddings.
 
-    This API provides the raw interface to the underlying models. Two kinds of models are supported:
+    This API provides the raw interface to the underlying models. Three kinds of models are supported:
     - LLM models: these models generate "raw" and "chat" (conversational) completions.
     - Embedding models: these models generate embeddings to be used for semantic search.
+    - Rerank models: these models reorder the documents by relevance.
     """
 
     @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)