diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index d9d30ab78..1cbeb12f0 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -18,6 +18,6 @@ Llama Stack Inference API for generating completions, chat completions, and embeddings. This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models rerank the documents by relevance. + - Rerank models: these models reorder the documents by relevance. This section contains documentation for all available providers for the **inference** API. diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index e452d8157..7bd9f5918 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1159,9 +1159,10 @@ class InferenceProvider(Protocol): class Inference(InferenceProvider): """Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: + This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents by relevance. """ @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)