From d1b4e090effd61a29c7102ba9042b749e0727971 Mon Sep 17 00:00:00 2001
From: Jiayi <jiayin@nvidia.com>
Date: Wed, 10 Sep 2025 11:50:55 -0700
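Subject: [PATCH] Update docs to include rerank models

Update the "Inference" tag description in the static API spec
(llama-stack-spec.html and llama-stack-spec.yaml) to list rerank models
as a third supported model kind, alongside LLM and embedding models.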

---
 docs/static/llama-stack-spec.html | 2 +-
 docs/static/llama-stack-spec.yaml | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html
index b260f01a7..8192a9cf6 100644
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@@ -17875,7 +17875,7 @@
         },
         {
             "name": "Inference",
-            "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+            "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents by relevance.",
             "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
         },
         {
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index ebe142557..895b939ab 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -13452,13 +13452,15 @@ tags:
     description: ''
   - name: Inference
     description: >-
-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:
 
       - LLM models: these models generate "raw" and "chat" (conversational) completions.
 
       - Embedding models: these models generate embeddings to be used for semantic
       search.
+
+      - Rerank models: these models reorder the documents by relevance.
     x-displayName: >-
       Llama Stack Inference API for generating completions, chat completions, and
       embeddings.