From 78375889ec474f9f1916a0e1163b7ef9b2b5eba5 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Wed, 10 Sep 2025 11:39:39 -0700 Subject: [PATCH] Update index.md --- docs/docs/providers/inference/index.mdx | 2 +- llama_stack/apis/inference/inference.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index d9d30ab78..1cbeb12f0 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -18,6 +18,6 @@ Llama Stack Inference API for generating completions, chat completions, and embe This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models rerank the documents by relevance. + - Rerank models: these models reorder the documents by relevance. This section contains documentation for all available providers for the **inference** API. diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index e452d8157..7bd9f5918 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1159,9 +1159,10 @@ class InferenceProvider(Protocol): class Inference(InferenceProvider): """Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: + This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents by relevance. """ @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)