diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index d9d30ab78..1cbeb12f0 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -18,6 +18,6 @@ Llama Stack Inference API for generating completions, chat completions, and embeddings. This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models rerank the documents by relevance. + - Rerank models: these models reorder the documents by relevance. This section contains documentation for all available providers for the **inference** API. diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index e452d8157..7bd9f5918 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1159,9 +1159,10 @@ class InferenceProvider(Protocol): class Inference(InferenceProvider): """Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: + This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents by relevance. """ @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)