From ad52849072424a56b5c586d2964fa699dcdbada9 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Fri, 17 Oct 2025 14:51:17 -0700 Subject: [PATCH] Remove experimental from rerank models doc --- docs/docs/providers/inference/index.mdx | 4 ++-- docs/static/deprecated-llama-stack-spec.html | 2 +- docs/static/deprecated-llama-stack-spec.yaml | 4 ++-- docs/static/llama-stack-spec.html | 2 +- docs/static/llama-stack-spec.yaml | 4 ++-- docs/static/stainless-llama-stack-spec.html | 2 +- docs/static/stainless-llama-stack-spec.yaml | 4 ++-- llama_stack/apis/inference/inference.py | 2 +- llama_stack/providers/utils/inference/openai_mixin.py | 1 + 9 files changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index bc31caf5f..478611420 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -6,7 +6,7 @@ description: "Inference This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models (Experimental): these models reorder the documents based on their relevance to a query." + - Rerank models: these models reorder the documents based on their relevance to a query." sidebar_label: Inference title: Inference --- @@ -22,6 +22,6 @@ Inference This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models (Experimental): these models reorder the documents based on their relevance to a query. + - Rerank models: these models reorder the documents based on their relevance to a query. This section contains documentation for all available providers for the **inference** API. diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index ef8ec0464..f038a910c 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -13459,7 +13459,7 @@ }, { "name": "Inference", - "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models (Experimental): these models reorder the documents based on their relevance to a query.", + "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.", "x-displayName": "Inference" }, { diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 74cd3ac56..47f009635 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -10218,8 +10218,8 @@ tags: - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models (Experimental): these models reorder the documents based on - their relevance to a query. + - Rerank models: these models reorder the documents based on their relevance + to a query. x-displayName: Inference - name: Models description: '' diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 4a011de66..ca12ef485 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -13262,7 +13262,7 @@ }, { "name": "Inference", - "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models (Experimental): these models reorder the documents based on their relevance to a query.", + "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.", "x-displayName": "Inference" }, { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 2819cde1f..066b38f9b 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -10191,8 +10191,8 @@ tags: - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models (Experimental): these models reorder the documents based on - their relevance to a query. + - Rerank models: these models reorder the documents based on their relevance + to a query. x-displayName: Inference - name: Inspect description: >- diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 29edfa6b4..563ad54d0 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -17952,7 +17952,7 @@ }, { "name": "Inference", - "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models (Experimental): these models reorder the documents based on their relevance to a query.", + "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.", "x-displayName": "Inference" }, { diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 1705ecce0..a06c20df4 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -13586,8 +13586,8 @@ tags: - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models (Experimental): these models reorder the documents based on - their relevance to a query. + - Rerank models: these models reorder the documents based on their relevance + to a query. x-displayName: Inference - name: Inspect description: >- diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 6dc16305c..049482837 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1237,7 +1237,7 @@ class Inference(InferenceProvider): This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models (Experimental): these models reorder the documents based on their relevance to a query. + - Rerank models: these models reorder the documents based on their relevance to a query. """ @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/llama_stack/providers/utils/inference/openai_mixin.py index adbe4dcb0..e207b1a43 100644 --- a/llama_stack/providers/utils/inference/openai_mixin.py +++ b/llama_stack/providers/utils/inference/openai_mixin.py @@ -48,6 +48,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): - overwrite_completion_id: If True, overwrites the 'id' field in OpenAI responses - download_images: If True, downloads images and converts to base64 for providers that require it - embedding_model_metadata: A dictionary mapping model IDs to their embedding metadata + - rerank_model_list: A list of model IDs for rerank models - provider_data_api_key_field: Optional field name in provider data to look for API key - list_provider_model_ids: Method to list available models from the provider - get_extra_client_params: Method to provide extra parameters to the AsyncOpenAI client