chore: Enabling Milvus for VectorIO CI

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
2025-12-27 13:18:03 +00:00 · 2025-06-27 21:25:57 -04:00 · 2025-06-27 21:25:57 -04:00 · c8d41d45ec
commit c8d41d45ec
parent 709eb7da33
115 changed files with 2919 additions and 184 deletions
--- a/llama_stack/providers/registry/agents.py
+++ b/llama_stack/providers/registry/agents.py
@ -35,5 +35,6 @@ def available_providers() -> list[ProviderSpec]:
                Api.tool_runtime,
                Api.tool_groups,
            ],
+            description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
        ),
    ]
--- a/llama_stack/providers/registry/datasetio.py
+++ b/llama_stack/providers/registry/datasetio.py
@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
            module="llama_stack.providers.inline.datasetio.localfs",
            config_class="llama_stack.providers.inline.datasetio.localfs.LocalFSDatasetIOConfig",
            api_dependencies=[],
+            description="Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.",
        ),
        remote_provider_spec(
            api=Api.datasetio,
@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]:
                ],
                module="llama_stack.providers.remote.datasetio.huggingface",
                config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig",
+                description="HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.",
            ),
        ),
        remote_provider_spec(
@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]:
                ],
                module="llama_stack.providers.remote.datasetio.nvidia",
                config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig",
+                description="NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.",
            ),
        ),
    ]
--- a/llama_stack/providers/registry/eval.py
+++ b/llama_stack/providers/registry/eval.py
@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
                Api.inference,
                Api.agents,
            ],
+            description="Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.",
        ),
        remote_provider_spec(
            api=Api.eval,
@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]:
                ],
                module="llama_stack.providers.remote.eval.nvidia",
                config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig",
+                description="NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.",
            ),
            api_dependencies=[
                Api.datasetio,
--- a/llama_stack/providers/registry/files.py
+++ b/llama_stack/providers/registry/files.py
@ -21,5 +21,6 @@ def available_providers() -> list[ProviderSpec]:
            pip_packages=sql_store_pip_packages,
            module="llama_stack.providers.inline.files.localfs",
            config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig",
+            description="Local filesystem-based file storage provider for managing files and documents locally.",
        ),
    ]
--- a/llama_stack/providers/registry/inference.py
+++ b/llama_stack/providers/registry/inference.py
@ -35,6 +35,7 @@ def available_providers() -> list[ProviderSpec]:
            pip_packages=META_REFERENCE_DEPS,
            module="llama_stack.providers.inline.inference.meta_reference",
            config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
+            description="Meta's reference implementation of inference with support for various model formats and optimization techniques.",
        ),
        InlineProviderSpec(
            api=Api.inference,
@ -44,6 +45,7 @@ def available_providers() -> list[ProviderSpec]:
            ],
            module="llama_stack.providers.inline.inference.vllm",
            config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig",
+            description="vLLM inference provider for high-performance model serving with PagedAttention and continuous batching.",
        ),
        InlineProviderSpec(
            api=Api.inference,
@ -54,6 +56,7 @@ def available_providers() -> list[ProviderSpec]:
            ],
            module="llama_stack.providers.inline.inference.sentence_transformers",
            config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
+            description="Sentence Transformers inference provider for text embeddings and similarity search.",
        ),
        remote_provider_spec(
            api=Api.inference,
@ -64,6 +67,7 @@ def available_providers() -> list[ProviderSpec]:
                ],
                module="llama_stack.providers.remote.inference.cerebras",
                config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
+                description="Cerebras inference provider for running models on Cerebras Cloud platform.",
            ),
        ),
        remote_provider_spec(
@ -73,6 +77,7 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
                config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
                module="llama_stack.providers.remote.inference.ollama",
+                description="Ollama inference provider for running local models through the Ollama runtime.",
            ),
        ),
        remote_provider_spec(
@ -82,6 +87,7 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["openai"],
                module="llama_stack.providers.remote.inference.vllm",
                config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
+                description="Remote vLLM inference provider for connecting to vLLM servers.",
            ),
        ),
        remote_provider_spec(
@ -91,6 +97,7 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["huggingface_hub", "aiohttp"],
                module="llama_stack.providers.remote.inference.tgi",
                config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
+                description="Text Generation Inference (TGI) provider for HuggingFace model serving.",
            ),
        ),
        remote_provider_spec(
@ -100,6 +107,7 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["huggingface_hub", "aiohttp"],
                module="llama_stack.providers.remote.inference.tgi",
                config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
+                description="HuggingFace Inference API serverless provider for on-demand model inference.",
            ),
        ),
        remote_provider_spec(
@ -109,6 +117,7 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["huggingface_hub", "aiohttp"],
                module="llama_stack.providers.remote.inference.tgi",
                config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
+                description="HuggingFace Inference Endpoints provider for dedicated model serving.",
            ),
        ),
        remote_provider_spec(
@ -121,6 +130,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.fireworks",
                config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
                provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator",
+                description="Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.",
            ),
        ),
        remote_provider_spec(
@ -133,6 +143,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.together",
                config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
                provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator",
+                description="Together AI inference provider for open-source models and collaborative AI development.",
            ),
        ),
        remote_provider_spec(
@ -142,6 +153,7 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["boto3"],
                module="llama_stack.providers.remote.inference.bedrock",
                config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
+                description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.",
            ),
        ),
        remote_provider_spec(
@ -153,6 +165,7 @@ def available_providers() -> list[ProviderSpec]:
                ],
                module="llama_stack.providers.remote.inference.databricks",
                config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
+                description="Databricks inference provider for running models on Databricks' unified analytics platform.",
            ),
        ),
        remote_provider_spec(
@ -164,6 +177,7 @@ def available_providers() -> list[ProviderSpec]:
                ],
                module="llama_stack.providers.remote.inference.nvidia",
                config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
+                description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
            ),
        ),
        remote_provider_spec(
@ -173,6 +187,7 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["openai"],
                module="llama_stack.providers.remote.inference.runpod",
                config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
+                description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
            ),
        ),
        remote_provider_spec(
@ -183,6 +198,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.openai",
                config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig",
                provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
+                description="OpenAI inference provider for accessing GPT models and other OpenAI services.",
            ),
        ),
        remote_provider_spec(
@ -193,6 +209,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.anthropic",
                config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
                provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
+                description="Anthropic inference provider for accessing Claude models and Anthropic's AI services.",
            ),
        ),
        remote_provider_spec(
@ -203,6 +220,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.gemini",
                config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
                provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
+                description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
            ),
        ),
        remote_provider_spec(
@ -213,6 +231,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.groq",
                config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
                provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
+                description="Groq inference provider for ultra-fast inference using Groq's LPU technology.",
            ),
        ),
        remote_provider_spec(
@ -223,6 +242,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.fireworks_openai_compat",
                config_class="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator",
+                description="Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.",
            ),
        ),
        remote_provider_spec(
@ -233,6 +253,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.llama_openai_compat",
                config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
+                description="Llama OpenAI-compatible provider for using Llama models with OpenAI API format.",
            ),
        ),
        remote_provider_spec(
@ -243,6 +264,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.together_openai_compat",
                config_class="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherProviderDataValidator",
+                description="Together AI OpenAI-compatible provider for using Together models with OpenAI API format.",
            ),
        ),
        remote_provider_spec(
@ -253,6 +275,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.groq_openai_compat",
                config_class="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqProviderDataValidator",
+                description="Groq OpenAI-compatible provider for using Groq models with OpenAI API format.",
            ),
        ),
        remote_provider_spec(
@ -263,6 +286,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.sambanova_openai_compat",
                config_class="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaProviderDataValidator",
+                description="SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format.",
            ),
        ),
        remote_provider_spec(
@ -273,6 +297,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.cerebras_openai_compat",
                config_class="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasProviderDataValidator",
+                description="Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.",
            ),
        ),
        remote_provider_spec(
@ -283,6 +308,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.sambanova",
                config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
                provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
+                description="SambaNova inference provider for running models on SambaNova's dataflow architecture.",
            ),
        ),
        remote_provider_spec(
@ -293,6 +319,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.passthrough",
                config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig",
                provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator",
+                description="Passthrough inference provider for connecting to any external inference service not directly supported.",
            ),
        ),
        remote_provider_spec(
@ -303,6 +330,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.inference.watsonx",
                config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
                provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",
+                description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.",
            ),
        ),
    ]
--- a/llama_stack/providers/registry/post_training.py
+++ b/llama_stack/providers/registry/post_training.py
@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]:
                Api.datasetio,
                Api.datasets,
            ],
+            description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.",
        ),
        InlineProviderSpec(
            api=Api.post_training,
@ -31,6 +32,7 @@ def available_providers() -> list[ProviderSpec]:
                Api.datasetio,
                Api.datasets,
            ],
+            description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.",
        ),
        remote_provider_spec(
            api=Api.post_training,
@ -39,6 +41,7 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["requests", "aiohttp"],
                module="llama_stack.providers.remote.post_training.nvidia",
                config_class="llama_stack.providers.remote.post_training.nvidia.NvidiaPostTrainingConfig",
+                description="NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.",
            ),
        ),
    ]
--- a/llama_stack/providers/registry/safety.py
+++ b/llama_stack/providers/registry/safety.py
@ -25,6 +25,7 @@ def available_providers() -> list[ProviderSpec]:
            ],
            module="llama_stack.providers.inline.safety.prompt_guard",
            config_class="llama_stack.providers.inline.safety.prompt_guard.PromptGuardConfig",
+            description="Prompt Guard safety provider for detecting and filtering unsafe prompts and content.",
        ),
        InlineProviderSpec(
            api=Api.safety,
@ -35,6 +36,7 @@ def available_providers() -> list[ProviderSpec]:
            api_dependencies=[
                Api.inference,
            ],
+            description="Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.",
        ),
        InlineProviderSpec(
            api=Api.safety,
@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]:
            ],
            module="llama_stack.providers.inline.safety.code_scanner",
            config_class="llama_stack.providers.inline.safety.code_scanner.CodeScannerConfig",
+            description="Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.",
        ),
        remote_provider_spec(
            api=Api.safety,
@ -52,6 +55,7 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["boto3"],
                module="llama_stack.providers.remote.safety.bedrock",
                config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig",
+                description="AWS Bedrock safety provider for content moderation using AWS's safety services.",
            ),
        ),
        remote_provider_spec(
@ -61,6 +65,7 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["requests"],
                module="llama_stack.providers.remote.safety.nvidia",
                config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig",
+                description="NVIDIA's safety provider for content moderation and safety filtering.",
            ),
        ),
        remote_provider_spec(
@ -71,6 +76,7 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.safety.sambanova",
                config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig",
                provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator",
+                description="SambaNova's safety provider for content moderation and safety filtering.",
            ),
        ),
    ]
--- a/llama_stack/providers/registry/scoring.py
+++ b/llama_stack/providers/registry/scoring.py
@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]:
                Api.datasetio,
                Api.datasets,
            ],
+            description="Basic scoring provider for simple evaluation metrics and scoring functions.",
        ),
        InlineProviderSpec(
            api=Api.scoring,
@ -32,6 +33,7 @@ def available_providers() -> list[ProviderSpec]:
                Api.datasets,
                Api.inference,
            ],
+            description="LLM-as-judge scoring provider that uses language models to evaluate and score responses.",
        ),
        InlineProviderSpec(
            api=Api.scoring,
@ -44,5 +46,6 @@ def available_providers() -> list[ProviderSpec]:
                Api.datasets,
            ],
            provider_data_validator="llama_stack.providers.inline.scoring.braintrust.BraintrustProviderDataValidator",
+            description="Braintrust scoring provider for evaluation and scoring using the Braintrust platform.",
        ),
    ]
--- a/llama_stack/providers/registry/telemetry.py
+++ b/llama_stack/providers/registry/telemetry.py
@ -24,5 +24,6 @@ def available_providers() -> list[ProviderSpec]:
            optional_api_dependencies=[Api.datasetio],
            module="llama_stack.providers.inline.telemetry.meta_reference",
            config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
+            description="Meta's reference implementation of telemetry and observability using OpenTelemetry.",
        ),
    ]
--- a/llama_stack/providers/registry/tool_runtime.py
+++ b/llama_stack/providers/registry/tool_runtime.py
@ -33,6 +33,7 @@ def available_providers() -> list[ProviderSpec]:
            module="llama_stack.providers.inline.tool_runtime.rag",
            config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig",
            api_dependencies=[Api.vector_io, Api.inference],
+            description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.",
        ),
        remote_provider_spec(
            api=Api.tool_runtime,
@ -42,6 +43,7 @@ def available_providers() -> list[ProviderSpec]:
                config_class="llama_stack.providers.remote.tool_runtime.brave_search.config.BraveSearchToolConfig",
                pip_packages=["requests"],
                provider_data_validator="llama_stack.providers.remote.tool_runtime.brave_search.BraveSearchToolProviderDataValidator",
+                description="Brave Search tool for web search capabilities with privacy-focused results.",
            ),
        ),
        remote_provider_spec(
@ -52,6 +54,7 @@ def available_providers() -> list[ProviderSpec]:
                config_class="llama_stack.providers.remote.tool_runtime.bing_search.config.BingSearchToolConfig",
                pip_packages=["requests"],
                provider_data_validator="llama_stack.providers.remote.tool_runtime.bing_search.BingSearchToolProviderDataValidator",
+                description="Bing Search tool for web search capabilities using Microsoft's search engine.",
            ),
        ),
        remote_provider_spec(
@ -62,6 +65,7 @@ def available_providers() -> list[ProviderSpec]:
                config_class="llama_stack.providers.remote.tool_runtime.tavily_search.config.TavilySearchToolConfig",
                pip_packages=["requests"],
                provider_data_validator="llama_stack.providers.remote.tool_runtime.tavily_search.TavilySearchToolProviderDataValidator",
+                description="Tavily Search tool for AI-optimized web search with structured results.",
            ),
        ),
        remote_provider_spec(
@ -72,6 +76,7 @@ def available_providers() -> list[ProviderSpec]:
                config_class="llama_stack.providers.remote.tool_runtime.wolfram_alpha.config.WolframAlphaToolConfig",
                pip_packages=["requests"],
                provider_data_validator="llama_stack.providers.remote.tool_runtime.wolfram_alpha.WolframAlphaToolProviderDataValidator",
+                description="Wolfram Alpha tool for computational knowledge and mathematical calculations.",
            ),
        ),
        remote_provider_spec(
@ -82,6 +87,7 @@ def available_providers() -> list[ProviderSpec]:
                config_class="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderConfig",
                pip_packages=["mcp"],
                provider_data_validator="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderDataValidator",
+                description="Model Context Protocol (MCP) tool for standardized tool calling and context management.",
            ),
        ),
    ]
--- a/llama_stack/providers/registry/vector_io.py
+++ b/llama_stack/providers/registry/vector_io.py
@ -25,6 +25,7 @@ def available_providers() -> list[ProviderSpec]:
            deprecation_warning="Please use the `inline::faiss` provider instead.",
            api_dependencies=[Api.inference],
            optional_api_dependencies=[Api.files],
+            description="Meta's reference implementation of a vector database.",
        ),
        InlineProviderSpec(
            api=Api.vector_io,
@ -34,6 +35,36 @@ def available_providers() -> list[ProviderSpec]:
            config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
            api_dependencies=[Api.inference],
            optional_api_dependencies=[Api.files],
+            description="""
+[Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It
+allows you to store and query vectors directly in memory.
+That means you'll get fast and efficient vector retrieval.
+
+## Features
+
+- Lightweight and easy to use
+- Fully integrated with Llama Stack
+- GPU support
+
+## Usage
+
+To use Faiss in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Faiss.
+3. Start storing and querying vectors.
+
+## Installation
+
+You can install Faiss using pip:
+
+```bash
+pip install faiss-cpu
+```
+## Documentation
+See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for
+more details about Faiss in general.
+""",
        ),
        # NOTE: sqlite-vec cannot be bundled into the container image because it does not have a
        # source distribution and the wheels are not available for all platforms.
@ -45,6 +76,204 @@ def available_providers() -> list[ProviderSpec]:
            config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
            api_dependencies=[Api.inference],
            optional_api_dependencies=[Api.files],
+            description="""
+[SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It
+allows you to store and query vectors directly within an SQLite database.
+That means you're not limited to storing vectors in memory or in a separate service.
+
+## Features
+
+- Lightweight and easy to use
+- Fully integrated with Llama Stacks
+- Uses disk-based storage for persistence, allowing for larger vector storage
+
+### Comparison to Faiss
+
+The choice between Faiss and sqlite-vec should be made based on the needs of your application,
+as they have different strengths.
+
+#### Choosing the Right Provider
+
+Scenario | Recommended Tool | Reason
+-- |-----------------| --
+Online Analytical Processing (OLAP) | Faiss           | Fast, in-memory searches
+Online Transaction Processing (OLTP) | sqlite-vec      | Frequent writes and reads
+Frequent writes | sqlite-vec      | Efficient disk-based storage and incremental indexing
+Large datasets | sqlite-vec      | Disk-based storage for larger vector storage
+Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, indexing, and GPU acceleration
+
+#### Empirical Example
+
+Consider the histogram below in which 10,000 randomly generated strings were inserted
+in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`.
+
+```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
+:alt: Comparison of SQLite-Vec and Faiss write times
+:width: 400px
+```
+
+You will notice that the average write time for `sqlite-vec` was 788ms, compared to
+47,640ms for Faiss. While the number is jarring, if you look at the distribution, you can see that it is rather
+uniformly spread across the [1500, 100000] interval.
+
+Looking at each individual write in the order that the documents are inserted you'll see the increase in
+write speed as Faiss reindexes the vectors after each write.
+```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png
+:alt: Comparison of SQLite-Vec and Faiss write times
+:width: 400px
+```
+
+In comparison, the read times for Faiss was on average 10% faster than sqlite-vec.
+The modes of the two distributions highlight the differences much further where Faiss
+will likely yield faster read performance.
+
+```{image} ../../../../_static/providers/vector_io/read_time_comparison_sqlite-vec-faiss.png
+:alt: Comparison of SQLite-Vec and Faiss read times
+:width: 400px
+```
+
+## Usage
+
+To use sqlite-vec in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use SQLite-Vec.
+3. Start storing and querying vectors.
+
+The SQLite-vec provider supports three search modes:
+
+1. **Vector Search** (`mode="vector"`): Performs pure vector similarity search using the embeddings.
+2. **Keyword Search** (`mode="keyword"`): Performs full-text search using SQLite's FTS5.
+3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword search for better results. First performs keyword search to get candidate matches, then applies vector similarity search on those candidates.
+
+Example with hybrid search:
+```python
+response = await vector_io.query_chunks(
+    vector_db_id="my_db",
+    query="your query here",
+    params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
+)
+
+# Using RRF ranker
+response = await vector_io.query_chunks(
+    vector_db_id="my_db",
+    query="your query here",
+    params={
+        "mode": "hybrid",
+        "max_chunks": 3,
+        "score_threshold": 0.7,
+        "ranker": {"type": "rrf", "impact_factor": 60.0},
+    },
+)
+
+# Using weighted ranker
+response = await vector_io.query_chunks(
+    vector_db_id="my_db",
+    query="your query here",
+    params={
+        "mode": "hybrid",
+        "max_chunks": 3,
+        "score_threshold": 0.7,
+        "ranker": {"type": "weighted", "alpha": 0.7},  # 70% vector, 30% keyword
+    },
+)
+```
+
+Example with explicit vector search:
+```python
+response = await vector_io.query_chunks(
+    vector_db_id="my_db",
+    query="your query here",
+    params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
+)
+```
+
+Example with keyword search:
+```python
+response = await vector_io.query_chunks(
+    vector_db_id="my_db",
+    query="your query here",
+    params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
+)
+```
+
+## Supported Search Modes
+
+The SQLite vector store supports three search modes:
+
+1. **Vector Search** (`mode="vector"`): Uses vector similarity to find relevant chunks
+2. **Keyword Search** (`mode="keyword"`): Uses keyword matching to find relevant chunks
+3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword scores using a ranker
+
+### Hybrid Search
+
+Hybrid search combines the strengths of both vector and keyword search by:
+- Computing vector similarity scores
+- Computing keyword match scores
+- Using a ranker to combine these scores
+
+Two ranker types are supported:
+
+1. **RRF (Reciprocal Rank Fusion)**:
+   - Combines ranks from both vector and keyword results
+   - Uses an impact factor (default: 60.0) to control the weight of higher-ranked results
+   - Good for balancing between vector and keyword results
+   - The default impact factor of 60.0 comes from the original RRF paper by Cormack et al. (2009) [^1], which found this value to provide optimal performance across various retrieval tasks
+
+2. **Weighted**:
+   - Linearly combines normalized vector and keyword scores
+   - Uses an alpha parameter (0-1) to control the blend:
+     - alpha=0: Only use keyword scores
+     - alpha=1: Only use vector scores
+     - alpha=0.5: Equal weight to both (default)
+
+Example using RAGQueryConfig with different search modes:
+
+```python
+from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+
+# Vector search
+config = RAGQueryConfig(mode="vector", max_chunks=5)
+
+# Keyword search
+config = RAGQueryConfig(mode="keyword", max_chunks=5)
+
+# Hybrid search with custom RRF ranker
+config = RAGQueryConfig(
+    mode="hybrid",
+    max_chunks=5,
+    ranker=RRFRanker(impact_factor=50.0),  # Custom impact factor
+)
+
+# Hybrid search with weighted ranker
+config = RAGQueryConfig(
+    mode="hybrid",
+    max_chunks=5,
+    ranker=WeightedRanker(alpha=0.7),  # 70% vector, 30% keyword
+)
+
+# Hybrid search with default RRF ranker
+config = RAGQueryConfig(
+    mode="hybrid", max_chunks=5
+)  # Will use RRF with impact_factor=60.0
+```
+
+Note: The ranker configuration is only used in hybrid mode. For vector or keyword modes, the ranker parameter is ignored.
+
+## Installation
+
+You can install SQLite-Vec using pip:
+
+```bash
+pip install sqlite-vec
+```
+
+## Documentation
+
+See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general.
+
+[^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759).
+""",
        ),
        InlineProviderSpec(
            api=Api.vector_io,
@ -55,6 +284,9 @@ def available_providers() -> list[ProviderSpec]:
            deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.",
            api_dependencies=[Api.inference],
            optional_api_dependencies=[Api.files],
+            description="""
+Please refer to the sqlite-vec provider documentation.
+""",
        ),
        remote_provider_spec(
            Api.vector_io,
@ -63,6 +295,39 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["chromadb-client"],
                module="llama_stack.providers.remote.vector_io.chroma",
                config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig",
+                description="""
+[Chroma](https://www.trychroma.com/) is an inline and remote vector
+database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
+That means you're not limited to storing vectors in memory or in a separate service.
+
+## Features
+Chroma supports:
+- Store embeddings and their metadata
+- Vector search
+- Full-text search
+- Document storage
+- Metadata filtering
+- Multi-modal retrieval
+
+## Usage
+
+To use Chrome in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use chroma.
+3. Start storing and querying vectors.
+
+## Installation
+
+You can install chroma using pip:
+
+```bash
+pip install chromadb
+```
+
+## Documentation
+See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
+""",
            ),
            api_dependencies=[Api.inference],
        ),
@ -73,6 +338,40 @@ def available_providers() -> list[ProviderSpec]:
            module="llama_stack.providers.inline.vector_io.chroma",
            config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig",
            api_dependencies=[Api.inference],
+            description="""
+[Chroma](https://www.trychroma.com/) is an inline and remote vector
+database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database.
+That means you're not limited to storing vectors in memory or in a separate service.
+
+## Features
+Chroma supports:
+- Store embeddings and their metadata
+- Vector search
+- Full-text search
+- Document storage
+- Metadata filtering
+- Multi-modal retrieval
+
+## Usage
+
+To use Chrome in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use chroma.
+3. Start storing and querying vectors.
+
+## Installation
+
+You can install chroma using pip:
+
+```bash
+pip install chromadb
+```
+
+## Documentation
+See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
+
+""",
        ),
        remote_provider_spec(
            Api.vector_io,
@ -81,6 +380,34 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["psycopg2-binary"],
                module="llama_stack.providers.remote.vector_io.pgvector",
                config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig",
+                description="""
+[PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It
+allows you to store and query vectors directly in memory.
+That means you'll get fast and efficient vector retrieval.
+
+## Features
+
+- Easy to use
+- Fully integrated with Llama Stack
+
+## Usage
+
+To use PGVector in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Faiss.
+3. Start storing and querying vectors.
+
+## Installation
+
+You can install PGVector using docker:
+
+```bash
+docker pull pgvector/pgvector:pg17
+```
+## Documentation
+See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
+""",
            ),
            api_dependencies=[Api.inference],
        ),
@ -92,6 +419,36 @@ def available_providers() -> list[ProviderSpec]:
                module="llama_stack.providers.remote.vector_io.weaviate",
                config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig",
                provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData",
+                description="""
+[Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack.
+It allows you to store and query vectors directly within a Weaviate database.
+That means you're not limited to storing vectors in memory or in a separate service.
+
+## Features
+Weaviate supports:
+- Store embeddings and their metadata
+- Vector search
+- Full-text search
+- Hybrid search
+- Document storage
+- Metadata filtering
+- Multi-modal retrieval
+
+## Usage
+
+To use Weaviate in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use chroma.
+3. Start storing and querying vectors.
+
+## Installation
+
+To install Weaviate see the [Weaviate quickstart documentation](https://weaviate.io/developers/weaviate/quickstart).
+
+## Documentation
+See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general.
+""",
            ),
            api_dependencies=[Api.inference],
        ),
@ -102,6 +459,49 @@ def available_providers() -> list[ProviderSpec]:
            module="llama_stack.providers.inline.vector_io.qdrant",
            config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig",
            api_dependencies=[Api.inference],
+            description=r"""
+[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
+allows you to store and query vectors directly in memory.
+That means you'll get fast and efficient vector retrieval.
+
+> By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in
+> memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative.
+>
+> \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\]
+
+
+
+## Features
+
+- Lightweight and easy to use
+- Fully integrated with Llama Stack
+- Apache 2.0 license terms
+- Store embeddings and their metadata
+- Supports search by
+  [Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/)
+  and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search
+- [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/)
+- [Medatata filtering](https://qdrant.tech/articles/vector-search-filtering/)
+- [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/)
+
+## Usage
+
+To use Qdrant in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Qdrant.
+3. Start storing and querying vectors.
+
+## Installation
+
+You can install Qdrant using docker:
+
+```bash
+docker pull qdrant/qdrant
+```
+## Documentation
+See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general.
+""",
        ),
        remote_provider_spec(
            Api.vector_io,
@ -110,6 +510,9 @@ def available_providers() -> list[ProviderSpec]:
                pip_packages=["qdrant-client"],
                module="llama_stack.providers.remote.vector_io.qdrant",
                config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig",
+                description="""
+Please refer to the inline provider documentation.
+""",
            ),
            api_dependencies=[Api.inference],
        ),
@ -117,9 +520,113 @@ def available_providers() -> list[ProviderSpec]:
            Api.vector_io,
            AdapterSpec(
                adapter_type="milvus",
-                pip_packages=["pymilvus"],
+                pip_packages=["pymilvus[marshmallow<3.13.0]"],
                module="llama_stack.providers.remote.vector_io.milvus",
                config_class="llama_stack.providers.remote.vector_io.milvus.MilvusVectorIOConfig",
+                description="""
+[Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It
+allows you to store and query vectors directly within a Milvus database.
+That means you're not limited to storing vectors in memory or in a separate service.
+
+## Features
+
+- Easy to use
+- Fully integrated with Llama Stack
+
+## Usage
+
+To use Milvus in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Milvus.
+3. Start storing and querying vectors.
+
+## Installation
+
+You can install Milvus using pymilvus:
+
+```bash
+pip install pymilvus
+```
+
+## Configuration
+
+In Llama Stack, Milvus can be configured in two ways:
+- **Inline (Local) Configuration** - Uses Milvus-Lite for local storage
+- **Remote Configuration** - Connects to a remote Milvus server
+
+### Inline (Local) Configuration
+
+The simplest method is local configuration, which requires setting `db_path`, a path for locally storing Milvus-Lite files:
+
+```yaml
+vector_io:
+  - provider_id: milvus
+    provider_type: inline::milvus
+    config:
+      db_path: ~/.llama/distributions/together/milvus_store.db
+```
+
+### Remote Configuration
+
+Remote configuration is suitable for larger data storage requirements:
+
+#### Standard Remote Connection
+
+```yaml
+vector_io:
+  - provider_id: milvus
+    provider_type: remote::milvus
+    config:
+      uri: "http://<host>:<port>"
+      token: "<user>:<password>"
+```
+
+#### TLS-Enabled Remote Connection (One-way TLS)
+
+For connections to Milvus instances with one-way TLS enabled:
+
+```yaml
+vector_io:
+  - provider_id: milvus
+    provider_type: remote::milvus
+    config:
+      uri: "https://<host>:<port>"
+      token: "<user>:<password>"
+      secure: True
+      server_pem_path: "/path/to/server.pem"
+```
+
+#### Mutual TLS (mTLS) Remote Connection
+
+For connections to Milvus instances with mutual TLS (mTLS) enabled:
+
+```yaml
+vector_io:
+  - provider_id: milvus
+    provider_type: remote::milvus
+    config:
+      uri: "https://<host>:<port>"
+      token: "<user>:<password>"
+      secure: True
+      ca_pem_path: "/path/to/ca.pem"
+      client_pem_path: "/path/to/client.pem"
+      client_key_path: "/path/to/client.key"
+```
+
+#### Key Parameters for TLS Configuration
+
+- **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`.
+- **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS).
+- **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS).
+- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS).
+- **`client_key_path`**: Path to the **client private key** file (required for mTLS).
+
+## Documentation
+See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.
+
+For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md).
+""",
            ),
            api_dependencies=[Api.inference],
        ),
@ -131,5 +638,8 @@ def available_providers() -> list[ProviderSpec]:
            config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig",
            api_dependencies=[Api.inference],
            optional_api_dependencies=[Api.files],
+            description="""
+Please refer to the remote provider documentation.
+""",
        ),
    ]