diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cc316541a..ebbadefa6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: - id: check-added-large-files args: ['--maxkb=1000'] - id: end-of-file-fixer - exclude: '^(.*\.svg)$' + exclude: '^(.*\.svg|.*\.md)$' - id: no-commit-to-branch - id: check-yaml args: ["--unsafe"] @@ -95,6 +95,15 @@ repos: pass_filenames: false require_serial: true files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$ + - id: provider-codegen + name: Provider Codegen + additional_dependencies: + - uv==0.7.8 + entry: uv run --group codegen ./scripts/provider_codegen.py + language: python + pass_filenames: false + require_serial: true + files: ^llama_stack/providers/.*$ - id: openapi-codegen name: API Spec Codegen additional_dependencies: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index caabf1af8..b9b25cedf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -139,6 +139,8 @@ uv sync justification for bypassing the check. * Don't use unicode characters in the codebase. ASCII-only is preferred for compatibility or readability reasons. +* Providers configuration class should be Pydantic Field class. It should have a `description` field + that describes the configuration. These descriptions will be used to generate the provider documentation. ## Common Tasks @@ -157,10 +159,19 @@ cd llama-stack LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --template <...> ``` +### Updating distribution configurations -### Updating Provider Configurations +If you have made changes to a provider's configuration in any form (introducing a new config key, or +changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML +files as well as the documentation. You should not change `docs/source/.../distributions/` files +manually as they are auto-generated. -If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated. +### Updating the provider documentation + +If you have made changes to a provider's configuration, you should run `./scripts/distro_codegen.py` +to re-generate the documentation. You should not change `docs/source/.../providers/` files manually +as they are auto-generated. +Note that the provider "description" field will be used to generate the provider documentation. ### Building the Documentation diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index ee7cdd4a9..8382758cc 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -6,7 +6,7 @@ Llama Stack is a stateful service with REST APIs to support the seamless transit environments. You can build and test using a local server first and deploy to a hosted endpoint for production. In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/) -as the inference [provider](../providers/index.md#inference) for a Llama Model. +as the inference [provider](../providers/inference/index) for a Llama Model. #### Step 1: Install and setup 1. 
Install [uv](https://docs.astral.sh/uv/) diff --git a/docs/source/providers/agents/index.md b/docs/source/providers/agents/index.md new file mode 100644 index 000000000..ebc134ce9 --- /dev/null +++ b/docs/source/providers/agents/index.md @@ -0,0 +1,5 @@ +# Agents Providers + +This section contains documentation for all available providers for the **agents** API. + +- [inline::meta-reference](inline_meta-reference.md) \ No newline at end of file diff --git a/docs/source/providers/agents/inline_meta-reference.md b/docs/source/providers/agents/inline_meta-reference.md new file mode 100644 index 000000000..cfc0c6881 --- /dev/null +++ b/docs/source/providers/agents/inline_meta-reference.md @@ -0,0 +1,26 @@ +# inline::meta-reference + +## Description + +Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `persistence_store` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `responses_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db +responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db + +``` + diff --git a/docs/source/providers/datasetio/index.md b/docs/source/providers/datasetio/index.md new file mode 100644 index 000000000..726bc75b8 --- /dev/null +++ b/docs/source/providers/datasetio/index.md @@ -0,0 +1,7 @@ +# Datasetio Providers + +This section contains documentation for all available providers for the **datasetio** API. + +- [inline::localfs](inline_localfs.md) +- [remote::huggingface](remote_huggingface.md) +- [remote::nvidia](remote_nvidia.md) \ No newline at end of file diff --git a/docs/source/providers/datasetio/inline_localfs.md b/docs/source/providers/datasetio/inline_localfs.md new file mode 100644 index 000000000..fbe4c40e3 --- /dev/null +++ b/docs/source/providers/datasetio/inline_localfs.md @@ -0,0 +1,22 @@ +# inline::localfs + +## Description + +Local filesystem-based dataset I/O provider for reading and writing datasets to local storage. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db + +``` + diff --git a/docs/source/providers/datasetio/remote_huggingface.md b/docs/source/providers/datasetio/remote_huggingface.md new file mode 100644 index 000000000..e2052602e --- /dev/null +++ b/docs/source/providers/datasetio/remote_huggingface.md @@ -0,0 +1,22 @@ +# remote::huggingface + +## Description + +HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub. 
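+The Configuration table below is generated from this provider's Pydantic config class; each field's `description` becomes the table's Description column. A minimal sketch of that pattern (class and field names here are illustrative, not the real HuggingFace datasetio config):
+
+```python
+from pydantic import BaseModel, Field
+
+
+class ExampleDatasetIOConfig(BaseModel):
+    # The description below is what the docs generator surfaces in the table.
+    api_token: str | None = Field(
+        default=None,
+        description="Optional Hugging Face Hub token for private datasets.",
+    )
+
+
+# Inspect the metadata the generator reads (Pydantic v2).
+for name, field in ExampleDatasetIOConfig.model_fields.items():
+    print(name, field.annotation, field.is_required(), field.description)
+```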
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db + +``` + diff --git a/docs/source/providers/datasetio/remote_nvidia.md b/docs/source/providers/datasetio/remote_nvidia.md new file mode 100644 index 000000000..b5a672a54 --- /dev/null +++ b/docs/source/providers/datasetio/remote_nvidia.md @@ -0,0 +1,25 @@ +# remote::nvidia + +## Description + +NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The NVIDIA API key. | +| `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. | +| `project_id` | `str \| None` | No | test-project | The NVIDIA project ID. | +| `datasets_url` | `` | No | http://nemo.test | Base URL for the NeMo Dataset API | + +## Sample Configuration + +```yaml +api_key: ${env.NVIDIA_API_KEY:+} +dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} +project_id: ${env.NVIDIA_PROJECT_ID:=test-project} +datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} + +``` + diff --git a/docs/source/providers/eval/index.md b/docs/source/providers/eval/index.md new file mode 100644 index 000000000..330380670 --- /dev/null +++ b/docs/source/providers/eval/index.md @@ -0,0 +1,6 @@ +# Eval Providers + +This section contains documentation for all available providers for the **eval** API. + +- [inline::meta-reference](inline_meta-reference.md) +- [remote::nvidia](remote_nvidia.md) \ No newline at end of file diff --git a/docs/source/providers/eval/inline_meta-reference.md b/docs/source/providers/eval/inline_meta-reference.md new file mode 100644 index 000000000..704741b5a --- /dev/null +++ b/docs/source/providers/eval/inline_meta-reference.md @@ -0,0 +1,22 @@ +# inline::meta-reference + +## Description + +Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db + +``` + diff --git a/docs/source/providers/eval/remote_nvidia.md b/docs/source/providers/eval/remote_nvidia.md new file mode 100644 index 000000000..cb764b511 --- /dev/null +++ b/docs/source/providers/eval/remote_nvidia.md @@ -0,0 +1,19 @@ +# remote::nvidia + +## Description + +NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform. 
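+The sample configuration below uses `${env.VAR:=default}` placeholders, which Llama Stack resolves against the environment when the run config is loaded. A rough, self-contained sketch of that substitution (not the actual resolver):
+
+```python
+import os
+import re
+
+
+def resolve_env(value: str) -> str:
+    # ${env.VAR:=default} -> the variable's value, or the default when unset.
+    pattern = re.compile(r"\$\{env\.([A-Za-z0-9_]+):=([^}]*)\}")
+    return pattern.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)
+
+
+print(resolve_env("${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}"))
+# -> http://localhost:7331 unless NVIDIA_EVALUATOR_URL is exported
+```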
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `evaluator_url` | `` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service | + +## Sample Configuration + +```yaml +evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} + +``` + diff --git a/docs/source/providers/files/index.md b/docs/source/providers/files/index.md new file mode 100644 index 000000000..25d9b05ba --- /dev/null +++ b/docs/source/providers/files/index.md @@ -0,0 +1,5 @@ +# Files Providers + +This section contains documentation for all available providers for the **files** API. + +- [inline::localfs](inline_localfs.md) \ No newline at end of file diff --git a/docs/source/providers/files/inline_localfs.md b/docs/source/providers/files/inline_localfs.md new file mode 100644 index 000000000..54c489c7d --- /dev/null +++ b/docs/source/providers/files/inline_localfs.md @@ -0,0 +1,24 @@ +# inline::localfs + +## Description + +Local filesystem-based file storage provider for managing files and documents locally. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `storage_dir` | `` | No | PydanticUndefined | Directory to store uploaded files | +| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata | +| `ttl_secs` | `` | No | 31536000 | | + +## Sample Configuration + +```yaml +storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db + +``` + diff --git a/docs/source/providers/index.md b/docs/source/providers/index.md index 1f5026479..6689b58bc 100644 --- a/docs/source/providers/index.md +++ b/docs/source/providers/index.md @@ -18,60 +18,92 @@ Llama Stack supports external providers that live outside of the main codebase. ## Agents Run multi-step agentic workflows with LLMs with tool usage, memory (RAG), etc. +```{toctree} +:maxdepth: 1 + +agents/index +``` + ## DatasetIO Interfaces with datasets and data loaders. -## Eval -Generates outputs (via Inference or Agents) and perform scoring. - -## Inference -Runs inference with an LLM. - -## Post Training -Fine-tunes a model. - -#### Post Training Providers -The following providers are available for Post Training: - ```{toctree} :maxdepth: 1 -external -post_training/huggingface -post_training/torchtune -post_training/nvidia_nemo +datasetio/index +``` + +## Eval +Generates outputs (via Inference or Agents) and perform scoring. + +```{toctree} +:maxdepth: 1 + +eval/index +``` + +## Inference +Runs inference with an LLM. + +```{toctree} +:maxdepth: 1 + +inference/index +``` + +## Post Training +Fine-tunes a model. + +```{toctree} +:maxdepth: 1 + +post_training/index ``` ## Safety Applies safety policies to the output at a Systems (not only model) level. +```{toctree} +:maxdepth: 1 + +safety/index +``` + ## Scoring Evaluates the outputs of the system. +```{toctree} +:maxdepth: 1 + +scoring/index +``` + ## Telemetry Collects telemetry data from the system. +```{toctree} +:maxdepth: 1 + +telemetry/index +``` + ## Tool Runtime Is associated with the ToolGroup resouces. +```{toctree} +:maxdepth: 1 + +tool_runtime/index +``` + ## Vector IO Vector IO refers to operations on vector databases, such as adding documents, searching, and deleting documents. 
Vector IO plays a crucial role in [Retreival Augmented Generation (RAG)](../..//building_applications/rag), where the vector io and database are used to store and retrieve documents for retrieval. -#### Vector IO Providers -The following providers (i.e., databases) are available for Vector IO: - ```{toctree} :maxdepth: 1 -external -vector_io/faiss -vector_io/sqlite-vec -vector_io/chromadb -vector_io/pgvector -vector_io/qdrant -vector_io/milvus -vector_io/weaviate +vector_io/index ``` diff --git a/docs/source/providers/inference/index.md b/docs/source/providers/inference/index.md new file mode 100644 index 000000000..05773efce --- /dev/null +++ b/docs/source/providers/inference/index.md @@ -0,0 +1,32 @@ +# Inference Providers + +This section contains documentation for all available providers for the **inference** API. + +- [inline::meta-reference](inline_meta-reference.md) +- [inline::sentence-transformers](inline_sentence-transformers.md) +- [inline::vllm](inline_vllm.md) +- [remote::anthropic](remote_anthropic.md) +- [remote::bedrock](remote_bedrock.md) +- [remote::cerebras](remote_cerebras.md) +- [remote::cerebras-openai-compat](remote_cerebras-openai-compat.md) +- [remote::databricks](remote_databricks.md) +- [remote::fireworks](remote_fireworks.md) +- [remote::fireworks-openai-compat](remote_fireworks-openai-compat.md) +- [remote::gemini](remote_gemini.md) +- [remote::groq](remote_groq.md) +- [remote::groq-openai-compat](remote_groq-openai-compat.md) +- [remote::hf::endpoint](remote_hf_endpoint.md) +- [remote::hf::serverless](remote_hf_serverless.md) +- [remote::llama-openai-compat](remote_llama-openai-compat.md) +- [remote::nvidia](remote_nvidia.md) +- [remote::ollama](remote_ollama.md) +- [remote::openai](remote_openai.md) +- [remote::passthrough](remote_passthrough.md) +- [remote::runpod](remote_runpod.md) +- [remote::sambanova](remote_sambanova.md) +- [remote::sambanova-openai-compat](remote_sambanova-openai-compat.md) +- [remote::tgi](remote_tgi.md) +- [remote::together](remote_together.md) +- [remote::together-openai-compat](remote_together-openai-compat.md) +- [remote::vllm](remote_vllm.md) +- [remote::watsonx](remote_watsonx.md) \ No newline at end of file diff --git a/docs/source/providers/inference/inline_meta-reference.md b/docs/source/providers/inference/inline_meta-reference.md new file mode 100644 index 000000000..eca12a839 --- /dev/null +++ b/docs/source/providers/inference/inline_meta-reference.md @@ -0,0 +1,32 @@ +# inline::meta-reference + +## Description + +Meta's reference implementation of inference with support for various model formats and optimization techniques. 
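+Once a distribution built on this provider is running, inference goes through the standard client API. A minimal sketch, assuming the `llama_stack_client` package, a server on the default port 8321, and an illustrative model id (method and attribute names follow recent client releases and may differ in yours):
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8321")
+
+response = client.inference.chat_completion(
+    model_id="meta-llama/Llama-3.2-3B-Instruct",  # illustrative; use the model you configured
+    messages=[{"role": "user", "content": "Write a haiku about GPUs."}],
+)
+print(response.completion_message.content)
+```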
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `model` | `str \| None` | No | | | +| `torch_seed` | `int \| None` | No | | | +| `max_seq_len` | `` | No | 4096 | | +| `max_batch_size` | `` | No | 1 | | +| `model_parallel_size` | `int \| None` | No | | | +| `create_distributed_process_group` | `` | No | True | | +| `checkpoint_dir` | `str \| None` | No | | | +| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig, annotation=NoneType, required=True, discriminator='type'` | No | | | + +## Sample Configuration + +```yaml +model: Llama3.2-3B-Instruct +checkpoint_dir: ${env.CHECKPOINT_DIR:=null} +quantization: + type: ${env.QUANTIZATION_TYPE:=bf16} +model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} +max_batch_size: ${env.MAX_BATCH_SIZE:=1} +max_seq_len: ${env.MAX_SEQ_LEN:=4096} + +``` + diff --git a/docs/source/providers/inference/inline_sentence-transformers.md b/docs/source/providers/inference/inline_sentence-transformers.md new file mode 100644 index 000000000..57ec7f7d0 --- /dev/null +++ b/docs/source/providers/inference/inline_sentence-transformers.md @@ -0,0 +1,13 @@ +# inline::sentence-transformers + +## Description + +Sentence Transformers inference provider for text embeddings and similarity search. + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/inference/inline_vllm.md b/docs/source/providers/inference/inline_vllm.md new file mode 100644 index 000000000..6ea34acb8 --- /dev/null +++ b/docs/source/providers/inference/inline_vllm.md @@ -0,0 +1,29 @@ +# inline::vllm + +## Description + +vLLM inference provider for high-performance model serving with PagedAttention and continuous batching. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `tensor_parallel_size` | `` | No | 1 | Number of tensor parallel replicas (number of GPUs to use). | +| `max_tokens` | `` | No | 4096 | Maximum number of tokens to generate. | +| `max_model_len` | `` | No | 4096 | Maximum context length to use during serving. | +| `max_num_seqs` | `` | No | 4 | Maximum parallel batch size for generation. | +| `enforce_eager` | `` | No | False | Whether to use eager mode for inference (otherwise cuda graphs are used). | +| `gpu_memory_utilization` | `` | No | 0.3 | How much GPU memory will be allocated when this provider has finished loading, including memory that was already allocated before loading. | + +## Sample Configuration + +```yaml +tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:=1} +max_tokens: ${env.MAX_TOKENS:=4096} +max_model_len: ${env.MAX_MODEL_LEN:=4096} +max_num_seqs: ${env.MAX_NUM_SEQS:=4} +enforce_eager: ${env.ENFORCE_EAGER:=False} +gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:=0.3} + +``` + diff --git a/docs/source/providers/inference/remote_anthropic.md b/docs/source/providers/inference/remote_anthropic.md new file mode 100644 index 000000000..79d5a3f6e --- /dev/null +++ b/docs/source/providers/inference/remote_anthropic.md @@ -0,0 +1,19 @@ +# remote::anthropic + +## Description + +Anthropic inference provider for accessing Claude models and Anthropic's AI services. 
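+The sample configuration below references `${env.ANTHROPIC_API_KEY}` with no default, so the variable must be set in the environment that launches the stack. A small pre-flight check you could run beforehand (illustrative helper, not part of Llama Stack):
+
+```python
+import os
+import sys
+
+required = ["ANTHROPIC_API_KEY"]  # keys this provider's sample config expects
+missing = [name for name in required if not os.environ.get(name)]
+if missing:
+    sys.exit(f"Missing environment variables: {', '.join(missing)}")
+print("All provider credentials are present.")
+```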
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | API key for Anthropic models | + +## Sample Configuration + +```yaml +api_key: ${env.ANTHROPIC_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_bedrock.md b/docs/source/providers/inference/remote_bedrock.md new file mode 100644 index 000000000..1454c54c2 --- /dev/null +++ b/docs/source/providers/inference/remote_bedrock.md @@ -0,0 +1,28 @@ +# remote::bedrock + +## Description + +AWS Bedrock inference provider for accessing various AI models through AWS's managed service. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | +| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | +| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | +| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION | +| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | +| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS | +| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE | +| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. | +| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. | +| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). | + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/inference/remote_cerebras-openai-compat.md b/docs/source/providers/inference/remote_cerebras-openai-compat.md new file mode 100644 index 000000000..64b899246 --- /dev/null +++ b/docs/source/providers/inference/remote_cerebras-openai-compat.md @@ -0,0 +1,21 @@ +# remote::cerebras-openai-compat + +## Description + +Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format. 
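+"OpenAI API format" means the provider sends standard OpenAI-style chat completion requests to the `openai_compat_api_base` shown below. For intuition, an equivalent direct call with the `openai` SDK against that base URL looks like this (the model name is an illustrative assumption; the provider issues similar requests on your behalf):
+
+```python
+import os
+
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="https://api.cerebras.ai/v1",
+    api_key=os.environ["CEREBRAS_API_KEY"],
+)
+resp = client.chat.completions.create(
+    model="llama3.1-8b",  # illustrative model name
+    messages=[{"role": "user", "content": "Say hello in one sentence."}],
+)
+print(resp.choices[0].message.content)
+```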
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Cerebras API key | +| `openai_compat_api_base` | `` | No | https://api.cerebras.ai/v1 | The URL for the Cerebras API server | + +## Sample Configuration + +```yaml +openai_compat_api_base: https://api.cerebras.ai/v1 +api_key: ${env.CEREBRAS_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_cerebras.md b/docs/source/providers/inference/remote_cerebras.md new file mode 100644 index 000000000..c9793d7de --- /dev/null +++ b/docs/source/providers/inference/remote_cerebras.md @@ -0,0 +1,21 @@ +# remote::cerebras + +## Description + +Cerebras inference provider for running models on Cerebras Cloud platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `base_url` | `` | No | https://api.cerebras.ai | Base URL for the Cerebras API | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | Cerebras API Key | + +## Sample Configuration + +```yaml +base_url: https://api.cerebras.ai +api_key: ${env.CEREBRAS_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_databricks.md b/docs/source/providers/inference/remote_databricks.md new file mode 100644 index 000000000..c611d9414 --- /dev/null +++ b/docs/source/providers/inference/remote_databricks.md @@ -0,0 +1,21 @@ +# remote::databricks + +## Description + +Databricks inference provider for running models on Databricks' unified analytics platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | | The URL for the Databricks model serving endpoint | +| `api_token` | `` | No | | The Databricks API token | + +## Sample Configuration + +```yaml +url: ${env.DATABRICKS_URL} +api_token: ${env.DATABRICKS_API_TOKEN} + +``` + diff --git a/docs/source/providers/inference/remote_fireworks-openai-compat.md b/docs/source/providers/inference/remote_fireworks-openai-compat.md new file mode 100644 index 000000000..0a2bd0fe8 --- /dev/null +++ b/docs/source/providers/inference/remote_fireworks-openai-compat.md @@ -0,0 +1,21 @@ +# remote::fireworks-openai-compat + +## Description + +Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Fireworks API key | +| `openai_compat_api_base` | `` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks API server | + +## Sample Configuration + +```yaml +openai_compat_api_base: https://api.fireworks.ai/inference/v1 +api_key: ${env.FIREWORKS_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_fireworks.md b/docs/source/providers/inference/remote_fireworks.md new file mode 100644 index 000000000..351586c34 --- /dev/null +++ b/docs/source/providers/inference/remote_fireworks.md @@ -0,0 +1,21 @@ +# remote::fireworks + +## Description + +Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform. 
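+After the stack starts with this provider, you can confirm which models are registered and which provider serves them. A short sketch, assuming the `llama_stack_client` package and the default server address (attribute names reflect recent client versions and may differ in yours):
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8321")
+for model in client.models.list():
+    print(model.identifier, "->", model.provider_id)
+```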
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Fireworks.ai API Key | + +## Sample Configuration + +```yaml +url: https://api.fireworks.ai/inference/v1 +api_key: ${env.FIREWORKS_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_gemini.md b/docs/source/providers/inference/remote_gemini.md new file mode 100644 index 000000000..cafcd787d --- /dev/null +++ b/docs/source/providers/inference/remote_gemini.md @@ -0,0 +1,19 @@ +# remote::gemini + +## Description + +Google Gemini inference provider for accessing Gemini models and Google's AI services. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | API key for Gemini models | + +## Sample Configuration + +```yaml +api_key: ${env.GEMINI_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_groq-openai-compat.md b/docs/source/providers/inference/remote_groq-openai-compat.md new file mode 100644 index 000000000..e424bedd2 --- /dev/null +++ b/docs/source/providers/inference/remote_groq-openai-compat.md @@ -0,0 +1,21 @@ +# remote::groq-openai-compat + +## Description + +Groq OpenAI-compatible provider for using Groq models with OpenAI API format. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Groq API key | +| `openai_compat_api_base` | `` | No | https://api.groq.com/openai/v1 | The URL for the Groq API server | + +## Sample Configuration + +```yaml +openai_compat_api_base: https://api.groq.com/openai/v1 +api_key: ${env.GROQ_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_groq.md b/docs/source/providers/inference/remote_groq.md new file mode 100644 index 000000000..4f734f263 --- /dev/null +++ b/docs/source/providers/inference/remote_groq.md @@ -0,0 +1,21 @@ +# remote::groq + +## Description + +Groq inference provider for ultra-fast inference using Groq's LPU technology. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Groq API key | +| `url` | `` | No | https://api.groq.com | The URL for the Groq AI server | + +## Sample Configuration + +```yaml +url: https://api.groq.com +api_key: ${env.GROQ_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_hf_endpoint.md b/docs/source/providers/inference/remote_hf_endpoint.md new file mode 100644 index 000000000..f9ca6b538 --- /dev/null +++ b/docs/source/providers/inference/remote_hf_endpoint.md @@ -0,0 +1,21 @@ +# remote::hf::endpoint + +## Description + +HuggingFace Inference Endpoints provider for dedicated model serving. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `endpoint_name` | `` | No | PydanticUndefined | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. 
| +| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | + +## Sample Configuration + +```yaml +endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} +api_token: ${env.HF_API_TOKEN} + +``` + diff --git a/docs/source/providers/inference/remote_hf_serverless.md b/docs/source/providers/inference/remote_hf_serverless.md new file mode 100644 index 000000000..345af3e49 --- /dev/null +++ b/docs/source/providers/inference/remote_hf_serverless.md @@ -0,0 +1,21 @@ +# remote::hf::serverless + +## Description + +HuggingFace Inference API serverless provider for on-demand model inference. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `huggingface_repo` | `` | No | PydanticUndefined | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') | +| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | + +## Sample Configuration + +```yaml +huggingface_repo: ${env.INFERENCE_MODEL} +api_token: ${env.HF_API_TOKEN} + +``` + diff --git a/docs/source/providers/inference/remote_llama-openai-compat.md b/docs/source/providers/inference/remote_llama-openai-compat.md new file mode 100644 index 000000000..5c97aebc3 --- /dev/null +++ b/docs/source/providers/inference/remote_llama-openai-compat.md @@ -0,0 +1,21 @@ +# remote::llama-openai-compat + +## Description + +Llama OpenAI-compatible provider for using Llama models with OpenAI API format. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Llama API key | +| `openai_compat_api_base` | `` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | + +## Sample Configuration + +```yaml +openai_compat_api_base: https://api.llama.com/compat/v1/ +api_key: ${env.LLAMA_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_nvidia.md b/docs/source/providers/inference/remote_nvidia.md new file mode 100644 index 000000000..eca2ec544 --- /dev/null +++ b/docs/source/providers/inference/remote_nvidia.md @@ -0,0 +1,24 @@ +# remote::nvidia + +## Description + +NVIDIA inference provider for accessing NVIDIA NIM models and AI services. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed of using the hosted service | +| `timeout` | `` | No | 60 | Timeout for the HTTP requests | +| `append_api_version` | `` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. 
| + +## Sample Configuration + +```yaml +url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} +api_key: ${env.NVIDIA_API_KEY:+} +append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} + +``` + diff --git a/docs/source/providers/inference/remote_ollama.md b/docs/source/providers/inference/remote_ollama.md new file mode 100644 index 000000000..7c5fc9437 --- /dev/null +++ b/docs/source/providers/inference/remote_ollama.md @@ -0,0 +1,21 @@ +# remote::ollama + +## Description + +Ollama inference provider for running local models through the Ollama runtime. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | http://localhost:11434 | | +| `raise_on_connect_error` | `` | No | True | | + +## Sample Configuration + +```yaml +url: ${env.OLLAMA_URL:=http://localhost:11434} +raise_on_connect_error: true + +``` + diff --git a/docs/source/providers/inference/remote_openai.md b/docs/source/providers/inference/remote_openai.md new file mode 100644 index 000000000..b4cfb5880 --- /dev/null +++ b/docs/source/providers/inference/remote_openai.md @@ -0,0 +1,19 @@ +# remote::openai + +## Description + +OpenAI inference provider for accessing GPT models and other OpenAI services. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | API key for OpenAI models | + +## Sample Configuration + +```yaml +api_key: ${env.OPENAI_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_passthrough.md b/docs/source/providers/inference/remote_passthrough.md new file mode 100644 index 000000000..9005e5339 --- /dev/null +++ b/docs/source/providers/inference/remote_passthrough.md @@ -0,0 +1,21 @@ +# remote::passthrough + +## Description + +Passthrough inference provider for connecting to any external inference service not directly supported. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | | The URL for the passthrough endpoint | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint | + +## Sample Configuration + +```yaml +url: ${env.PASSTHROUGH_URL} +api_key: ${env.PASSTHROUGH_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_runpod.md b/docs/source/providers/inference/remote_runpod.md new file mode 100644 index 000000000..b543606d5 --- /dev/null +++ b/docs/source/providers/inference/remote_runpod.md @@ -0,0 +1,21 @@ +# remote::runpod + +## Description + +RunPod inference provider for running models on RunPod's cloud GPU platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint | +| `api_token` | `str \| None` | No | | The API token | + +## Sample Configuration + +```yaml +url: ${env.RUNPOD_URL:+} +api_token: ${env.RUNPOD_API_TOKEN:+} + +``` + diff --git a/docs/source/providers/inference/remote_sambanova-openai-compat.md b/docs/source/providers/inference/remote_sambanova-openai-compat.md new file mode 100644 index 000000000..c213d962f --- /dev/null +++ b/docs/source/providers/inference/remote_sambanova-openai-compat.md @@ -0,0 +1,21 @@ +# remote::sambanova-openai-compat + +## Description + +SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format. 
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The SambaNova API key | +| `openai_compat_api_base` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova API server | + +## Sample Configuration + +```yaml +openai_compat_api_base: https://api.sambanova.ai/v1 +api_key: ${env.SAMBANOVA_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_sambanova.md b/docs/source/providers/inference/remote_sambanova.md new file mode 100644 index 000000000..006c41ac1 --- /dev/null +++ b/docs/source/providers/inference/remote_sambanova.md @@ -0,0 +1,21 @@ +# remote::sambanova + +## Description + +SambaNova inference provider for running models on SambaNova's dataflow architecture. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key | + +## Sample Configuration + +```yaml +url: https://api.sambanova.ai/v1 +api_key: ${env.SAMBANOVA_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_tgi.md b/docs/source/providers/inference/remote_tgi.md new file mode 100644 index 000000000..c4a749b0b --- /dev/null +++ b/docs/source/providers/inference/remote_tgi.md @@ -0,0 +1,19 @@ +# remote::tgi + +## Description + +Text Generation Inference (TGI) provider for HuggingFace model serving. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | PydanticUndefined | The URL for the TGI serving endpoint | + +## Sample Configuration + +```yaml +url: ${env.TGI_URL} + +``` + diff --git a/docs/source/providers/inference/remote_together-openai-compat.md b/docs/source/providers/inference/remote_together-openai-compat.md new file mode 100644 index 000000000..833fa8cb0 --- /dev/null +++ b/docs/source/providers/inference/remote_together-openai-compat.md @@ -0,0 +1,21 @@ +# remote::together-openai-compat + +## Description + +Together AI OpenAI-compatible provider for using Together models with OpenAI API format. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Together API key | +| `openai_compat_api_base` | `` | No | https://api.together.xyz/v1 | The URL for the Together API server | + +## Sample Configuration + +```yaml +openai_compat_api_base: https://api.together.xyz/v1 +api_key: ${env.TOGETHER_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_together.md b/docs/source/providers/inference/remote_together.md new file mode 100644 index 000000000..4e0e0a9ce --- /dev/null +++ b/docs/source/providers/inference/remote_together.md @@ -0,0 +1,21 @@ +# remote::together + +## Description + +Together AI inference provider for open-source models and collaborative AI development. 
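+Note that `api_key` is typed as `pydantic.types.SecretStr`, so the key stays masked if the resolved configuration is ever printed or logged. A small illustration of that behavior with a generic Pydantic model (not the actual Together config class):
+
+```python
+from pydantic import BaseModel, SecretStr
+
+
+class ExampleProviderConfig(BaseModel):
+    api_key: SecretStr | None = None
+
+
+cfg = ExampleProviderConfig(api_key="together-demo-key")
+print(cfg)                             # api_key is rendered as '**********'
+print(cfg.api_key.get_secret_value())  # explicit opt-in to read the raw value
+```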
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | https://api.together.xyz/v1 | The URL for the Together AI server | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Together AI API Key | + +## Sample Configuration + +```yaml +url: https://api.together.xyz/v1 +api_key: ${env.TOGETHER_API_KEY:+} + +``` + diff --git a/docs/source/providers/inference/remote_vllm.md b/docs/source/providers/inference/remote_vllm.md new file mode 100644 index 000000000..6c725fb41 --- /dev/null +++ b/docs/source/providers/inference/remote_vllm.md @@ -0,0 +1,25 @@ +# remote::vllm + +## Description + +Remote vLLM inference provider for connecting to vLLM servers. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint | +| `max_tokens` | `` | No | 4096 | Maximum number of tokens to generate. | +| `api_token` | `str \| None` | No | fake | The API token | +| `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. | + +## Sample Configuration + +```yaml +url: ${env.VLLM_URL} +max_tokens: ${env.VLLM_MAX_TOKENS:=4096} +api_token: ${env.VLLM_API_TOKEN:=fake} +tls_verify: ${env.VLLM_TLS_VERIFY:=true} + +``` + diff --git a/docs/source/providers/inference/remote_watsonx.md b/docs/source/providers/inference/remote_watsonx.md new file mode 100644 index 000000000..fce0373fa --- /dev/null +++ b/docs/source/providers/inference/remote_watsonx.md @@ -0,0 +1,24 @@ +# remote::watsonx + +## Description + +IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key, only needed of using the hosted service | +| `project_id` | `str \| None` | No | | The Project ID key, only needed of using the hosted service | +| `timeout` | `` | No | 60 | Timeout for the HTTP requests | + +## Sample Configuration + +```yaml +url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} +api_key: ${env.WATSONX_API_KEY:+} +project_id: ${env.WATSONX_PROJECT_ID:+} + +``` + diff --git a/docs/source/providers/post_training/index.md b/docs/source/providers/post_training/index.md new file mode 100644 index 000000000..35d10d14b --- /dev/null +++ b/docs/source/providers/post_training/index.md @@ -0,0 +1,7 @@ +# Post_Training Providers + +This section contains documentation for all available providers for the **post_training** API. + +- [inline::huggingface](inline_huggingface.md) +- [inline::torchtune](inline_torchtune.md) +- [remote::nvidia](remote_nvidia.md) \ No newline at end of file diff --git a/docs/source/providers/post_training/inline_huggingface.md b/docs/source/providers/post_training/inline_huggingface.md new file mode 100644 index 000000000..82b08bf7a --- /dev/null +++ b/docs/source/providers/post_training/inline_huggingface.md @@ -0,0 +1,36 @@ +# inline::huggingface + +## Description + +HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem. 
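+The default `chat_template` shown in the table below wraps each training pair in `<|user|>` / `<|assistant|>` markers before fine-tuning. Purely as an illustration of that formatting (the provider applies it internally):
+
+```python
+chat_template = "<|user|>\n{input}\n<|assistant|>\n{output}"
+
+example = {"input": "What is the capital of France?", "output": "Paris."}
+print(chat_template.format(**example))
+```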
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `device` | `` | No | cuda | | +| `distributed_backend` | `Literal['fsdp', 'deepspeed'` | No | | | +| `checkpoint_format` | `Literal['full_state', 'huggingface'` | No | huggingface | | +| `chat_template` | `` | No | <|user|> +{input} +<|assistant|> +{output} | | +| `model_specific_config` | `` | No | {'trust_remote_code': True, 'attn_implementation': 'sdpa'} | | +| `max_seq_length` | `` | No | 2048 | | +| `gradient_checkpointing` | `` | No | False | | +| `save_total_limit` | `` | No | 3 | | +| `logging_steps` | `` | No | 10 | | +| `warmup_ratio` | `` | No | 0.1 | | +| `weight_decay` | `` | No | 0.01 | | +| `dataloader_num_workers` | `` | No | 4 | | +| `dataloader_pin_memory` | `` | No | True | | + +## Sample Configuration + +```yaml +checkpoint_format: huggingface +distributed_backend: null +device: cpu + +``` + diff --git a/docs/source/providers/post_training/inline_torchtune.md b/docs/source/providers/post_training/inline_torchtune.md new file mode 100644 index 000000000..82730e54b --- /dev/null +++ b/docs/source/providers/post_training/inline_torchtune.md @@ -0,0 +1,20 @@ +# inline::torchtune + +## Description + +TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `torch_seed` | `int \| None` | No | | | +| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta | | + +## Sample Configuration + +```yaml +checkpoint_format: meta + +``` + diff --git a/docs/source/providers/post_training/remote_nvidia.md b/docs/source/providers/post_training/remote_nvidia.md new file mode 100644 index 000000000..050afb763 --- /dev/null +++ b/docs/source/providers/post_training/remote_nvidia.md @@ -0,0 +1,28 @@ +# remote::nvidia + +## Description + +NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The NVIDIA API key. | +| `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. | +| `project_id` | `str \| None` | No | test-example-model@v1 | The NVIDIA project ID. | +| `customizer_url` | `str \| None` | No | | Base URL for the NeMo Customizer API | +| `timeout` | `` | No | 300 | Timeout for the NVIDIA Post Training API | +| `max_retries` | `` | No | 3 | Maximum number of retries for the NVIDIA Post Training API | +| `output_model_dir` | `` | No | test-example-model@v1 | Directory to save the output model | + +## Sample Configuration + +```yaml +api_key: ${env.NVIDIA_API_KEY:+} +dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} +project_id: ${env.NVIDIA_PROJECT_ID:=test-project} +customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} + +``` + diff --git a/docs/source/providers/safety/index.md b/docs/source/providers/safety/index.md new file mode 100644 index 000000000..1a245c13d --- /dev/null +++ b/docs/source/providers/safety/index.md @@ -0,0 +1,10 @@ +# Safety Providers + +This section contains documentation for all available providers for the **safety** API. 
+ +- [inline::code-scanner](inline_code-scanner.md) +- [inline::llama-guard](inline_llama-guard.md) +- [inline::prompt-guard](inline_prompt-guard.md) +- [remote::bedrock](remote_bedrock.md) +- [remote::nvidia](remote_nvidia.md) +- [remote::sambanova](remote_sambanova.md) \ No newline at end of file diff --git a/docs/source/providers/safety/inline_code-scanner.md b/docs/source/providers/safety/inline_code-scanner.md new file mode 100644 index 000000000..3a3e90b3d --- /dev/null +++ b/docs/source/providers/safety/inline_code-scanner.md @@ -0,0 +1,13 @@ +# inline::code-scanner + +## Description + +Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns. + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/safety/inline_llama-guard.md b/docs/source/providers/safety/inline_llama-guard.md new file mode 100644 index 000000000..4f57898ec --- /dev/null +++ b/docs/source/providers/safety/inline_llama-guard.md @@ -0,0 +1,19 @@ +# inline::llama-guard + +## Description + +Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `excluded_categories` | `list[str` | No | [] | | + +## Sample Configuration + +```yaml +excluded_categories: [] + +``` + diff --git a/docs/source/providers/safety/inline_prompt-guard.md b/docs/source/providers/safety/inline_prompt-guard.md new file mode 100644 index 000000000..10a6b8d3f --- /dev/null +++ b/docs/source/providers/safety/inline_prompt-guard.md @@ -0,0 +1,19 @@ +# inline::prompt-guard + +## Description + +Prompt Guard safety provider for detecting and filtering unsafe prompts and content. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `guard_type` | `` | No | injection | | + +## Sample Configuration + +```yaml +guard_type: injection + +``` + diff --git a/docs/source/providers/safety/remote_bedrock.md b/docs/source/providers/safety/remote_bedrock.md new file mode 100644 index 000000000..3c1d6bcb0 --- /dev/null +++ b/docs/source/providers/safety/remote_bedrock.md @@ -0,0 +1,28 @@ +# remote::bedrock + +## Description + +AWS Bedrock safety provider for content moderation using AWS's safety services. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | +| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | +| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | +| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION | +| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | +| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. 
Default use environment variable: AWS_MAX_ATTEMPTS | +| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE | +| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. | +| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. | +| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). | + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/safety/remote_nvidia.md b/docs/source/providers/safety/remote_nvidia.md new file mode 100644 index 000000000..40ae744a4 --- /dev/null +++ b/docs/source/providers/safety/remote_nvidia.md @@ -0,0 +1,21 @@ +# remote::nvidia + +## Description + +NVIDIA's safety provider for content moderation and safety filtering. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `guardrails_service_url` | `` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service | +| `config_id` | `str \| None` | No | self-check | Guardrails configuration ID to use from the Guardrails configuration store | + +## Sample Configuration + +```yaml +guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} +config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} + +``` + diff --git a/docs/source/providers/safety/remote_sambanova.md b/docs/source/providers/safety/remote_sambanova.md new file mode 100644 index 000000000..c680f9764 --- /dev/null +++ b/docs/source/providers/safety/remote_sambanova.md @@ -0,0 +1,21 @@ +# remote::sambanova + +## Description + +SambaNova's safety provider for content moderation and safety filtering. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key | + +## Sample Configuration + +```yaml +url: https://api.sambanova.ai/v1 +api_key: ${env.SAMBANOVA_API_KEY} + +``` + diff --git a/docs/source/providers/scoring/index.md b/docs/source/providers/scoring/index.md new file mode 100644 index 000000000..3cf7af537 --- /dev/null +++ b/docs/source/providers/scoring/index.md @@ -0,0 +1,7 @@ +# Scoring Providers + +This section contains documentation for all available providers for the **scoring** API. + +- [inline::basic](inline_basic.md) +- [inline::braintrust](inline_braintrust.md) +- [inline::llm-as-judge](inline_llm-as-judge.md) \ No newline at end of file diff --git a/docs/source/providers/scoring/inline_basic.md b/docs/source/providers/scoring/inline_basic.md new file mode 100644 index 000000000..e9e50cff4 --- /dev/null +++ b/docs/source/providers/scoring/inline_basic.md @@ -0,0 +1,13 @@ +# inline::basic + +## Description + +Basic scoring provider for simple evaluation metrics and scoring functions. 
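+As a mental model, the scoring functions exposed here compute simple row-level metrics, such as exact match, and then aggregate them over a dataset. A toy sketch of that idea (not the provider's actual code):
+
+```python
+def exact_match(expected: str, generated: str) -> float:
+    return 1.0 if expected.strip() == generated.strip() else 0.0
+
+
+rows = [("Paris", "Paris"), ("4", "four"), ("42", "42")]
+scores = [exact_match(expected, generated) for expected, generated in rows]
+print({"accuracy": sum(scores) / len(scores)})  # -> {'accuracy': 0.666...}
+```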
+ +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/scoring/inline_braintrust.md b/docs/source/providers/scoring/inline_braintrust.md new file mode 100644 index 000000000..dae0f246e --- /dev/null +++ b/docs/source/providers/scoring/inline_braintrust.md @@ -0,0 +1,19 @@ +# inline::braintrust + +## Description + +Braintrust scoring provider for evaluation and scoring using the Braintrust platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `openai_api_key` | `str \| None` | No | | The OpenAI API Key | + +## Sample Configuration + +```yaml +openai_api_key: ${env.OPENAI_API_KEY:+} + +``` + diff --git a/docs/source/providers/scoring/inline_llm-as-judge.md b/docs/source/providers/scoring/inline_llm-as-judge.md new file mode 100644 index 000000000..971e02897 --- /dev/null +++ b/docs/source/providers/scoring/inline_llm-as-judge.md @@ -0,0 +1,13 @@ +# inline::llm-as-judge + +## Description + +LLM-as-judge scoring provider that uses language models to evaluate and score responses. + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/telemetry/index.md b/docs/source/providers/telemetry/index.md new file mode 100644 index 000000000..e2b221b50 --- /dev/null +++ b/docs/source/providers/telemetry/index.md @@ -0,0 +1,5 @@ +# Telemetry Providers + +This section contains documentation for all available providers for the **telemetry** API. + +- [inline::meta-reference](inline_meta-reference.md) \ No newline at end of file diff --git a/docs/source/providers/telemetry/inline_meta-reference.md b/docs/source/providers/telemetry/inline_meta-reference.md new file mode 100644 index 000000000..775dba86d --- /dev/null +++ b/docs/source/providers/telemetry/inline_meta-reference.md @@ -0,0 +1,25 @@ +# inline::meta-reference + +## Description + +Meta's reference implementation of telemetry and observability using OpenTelemetry. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `otel_trace_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL for traces | +| `otel_metric_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL for metrics | +| `service_name` | `` | No | ​ | The service name to use for telemetry | +| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink` | No | [, ] | List of telemetry sinks to enable (possible values: otel, sqlite, console) | +| `sqlite_db_path` | `` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces | + +## Sample Configuration + +```yaml +service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" +sinks: ${env.TELEMETRY_SINKS:=console,sqlite} +sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/trace_store.db + +``` + diff --git a/docs/source/providers/tool_runtime/index.md b/docs/source/providers/tool_runtime/index.md new file mode 100644 index 000000000..f162c4f9c --- /dev/null +++ b/docs/source/providers/tool_runtime/index.md @@ -0,0 +1,10 @@ +# Tool_Runtime Providers + +This section contains documentation for all available providers for the **tool_runtime** API. 
+ +- [inline::rag-runtime](inline_rag-runtime.md) +- [remote::bing-search](remote_bing-search.md) +- [remote::brave-search](remote_brave-search.md) +- [remote::model-context-protocol](remote_model-context-protocol.md) +- [remote::tavily-search](remote_tavily-search.md) +- [remote::wolfram-alpha](remote_wolfram-alpha.md) \ No newline at end of file diff --git a/docs/source/providers/tool_runtime/inline_rag-runtime.md b/docs/source/providers/tool_runtime/inline_rag-runtime.md new file mode 100644 index 000000000..784b4fdad --- /dev/null +++ b/docs/source/providers/tool_runtime/inline_rag-runtime.md @@ -0,0 +1,13 @@ +# inline::rag-runtime + +## Description + +RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search. + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/tool_runtime/remote_bing-search.md b/docs/source/providers/tool_runtime/remote_bing-search.md new file mode 100644 index 000000000..0d5df7679 --- /dev/null +++ b/docs/source/providers/tool_runtime/remote_bing-search.md @@ -0,0 +1,20 @@ +# remote::bing-search + +## Description + +Bing Search tool for web search capabilities using Microsoft's search engine. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | | +| `top_k` | `` | No | 3 | | + +## Sample Configuration + +```yaml +api_key: ${env.BING_API_KEY:} + +``` + diff --git a/docs/source/providers/tool_runtime/remote_brave-search.md b/docs/source/providers/tool_runtime/remote_brave-search.md new file mode 100644 index 000000000..db10fdc4f --- /dev/null +++ b/docs/source/providers/tool_runtime/remote_brave-search.md @@ -0,0 +1,21 @@ +# remote::brave-search + +## Description + +Brave Search tool for web search capabilities with privacy-focused results. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Brave Search API Key | +| `max_results` | `` | No | 3 | The maximum number of results to return | + +## Sample Configuration + +```yaml +api_key: ${env.BRAVE_SEARCH_API_KEY:+} +max_results: 3 + +``` + diff --git a/docs/source/providers/tool_runtime/remote_model-context-protocol.md b/docs/source/providers/tool_runtime/remote_model-context-protocol.md new file mode 100644 index 000000000..cf9401c2c --- /dev/null +++ b/docs/source/providers/tool_runtime/remote_model-context-protocol.md @@ -0,0 +1,13 @@ +# remote::model-context-protocol + +## Description + +Model Context Protocol (MCP) tool for standardized tool calling and context management. + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/tool_runtime/remote_tavily-search.md b/docs/source/providers/tool_runtime/remote_tavily-search.md new file mode 100644 index 000000000..7d1c7fd7f --- /dev/null +++ b/docs/source/providers/tool_runtime/remote_tavily-search.md @@ -0,0 +1,21 @@ +# remote::tavily-search + +## Description + +Tavily Search tool for AI-optimized web search with structured results. 
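+Once configured, the tool can be exercised directly through the tool-runtime API. The sketch below assumes the `llama_stack_client` package and that your client version exposes `invoke_tool(tool_name=..., kwargs=...)` with a `web_search` tool; treat those names as assumptions and check your client's reference:
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8321")
+# Tool and argument names are assumptions; adjust them to your deployment.
+result = client.tool_runtime.invoke_tool(
+    tool_name="web_search",
+    kwargs={"query": "Llama Stack documentation"},
+)
+print(result.content)
+```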
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Tavily Search API Key | +| `max_results` | `` | No | 3 | The maximum number of results to return | + +## Sample Configuration + +```yaml +api_key: ${env.TAVILY_SEARCH_API_KEY:+} +max_results: 3 + +``` + diff --git a/docs/source/providers/tool_runtime/remote_wolfram-alpha.md b/docs/source/providers/tool_runtime/remote_wolfram-alpha.md new file mode 100644 index 000000000..d44c93f72 --- /dev/null +++ b/docs/source/providers/tool_runtime/remote_wolfram-alpha.md @@ -0,0 +1,19 @@ +# remote::wolfram-alpha + +## Description + +Wolfram Alpha tool for computational knowledge and mathematical calculations. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | | + +## Sample Configuration + +```yaml +api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} + +``` + diff --git a/docs/source/providers/vector_io/index.md b/docs/source/providers/vector_io/index.md new file mode 100644 index 000000000..870d04401 --- /dev/null +++ b/docs/source/providers/vector_io/index.md @@ -0,0 +1,16 @@ +# Vector_Io Providers + +This section contains documentation for all available providers for the **vector_io** API. + +- [inline::chromadb](inline_chromadb.md) +- [inline::faiss](inline_faiss.md) +- [inline::meta-reference](inline_meta-reference.md) +- [inline::milvus](inline_milvus.md) +- [inline::qdrant](inline_qdrant.md) +- [inline::sqlite-vec](inline_sqlite-vec.md) +- [inline::sqlite_vec](inline_sqlite_vec.md) +- [remote::chromadb](remote_chromadb.md) +- [remote::milvus](remote_milvus.md) +- [remote::pgvector](remote_pgvector.md) +- [remote::qdrant](remote_qdrant.md) +- [remote::weaviate](remote_weaviate.md) \ No newline at end of file diff --git a/docs/source/providers/vector_io/inline_chromadb.md b/docs/source/providers/vector_io/inline_chromadb.md new file mode 100644 index 000000000..172215414 --- /dev/null +++ b/docs/source/providers/vector_io/inline_chromadb.md @@ -0,0 +1,52 @@ +# inline::chromadb + +## Description + + +[Chroma](https://www.trychroma.com/) is an inline and remote vector +database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features +Chroma supports: +- Store embeddings and their metadata +- Vector search +- Full-text search +- Document storage +- Metadata filtering +- Multi-modal retrieval + +## Usage + +To use Chroma in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Chroma. +3. Start storing and querying vectors. + +## Installation + +You can install Chroma using pip: + +```bash +pip install chromadb +``` + +## Documentation +See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
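As a quick orientation, here is a minimal, illustrative sketch of querying a Chroma-backed vector DB through the Llama Stack `vector_io` API. It follows the same `query_chunks` pattern shown in the sqlite-vec provider documentation; the `vector_db_id` and parameters are placeholders.

```python
# Illustrative only: assumes a vector DB has already been registered
# against this provider. The id and params below are placeholders.
response = await vector_io.query_chunks(
    vector_db_id="my_chroma_db",
    query="your query here",
    params={"max_chunks": 3, "score_threshold": 0.7},
)

# Each returned chunk carries its content and a similarity score.
for chunk, score in zip(response.chunks, response.scores):
    print(score, chunk.content)
```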
+ + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `db_path` | `` | No | PydanticUndefined | | + +## Sample Configuration + +```yaml +db_path: ${env.CHROMADB_PATH} + +``` + diff --git a/docs/source/providers/vector_io/faiss.md b/docs/source/providers/vector_io/inline_faiss.md similarity index 60% rename from docs/source/providers/vector_io/faiss.md rename to docs/source/providers/vector_io/inline_faiss.md index c8a2efbe4..2dcf4625b 100644 --- a/docs/source/providers/vector_io/faiss.md +++ b/docs/source/providers/vector_io/inline_faiss.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# Faiss +# inline::faiss + +## Description + [Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It allows you to store and query vectors directly in memory. @@ -31,3 +31,21 @@ pip install faiss-cpu ## Documentation See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for more details about Faiss in general. + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db + +``` + diff --git a/docs/source/providers/vector_io/inline_meta-reference.md b/docs/source/providers/vector_io/inline_meta-reference.md new file mode 100644 index 000000000..c9ca12ff2 --- /dev/null +++ b/docs/source/providers/vector_io/inline_meta-reference.md @@ -0,0 +1,26 @@ +# inline::meta-reference + +## Description + +Meta's reference implementation of a vector database. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db + +``` + +## Deprecation Notice + +⚠️ **Warning**: Please use the `inline::faiss` provider instead. + diff --git a/docs/source/providers/vector_io/inline_milvus.md b/docs/source/providers/vector_io/inline_milvus.md new file mode 100644 index 000000000..1e9bbfc79 --- /dev/null +++ b/docs/source/providers/vector_io/inline_milvus.md @@ -0,0 +1,26 @@ +# inline::milvus + +## Description + + +Please refer to the remote provider documentation. 
+ + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `db_path` | `` | No | PydanticUndefined | | +| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy/milvus.db} +kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/${env.MILVUS_KVSTORE_DB_PATH:=~/.llama/dummy/milvus_registry.db} + +``` + diff --git a/docs/source/providers/vector_io/qdrant.md b/docs/source/providers/vector_io/inline_qdrant.md similarity index 83% rename from docs/source/providers/vector_io/qdrant.md rename to docs/source/providers/vector_io/inline_qdrant.md index 8b0cbeef8..63e2d81d8 100644 --- a/docs/source/providers/vector_io/qdrant.md +++ b/docs/source/providers/vector_io/inline_qdrant.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# Qdrant +# inline::qdrant + +## Description + [Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It allows you to store and query vectors directly in memory. @@ -44,3 +44,18 @@ docker pull qdrant/qdrant ``` ## Documentation See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general. + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `path` | `` | No | PydanticUndefined | | + +## Sample Configuration + +```yaml +path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db + +``` + diff --git a/docs/source/providers/vector_io/sqlite-vec.md b/docs/source/providers/vector_io/inline_sqlite-vec.md similarity index 95% rename from docs/source/providers/vector_io/sqlite-vec.md rename to docs/source/providers/vector_io/inline_sqlite-vec.md index 3c7c4cbee..fd3ec1dc4 100644 --- a/docs/source/providers/vector_io/sqlite-vec.md +++ b/docs/source/providers/vector_io/inline_sqlite-vec.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# SQLite-Vec +# inline::sqlite-vec + +## Description + [SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It allows you to store and query vectors directly within an SQLite database. @@ -199,3 +199,18 @@ pip install sqlite-vec See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general. [^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759). 
+ + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `db_path` | `` | No | PydanticUndefined | | + +## Sample Configuration + +```yaml +db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db + +``` + diff --git a/docs/source/providers/vector_io/inline_sqlite_vec.md b/docs/source/providers/vector_io/inline_sqlite_vec.md new file mode 100644 index 000000000..e4b69c9ab --- /dev/null +++ b/docs/source/providers/vector_io/inline_sqlite_vec.md @@ -0,0 +1,25 @@ +# inline::sqlite_vec + +## Description + + +Please refer to the sqlite-vec provider documentation. + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `db_path` | `` | No | PydanticUndefined | | + +## Sample Configuration + +```yaml +db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db + +``` + +## Deprecation Notice + +⚠️ **Warning**: Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead. + diff --git a/docs/source/providers/vector_io/chromadb.md b/docs/source/providers/vector_io/remote_chromadb.md similarity index 75% rename from docs/source/providers/vector_io/chromadb.md rename to docs/source/providers/vector_io/remote_chromadb.md index 3f0c56f61..cc1dcc4d1 100644 --- a/docs/source/providers/vector_io/chromadb.md +++ b/docs/source/providers/vector_io/remote_chromadb.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# Chroma +# remote::chromadb + +## Description + [Chroma](https://www.trychroma.com/) is an inline and remote vector database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. @@ -34,3 +34,18 @@ pip install chromadb ## Documentation See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general. + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `str \| None` | No | PydanticUndefined | | + +## Sample Configuration + +```yaml +url: ${env.CHROMADB_URL} + +``` + diff --git a/docs/source/providers/vector_io/milvus.md b/docs/source/providers/vector_io/remote_milvus.md similarity index 74% rename from docs/source/providers/vector_io/milvus.md rename to docs/source/providers/vector_io/remote_milvus.md index e030c85f8..9017f0e22 100644 --- a/docs/source/providers/vector_io/milvus.md +++ b/docs/source/providers/vector_io/remote_milvus.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# Milvus +# remote::milvus + +## Description + [Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It allows you to store and query vectors directly within a Milvus database. @@ -96,7 +96,7 @@ vector_io: #### Key Parameters for TLS Configuration - **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`. -- **`server_pem_path`**: Path to the **server certificate** for verifying the server’s identity (used in one-way TLS). +- **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS). - **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS). - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). - **`client_key_path`**: Path to the **client private key** file (required for mTLS). 
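To make the mapping concrete, here is a minimal sketch of how the TLS parameters above correspond to a direct `pymilvus` connection (assuming the `pymilvus` `connections.connect` API; the URI, token, and certificate paths are placeholders):

```python
# Illustrative sketch: one-way TLS connection with pymilvus.
# For mTLS, also pass ca_pem_path, client_pem_path, and client_key_path,
# mirroring the mTLS configuration described above. All values are placeholders.
from pymilvus import connections

connections.connect(
    alias="default",
    uri="https://<host>:<port>",
    token="<user>:<password>",
    secure=True,
    server_pem_path="/path/to/server.pem",
)
```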
@@ -105,3 +105,24 @@ vector_io: See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md). + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `uri` | `` | No | PydanticUndefined | The URI of the Milvus server | +| `token` | `str \| None` | No | PydanticUndefined | The token of the Milvus server | +| `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | +| `config` | `dict` | No | {} | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | + +> **Note**: This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider. + +## Sample Configuration + +```yaml +uri: ${env.MILVUS_ENDPOINT} +token: ${env.MILVUS_TOKEN} + +``` + diff --git a/docs/source/providers/vector_io/pgvector.md b/docs/source/providers/vector_io/remote_pgvector.md similarity index 55% rename from docs/source/providers/vector_io/pgvector.md rename to docs/source/providers/vector_io/remote_pgvector.md index 070e2c16d..685b98f37 100644 --- a/docs/source/providers/vector_io/pgvector.md +++ b/docs/source/providers/vector_io/remote_pgvector.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# Postgres PGVector +# remote::pgvector + +## Description + [PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It allows you to store and query vectors directly in memory. @@ -29,3 +29,26 @@ docker pull pgvector/pgvector:pg17 ``` ## Documentation See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general. + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `host` | `str \| None` | No | localhost | | +| `port` | `int \| None` | No | 5432 | | +| `db` | `str \| None` | No | postgres | | +| `user` | `str \| None` | No | postgres | | +| `password` | `str \| None` | No | mysecretpassword | | + +## Sample Configuration + +```yaml +host: ${env.PGVECTOR_HOST:=localhost} +port: ${env.PGVECTOR_PORT:=5432} +db: ${env.PGVECTOR_DB} +user: ${env.PGVECTOR_USER} +password: ${env.PGVECTOR_PASSWORD} + +``` + diff --git a/docs/source/providers/vector_io/remote_qdrant.md b/docs/source/providers/vector_io/remote_qdrant.md new file mode 100644 index 000000000..14c821f35 --- /dev/null +++ b/docs/source/providers/vector_io/remote_qdrant.md @@ -0,0 +1,30 @@ +# remote::qdrant + +## Description + + +Please refer to the inline provider documentation. 
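The configuration fields listed below largely mirror the constructor arguments of the `qdrant-client` package, to which they are presumably passed through. A minimal sketch of the equivalent direct connection (all values are placeholders):

```python
# Illustrative sketch: the remote Qdrant settings map onto
# qdrant-client constructor arguments. All values are placeholders.
from qdrant_client import QdrantClient

client = QdrantClient(
    url="https://my-qdrant.example.com",
    port=6333,
    grpc_port=6334,
    prefer_grpc=False,
    api_key="<QDRANT_API_KEY>",
    timeout=30,
)
```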
+ + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `location` | `str \| None` | No | | | +| `url` | `str \| None` | No | | | +| `port` | `int \| None` | No | 6333 | | +| `grpc_port` | `` | No | 6334 | | +| `prefer_grpc` | `` | No | False | | +| `https` | `bool \| None` | No | | | +| `api_key` | `str \| None` | No | | | +| `prefix` | `str \| None` | No | | | +| `timeout` | `int \| None` | No | | | +| `host` | `str \| None` | No | | | + +## Sample Configuration + +```yaml +api_key: ${env.QDRANT_API_KEY} + +``` + diff --git a/docs/source/providers/vector_io/weaviate.md b/docs/source/providers/vector_io/remote_weaviate.md similarity index 91% rename from docs/source/providers/vector_io/weaviate.md rename to docs/source/providers/vector_io/remote_weaviate.md index 78c0ddb5b..b7f811c35 100644 --- a/docs/source/providers/vector_io/weaviate.md +++ b/docs/source/providers/vector_io/remote_weaviate.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# Weaviate +# remote::weaviate + +## Description + [Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack. It allows you to store and query vectors directly within a Weaviate database. @@ -31,3 +31,12 @@ To install Weaviate see the [Weaviate quickstart documentation](https://weaviate ## Documentation See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general. + + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index 221ed9027..efe8a98fe 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -141,6 +141,12 @@ Fully-qualified name of the module to import. The module is expected to have: provider_data_validator: str | None = Field( default=None, ) + description: str | None = Field( + default=None, + description=""" +A description of the provider. This is used to display in the documentation. +""", + ) @json_schema_type @@ -167,6 +173,12 @@ Fully-qualified name of the module to import. The module is expected to have: provider_data_validator: str | None = Field( default=None, ) + description: str | None = Field( + default=None, + description=""" +A description of the provider. This is used to display in the documentation. 
+""", + ) class RemoteProviderConfig(BaseModel): diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 50dd8a788..1e4b0c070 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -38,7 +38,7 @@ class TelemetryConfig(BaseModel): description="List of telemetry sinks to enable (possible values: otel, sqlite, console)", ) sqlite_db_path: str = Field( - default=(RUNTIME_BASE_DIR / "trace_store.db").as_posix(), + default_factory=lambda: (RUNTIME_BASE_DIR / "trace_store.db").as_posix(), description="The path to the SQLite database to use for storing traces", ) diff --git a/llama_stack/providers/registry/agents.py b/llama_stack/providers/registry/agents.py index e47f84c65..834e81b96 100644 --- a/llama_stack/providers/registry/agents.py +++ b/llama_stack/providers/registry/agents.py @@ -35,5 +35,6 @@ def available_providers() -> list[ProviderSpec]: Api.tool_runtime, Api.tool_groups, ], + description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.", ), ] diff --git a/llama_stack/providers/registry/datasetio.py b/llama_stack/providers/registry/datasetio.py index 152cc9cb9..43cde83fb 100644 --- a/llama_stack/providers/registry/datasetio.py +++ b/llama_stack/providers/registry/datasetio.py @@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.inline.datasetio.localfs", config_class="llama_stack.providers.inline.datasetio.localfs.LocalFSDatasetIOConfig", api_dependencies=[], + description="Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.", ), remote_provider_spec( api=Api.datasetio, @@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.remote.datasetio.huggingface", config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig", + description="HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.", ), ), remote_provider_spec( @@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.remote.datasetio.nvidia", config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig", + description="NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.", ), ), ] diff --git a/llama_stack/providers/registry/eval.py b/llama_stack/providers/registry/eval.py index c9c29bbe0..9f0d17916 100644 --- a/llama_stack/providers/registry/eval.py +++ b/llama_stack/providers/registry/eval.py @@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]: Api.inference, Api.agents, ], + description="Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.", ), remote_provider_spec( api=Api.eval, @@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.remote.eval.nvidia", config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig", + description="NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.", ), api_dependencies=[ Api.datasetio, diff --git a/llama_stack/providers/registry/files.py b/llama_stack/providers/registry/files.py index dc5443c3a..e894debaf 100644 --- a/llama_stack/providers/registry/files.py +++ 
b/llama_stack/providers/registry/files.py @@ -21,5 +21,6 @@ def available_providers() -> list[ProviderSpec]: pip_packages=sql_store_pip_packages, module="llama_stack.providers.inline.files.localfs", config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig", + description="Local filesystem-based file storage provider for managing files and documents locally.", ), ] diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 47be57eee..217870ec9 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -35,6 +35,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=META_REFERENCE_DEPS, module="llama_stack.providers.inline.inference.meta_reference", config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig", + description="Meta's reference implementation of inference with support for various model formats and optimization techniques.", ), InlineProviderSpec( api=Api.inference, @@ -44,6 +45,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.inline.inference.vllm", config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig", + description="vLLM inference provider for high-performance model serving with PagedAttention and continuous batching.", ), InlineProviderSpec( api=Api.inference, @@ -54,6 +56,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.inline.inference.sentence_transformers", config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig", + description="Sentence Transformers inference provider for text embeddings and similarity search.", ), remote_provider_spec( api=Api.inference, @@ -64,6 +67,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.remote.inference.cerebras", config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig", + description="Cerebras inference provider for running models on Cerebras Cloud platform.", ), ), remote_provider_spec( @@ -73,6 +77,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["ollama", "aiohttp", "h11>=0.16.0"], config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig", module="llama_stack.providers.remote.inference.ollama", + description="Ollama inference provider for running local models through the Ollama runtime.", ), ), remote_provider_spec( @@ -82,6 +87,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["openai"], module="llama_stack.providers.remote.inference.vllm", config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig", + description="Remote vLLM inference provider for connecting to vLLM servers.", ), ), remote_provider_spec( @@ -91,6 +97,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["huggingface_hub", "aiohttp"], module="llama_stack.providers.remote.inference.tgi", config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig", + description="Text Generation Inference (TGI) provider for HuggingFace model serving.", ), ), remote_provider_spec( @@ -100,6 +107,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["huggingface_hub", "aiohttp"], module="llama_stack.providers.remote.inference.tgi", config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig", + description="HuggingFace Inference API serverless provider for 
on-demand model inference.", ), ), remote_provider_spec( @@ -109,6 +117,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["huggingface_hub", "aiohttp"], module="llama_stack.providers.remote.inference.tgi", config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig", + description="HuggingFace Inference Endpoints provider for dedicated model serving.", ), ), remote_provider_spec( @@ -121,6 +130,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.fireworks", config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig", provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator", + description="Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.", ), ), remote_provider_spec( @@ -133,6 +143,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.together", config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig", provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator", + description="Together AI inference provider for open-source models and collaborative AI development.", ), ), remote_provider_spec( @@ -142,6 +153,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["boto3"], module="llama_stack.providers.remote.inference.bedrock", config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig", + description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.", ), ), remote_provider_spec( @@ -153,6 +165,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.remote.inference.databricks", config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig", + description="Databricks inference provider for running models on Databricks' unified analytics platform.", ), ), remote_provider_spec( @@ -164,6 +177,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.remote.inference.nvidia", config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig", + description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.", ), ), remote_provider_spec( @@ -173,6 +187,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["openai"], module="llama_stack.providers.remote.inference.runpod", config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig", + description="RunPod inference provider for running models on RunPod's cloud GPU platform.", ), ), remote_provider_spec( @@ -183,6 +198,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.openai", config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig", provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator", + description="OpenAI inference provider for accessing GPT models and other OpenAI services.", ), ), remote_provider_spec( @@ -193,6 +209,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.anthropic", config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig", provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator", + description="Anthropic inference provider for accessing Claude 
models and Anthropic's AI services.", ), ), remote_provider_spec( @@ -203,6 +220,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.gemini", config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig", provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator", + description="Google Gemini inference provider for accessing Gemini models and Google's AI services.", ), ), remote_provider_spec( @@ -213,6 +231,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.groq", config_class="llama_stack.providers.remote.inference.groq.GroqConfig", provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator", + description="Groq inference provider for ultra-fast inference using Groq's LPU technology.", ), ), remote_provider_spec( @@ -223,6 +242,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.fireworks_openai_compat", config_class="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksCompatConfig", provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator", + description="Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.", ), ), remote_provider_spec( @@ -233,6 +253,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.llama_openai_compat", config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig", provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator", + description="Llama OpenAI-compatible provider for using Llama models with OpenAI API format.", ), ), remote_provider_spec( @@ -243,6 +264,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.together_openai_compat", config_class="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherCompatConfig", provider_data_validator="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherProviderDataValidator", + description="Together AI OpenAI-compatible provider for using Together models with OpenAI API format.", ), ), remote_provider_spec( @@ -253,6 +275,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.groq_openai_compat", config_class="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqCompatConfig", provider_data_validator="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqProviderDataValidator", + description="Groq OpenAI-compatible provider for using Groq models with OpenAI API format.", ), ), remote_provider_spec( @@ -263,6 +286,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.sambanova_openai_compat", config_class="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaCompatConfig", provider_data_validator="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaProviderDataValidator", + description="SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format.", ), ), remote_provider_spec( @@ -273,6 +297,7 @@ def available_providers() -> list[ProviderSpec]: 
module="llama_stack.providers.remote.inference.cerebras_openai_compat", config_class="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasCompatConfig", provider_data_validator="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasProviderDataValidator", + description="Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.", ), ), remote_provider_spec( @@ -283,6 +308,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.sambanova", config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig", provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator", + description="SambaNova inference provider for running models on SambaNova's dataflow architecture.", ), ), remote_provider_spec( @@ -293,6 +319,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.passthrough", config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig", provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator", + description="Passthrough inference provider for connecting to any external inference service not directly supported.", ), ), remote_provider_spec( @@ -303,6 +330,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.watsonx", config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig", provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator", + description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.", ), ), ] diff --git a/llama_stack/providers/registry/post_training.py b/llama_stack/providers/registry/post_training.py index d752b8819..ffd64ef7c 100644 --- a/llama_stack/providers/registry/post_training.py +++ b/llama_stack/providers/registry/post_training.py @@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]: Api.datasetio, Api.datasets, ], + description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.", ), InlineProviderSpec( api=Api.post_training, @@ -31,6 +32,7 @@ def available_providers() -> list[ProviderSpec]: Api.datasetio, Api.datasets, ], + description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.", ), remote_provider_spec( api=Api.post_training, @@ -39,6 +41,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["requests", "aiohttp"], module="llama_stack.providers.remote.post_training.nvidia", config_class="llama_stack.providers.remote.post_training.nvidia.NvidiaPostTrainingConfig", + description="NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.", ), ), ] diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py index f0fe1e9f5..9dd791bd8 100644 --- a/llama_stack/providers/registry/safety.py +++ b/llama_stack/providers/registry/safety.py @@ -25,6 +25,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.inline.safety.prompt_guard", config_class="llama_stack.providers.inline.safety.prompt_guard.PromptGuardConfig", + description="Prompt Guard safety provider for detecting and filtering unsafe prompts and content.", ), InlineProviderSpec( api=Api.safety, @@ -35,6 +36,7 @@ def available_providers() -> 
list[ProviderSpec]: api_dependencies=[ Api.inference, ], + description="Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.", ), InlineProviderSpec( api=Api.safety, @@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.inline.safety.code_scanner", config_class="llama_stack.providers.inline.safety.code_scanner.CodeScannerConfig", + description="Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.", ), remote_provider_spec( api=Api.safety, @@ -52,6 +55,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["boto3"], module="llama_stack.providers.remote.safety.bedrock", config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig", + description="AWS Bedrock safety provider for content moderation using AWS's safety services.", ), ), remote_provider_spec( @@ -61,6 +65,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["requests"], module="llama_stack.providers.remote.safety.nvidia", config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig", + description="NVIDIA's safety provider for content moderation and safety filtering.", ), ), remote_provider_spec( @@ -71,6 +76,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.safety.sambanova", config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig", provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator", + description="SambaNova's safety provider for content moderation and safety filtering.", ), ), ] diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py index 244b06842..79293d888 100644 --- a/llama_stack/providers/registry/scoring.py +++ b/llama_stack/providers/registry/scoring.py @@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]: Api.datasetio, Api.datasets, ], + description="Basic scoring provider for simple evaluation metrics and scoring functions.", ), InlineProviderSpec( api=Api.scoring, @@ -32,6 +33,7 @@ def available_providers() -> list[ProviderSpec]: Api.datasets, Api.inference, ], + description="LLM-as-judge scoring provider that uses language models to evaluate and score responses.", ), InlineProviderSpec( api=Api.scoring, @@ -44,5 +46,6 @@ def available_providers() -> list[ProviderSpec]: Api.datasets, ], provider_data_validator="llama_stack.providers.inline.scoring.braintrust.BraintrustProviderDataValidator", + description="Braintrust scoring provider for evaluation and scoring using the Braintrust platform.", ), ] diff --git a/llama_stack/providers/registry/telemetry.py b/llama_stack/providers/registry/telemetry.py index 14da06126..b50b422c1 100644 --- a/llama_stack/providers/registry/telemetry.py +++ b/llama_stack/providers/registry/telemetry.py @@ -24,5 +24,6 @@ def available_providers() -> list[ProviderSpec]: optional_api_dependencies=[Api.datasetio], module="llama_stack.providers.inline.telemetry.meta_reference", config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig", + description="Meta's reference implementation of telemetry and observability using OpenTelemetry.", ), ] diff --git a/llama_stack/providers/registry/tool_runtime.py b/llama_stack/providers/registry/tool_runtime.py index fa359f6b5..0dc880408 100644 --- a/llama_stack/providers/registry/tool_runtime.py +++ b/llama_stack/providers/registry/tool_runtime.py @@ 
-33,6 +33,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.inline.tool_runtime.rag", config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig", api_dependencies=[Api.vector_io, Api.inference], + description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.", ), remote_provider_spec( api=Api.tool_runtime, @@ -42,6 +43,7 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.remote.tool_runtime.brave_search.config.BraveSearchToolConfig", pip_packages=["requests"], provider_data_validator="llama_stack.providers.remote.tool_runtime.brave_search.BraveSearchToolProviderDataValidator", + description="Brave Search tool for web search capabilities with privacy-focused results.", ), ), remote_provider_spec( @@ -52,6 +54,7 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.remote.tool_runtime.bing_search.config.BingSearchToolConfig", pip_packages=["requests"], provider_data_validator="llama_stack.providers.remote.tool_runtime.bing_search.BingSearchToolProviderDataValidator", + description="Bing Search tool for web search capabilities using Microsoft's search engine.", ), ), remote_provider_spec( @@ -62,6 +65,7 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.remote.tool_runtime.tavily_search.config.TavilySearchToolConfig", pip_packages=["requests"], provider_data_validator="llama_stack.providers.remote.tool_runtime.tavily_search.TavilySearchToolProviderDataValidator", + description="Tavily Search tool for AI-optimized web search with structured results.", ), ), remote_provider_spec( @@ -72,6 +76,7 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.remote.tool_runtime.wolfram_alpha.config.WolframAlphaToolConfig", pip_packages=["requests"], provider_data_validator="llama_stack.providers.remote.tool_runtime.wolfram_alpha.WolframAlphaToolProviderDataValidator", + description="Wolfram Alpha tool for computational knowledge and mathematical calculations.", ), ), remote_provider_spec( @@ -82,6 +87,7 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderConfig", pip_packages=["mcp"], provider_data_validator="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderDataValidator", + description="Model Context Protocol (MCP) tool for standardized tool calling and context management.", ), ), ] diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index 6f4366142..5e233b94c 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -25,6 +25,7 @@ def available_providers() -> list[ProviderSpec]: deprecation_warning="Please use the `inline::faiss` provider instead.", api_dependencies=[Api.inference], optional_api_dependencies=[Api.files], + description="Meta's reference implementation of a vector database.", ), InlineProviderSpec( api=Api.vector_io, @@ -34,6 +35,36 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig", api_dependencies=[Api.inference], optional_api_dependencies=[Api.files], + description=""" +[Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. 
It +allows you to store and query vectors directly in memory. +That means you'll get fast and efficient vector retrieval. + +## Features + +- Lightweight and easy to use +- Fully integrated with Llama Stack +- GPU support + +## Usage + +To use Faiss in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Faiss. +3. Start storing and querying vectors. + +## Installation + +You can install Faiss using pip: + +```bash +pip install faiss-cpu +``` +## Documentation +See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for +more details about Faiss in general. +""", ), # NOTE: sqlite-vec cannot be bundled into the container image because it does not have a # source distribution and the wheels are not available for all platforms. @@ -45,6 +76,204 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig", api_dependencies=[Api.inference], optional_api_dependencies=[Api.files], + description=""" +[SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It +allows you to store and query vectors directly within an SQLite database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features + +- Lightweight and easy to use +- Fully integrated with Llama Stack +- Uses disk-based storage for persistence, allowing for larger vector storage + +### Comparison to Faiss + +The choice between Faiss and sqlite-vec should be made based on the needs of your application, +as they have different strengths. + +#### Choosing the Right Provider + +Scenario | Recommended Tool | Reason +-- |-----------------| -- +Online Analytical Processing (OLAP) | Faiss | Fast, in-memory searches +Online Transaction Processing (OLTP) | sqlite-vec | Frequent writes and reads +Frequent writes | sqlite-vec | Efficient disk-based storage and incremental indexing +Large datasets | sqlite-vec | Disk-based storage for larger vector storage +Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, indexing, and GPU acceleration + +#### Empirical Example + +Consider the histogram below in which 10,000 randomly generated strings were inserted +in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`. + +```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png +:alt: Comparison of SQLite-Vec and Faiss write times +:width: 400px +``` + +You will notice that the average write time for `sqlite-vec` was 788ms, compared to +47,640ms for Faiss. While the number is jarring, if you look at the distribution, you can see that it is rather +uniformly spread across the [1500, 100000] interval. + +Looking at each individual write in the order that the documents are inserted you'll see the increase in +write time as Faiss reindexes the vectors after each write. +```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png +:alt: Comparison of SQLite-Vec and Faiss write times +:width: 400px +``` + +In comparison, the read times for Faiss were on average 10% faster than sqlite-vec. +The modes of the two distributions highlight the difference further: Faiss +will likely yield faster read performance.
+ +```{image} ../../../../_static/providers/vector_io/read_time_comparison_sqlite-vec-faiss.png +:alt: Comparison of SQLite-Vec and Faiss read times +:width: 400px +``` + +## Usage + +To use sqlite-vec in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use SQLite-Vec. +3. Start storing and querying vectors. + +The SQLite-vec provider supports three search modes: + +1. **Vector Search** (`mode="vector"`): Performs pure vector similarity search using the embeddings. +2. **Keyword Search** (`mode="keyword"`): Performs full-text search using SQLite's FTS5. +3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword search for better results. First performs keyword search to get candidate matches, then applies vector similarity search on those candidates. + +Example with hybrid search: +```python +response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7}, +) + +# Using RRF ranker +response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={ + "mode": "hybrid", + "max_chunks": 3, + "score_threshold": 0.7, + "ranker": {"type": "rrf", "impact_factor": 60.0}, + }, +) + +# Using weighted ranker +response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={ + "mode": "hybrid", + "max_chunks": 3, + "score_threshold": 0.7, + "ranker": {"type": "weighted", "alpha": 0.7}, # 70% vector, 30% keyword + }, +) +``` + +Example with explicit vector search: +```python +response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7}, +) +``` + +Example with keyword search: +```python +response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7}, +) +``` + +## Supported Search Modes + +The SQLite vector store supports three search modes: + +1. **Vector Search** (`mode="vector"`): Uses vector similarity to find relevant chunks +2. **Keyword Search** (`mode="keyword"`): Uses keyword matching to find relevant chunks +3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword scores using a ranker + +### Hybrid Search + +Hybrid search combines the strengths of both vector and keyword search by: +- Computing vector similarity scores +- Computing keyword match scores +- Using a ranker to combine these scores + +Two ranker types are supported: + +1. **RRF (Reciprocal Rank Fusion)**: + - Combines ranks from both vector and keyword results + - Uses an impact factor (default: 60.0) to control the weight of higher-ranked results + - Good for balancing between vector and keyword results + - The default impact factor of 60.0 comes from the original RRF paper by Cormack et al. (2009) [^1], which found this value to provide optimal performance across various retrieval tasks + +2. 
**Weighted**: + - Linearly combines normalized vector and keyword scores + - Uses an alpha parameter (0-1) to control the blend: + - alpha=0: Only use keyword scores + - alpha=1: Only use vector scores + - alpha=0.5: Equal weight to both (default) + +Example using RAGQueryConfig with different search modes: + +```python +from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker + +# Vector search +config = RAGQueryConfig(mode="vector", max_chunks=5) + +# Keyword search +config = RAGQueryConfig(mode="keyword", max_chunks=5) + +# Hybrid search with custom RRF ranker +config = RAGQueryConfig( + mode="hybrid", + max_chunks=5, + ranker=RRFRanker(impact_factor=50.0), # Custom impact factor +) + +# Hybrid search with weighted ranker +config = RAGQueryConfig( + mode="hybrid", + max_chunks=5, + ranker=WeightedRanker(alpha=0.7), # 70% vector, 30% keyword +) + +# Hybrid search with default RRF ranker +config = RAGQueryConfig( + mode="hybrid", max_chunks=5 +) # Will use RRF with impact_factor=60.0 +``` + +Note: The ranker configuration is only used in hybrid mode. For vector or keyword modes, the ranker parameter is ignored. + +## Installation + +You can install SQLite-Vec using pip: + +```bash +pip install sqlite-vec +``` + +## Documentation + +See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general. + +[^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759). +""", ), InlineProviderSpec( api=Api.vector_io, @@ -55,6 +284,9 @@ def available_providers() -> list[ProviderSpec]: deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.", api_dependencies=[Api.inference], optional_api_dependencies=[Api.files], + description=""" +Please refer to the sqlite-vec provider documentation. +""", ), remote_provider_spec( Api.vector_io, @@ -63,6 +295,39 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["chromadb-client"], module="llama_stack.providers.remote.vector_io.chroma", config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig", + description=""" +[Chroma](https://www.trychroma.com/) is an inline and remote vector +database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features +Chroma supports: +- Store embeddings and their metadata +- Vector search +- Full-text search +- Document storage +- Metadata filtering +- Multi-modal retrieval + +## Usage + +To use Chrome in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use chroma. +3. Start storing and querying vectors. + +## Installation + +You can install chroma using pip: + +```bash +pip install chromadb +``` + +## Documentation +See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general. 
+""", ), api_dependencies=[Api.inference], ), @@ -73,6 +338,40 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.inline.vector_io.chroma", config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig", api_dependencies=[Api.inference], + description=""" +[Chroma](https://www.trychroma.com/) is an inline and remote vector +database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features +Chroma supports: +- Store embeddings and their metadata +- Vector search +- Full-text search +- Document storage +- Metadata filtering +- Multi-modal retrieval + +## Usage + +To use Chrome in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use chroma. +3. Start storing and querying vectors. + +## Installation + +You can install chroma using pip: + +```bash +pip install chromadb +``` + +## Documentation +See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general. + +""", ), remote_provider_spec( Api.vector_io, @@ -81,6 +380,34 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["psycopg2-binary"], module="llama_stack.providers.remote.vector_io.pgvector", config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig", + description=""" +[PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It +allows you to store and query vectors directly in memory. +That means you'll get fast and efficient vector retrieval. + +## Features + +- Easy to use +- Fully integrated with Llama Stack + +## Usage + +To use PGVector in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Faiss. +3. Start storing and querying vectors. + +## Installation + +You can install PGVector using docker: + +```bash +docker pull pgvector/pgvector:pg17 +``` +## Documentation +See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general. +""", ), api_dependencies=[Api.inference], ), @@ -92,6 +419,36 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.vector_io.weaviate", config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig", provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData", + description=""" +[Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack. +It allows you to store and query vectors directly within a Weaviate database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features +Weaviate supports: +- Store embeddings and their metadata +- Vector search +- Full-text search +- Hybrid search +- Document storage +- Metadata filtering +- Multi-modal retrieval + +## Usage + +To use Weaviate in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use chroma. +3. Start storing and querying vectors. + +## Installation + +To install Weaviate see the [Weaviate quickstart documentation](https://weaviate.io/developers/weaviate/quickstart). 
+ +## Documentation +See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general. +""", ), api_dependencies=[Api.inference], ), @@ -102,6 +459,49 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.inline.vector_io.qdrant", config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig", api_dependencies=[Api.inference], + description=r""" +[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It +allows you to store and query vectors directly in memory. +That means you'll get fast and efficient vector retrieval. + +> By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in +> memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative. +> +> \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\] + + + +## Features + +- Lightweight and easy to use +- Fully integrated with Llama Stack +- Apache 2.0 license terms +- Store embeddings and their metadata +- Supports search by + [Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/) + and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search +- [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/) +- [Metadata filtering](https://qdrant.tech/articles/vector-search-filtering/) +- [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/) + +## Usage + +To use Qdrant in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Qdrant. +3. Start storing and querying vectors. + +## Installation + +You can install Qdrant using docker: + +```bash +docker pull qdrant/qdrant +``` +## Documentation +See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general. +""", ), remote_provider_spec( Api.vector_io, @@ -110,6 +510,9 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["qdrant-client"], module="llama_stack.providers.remote.vector_io.qdrant", config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig", + description=""" +Please refer to the inline provider documentation. +""", ), api_dependencies=[Api.inference], ), @@ -120,6 +523,110 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["pymilvus"], module="llama_stack.providers.remote.vector_io.milvus", config_class="llama_stack.providers.remote.vector_io.milvus.MilvusVectorIOConfig", + description=""" +[Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It +allows you to store and query vectors directly within a Milvus database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features + +- Easy to use +- Fully integrated with Llama Stack + +## Usage + +To use Milvus in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Milvus. +3. Start storing and querying vectors.
+ +## Installation + +You can install Milvus using pymilvus: + +```bash +pip install pymilvus +``` + +## Configuration + +In Llama Stack, Milvus can be configured in two ways: +- **Inline (Local) Configuration** - Uses Milvus-Lite for local storage +- **Remote Configuration** - Connects to a remote Milvus server + +### Inline (Local) Configuration + +The simplest method is local configuration, which requires setting `db_path`, a path for locally storing Milvus-Lite files: + +```yaml +vector_io: + - provider_id: milvus + provider_type: inline::milvus + config: + db_path: ~/.llama/distributions/together/milvus_store.db +``` + +### Remote Configuration + +Remote configuration is suitable for larger data storage requirements: + +#### Standard Remote Connection + +```yaml +vector_io: + - provider_id: milvus + provider_type: remote::milvus + config: + uri: "http://<host>:<port>" + token: "<user>:<password>" +``` + +#### TLS-Enabled Remote Connection (One-way TLS) + +For connections to Milvus instances with one-way TLS enabled: + +```yaml +vector_io: + - provider_id: milvus + provider_type: remote::milvus + config: + uri: "https://<host>:<port>" + token: "<user>:<password>" + secure: True + server_pem_path: "/path/to/server.pem" +``` + +#### Mutual TLS (mTLS) Remote Connection + +For connections to Milvus instances with mutual TLS (mTLS) enabled: + +```yaml +vector_io: + - provider_id: milvus + provider_type: remote::milvus + config: + uri: "https://<host>:<port>" + token: "<user>:<password>" + secure: True + ca_pem_path: "/path/to/ca.pem" + client_pem_path: "/path/to/client.pem" + client_key_path: "/path/to/client.key" +``` + +#### Key Parameters for TLS Configuration + +- **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`. +- **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS). +- **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS). +- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). +- **`client_key_path`**: Path to the **client private key** file (required for mTLS). + +## Documentation +See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. + +For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md). +""", ), api_dependencies=[Api.inference], ), @@ -131,5 +638,8 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig", api_dependencies=[Api.inference], optional_api_dependencies=[Api.files], + description=""" +Please refer to the remote provider documentation.
+""", ), ] diff --git a/llama_stack/providers/remote/vector_io/milvus/config.py b/llama_stack/providers/remote/vector_io/milvus/config.py index 9bdc7ed5c..b42233d6d 100644 --- a/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/llama_stack/providers/remote/vector_io/milvus/config.py @@ -6,17 +6,19 @@ from typing import Any -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field from llama_stack.schema_utils import json_schema_type @json_schema_type class MilvusVectorIOConfig(BaseModel): - uri: str - token: str | None = None - consistency_level: str = "Strong" + uri: str = Field(description="The URI of the Milvus server") + token: str | None = Field(description="The token of the Milvus server") + consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") + # This configuration allows additional fields to be passed through to the underlying Milvus client. + # See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. model_config = ConfigDict(extra="allow") @classmethod diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py new file mode 100755 index 000000000..eff04a40f --- /dev/null +++ b/scripts/provider_codegen.py @@ -0,0 +1,332 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import subprocess +import sys +from pathlib import Path +from typing import Any + +from rich.progress import Progress, SpinnerColumn, TextColumn + +from llama_stack.distribution.distribution import get_provider_registry + +REPO_ROOT = Path(__file__).parent.parent + + +class ChangedPathTracker: + """Track a list of paths we may have changed.""" + + def __init__(self): + self._changed_paths = [] + + def add_paths(self, *paths): + for path in paths: + path = str(path) + if path not in self._changed_paths: + self._changed_paths.append(path) + + def changed_paths(self): + return self._changed_paths + + +def get_config_class_info(config_class_path: str) -> dict[str, Any]: + """Extract configuration information from a config class.""" + try: + module_path, class_name = config_class_path.rsplit(".", 1) + module = __import__(module_path, fromlist=[class_name]) + config_class = getattr(module, class_name) + + docstring = config_class.__doc__ or "" + + accepts_extra_config = False + try: + schema = config_class.model_json_schema() + if schema.get("additionalProperties") is True: + accepts_extra_config = True + except Exception: + if hasattr(config_class, "model_config"): + model_config = config_class.model_config + if hasattr(model_config, "extra") and model_config.extra == "allow": + accepts_extra_config = True + elif isinstance(model_config, dict) and model_config.get("extra") == "allow": + accepts_extra_config = True + + fields_info = {} + if hasattr(config_class, "model_fields"): + for field_name, field in config_class.model_fields.items(): + field_type = str(field.annotation) if field.annotation else "Any" + field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "") + field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "") + field_type = field_type.replace("llama_stack.apis.inference.inference.", "") + field_type = field_type.replace("llama_stack.providers.", "") + + default_value = field.default + if 
field.default_factory is not None: + try: + default_value = field.default_factory() + # HACK ALERT: + # If the default value contains a path that looks like it came from RUNTIME_BASE_DIR, + # replace it with a generic ~/.llama/ path for documentation + if isinstance(default_value, str) and "/.llama/" in default_value: + if ".llama/" in default_value: + path_part = default_value.split(".llama/")[-1] + default_value = f"~/.llama/{path_part}" + except Exception: + default_value = "" + elif field.default is None: + default_value = "" + + field_info = { + "type": field_type, + "description": field.description or "", + "default": default_value, + "required": field.default is None and not field.is_required, + } + fields_info[field_name] = field_info + + if accepts_extra_config: + config_description = "Additional configuration options that will be forwarded to the underlying provider" + try: + import inspect + + source = inspect.getsource(config_class) + lines = source.split("\n") + + for i, line in enumerate(lines): + if "model_config" in line and "ConfigDict" in line and 'extra="allow"' in line: + comments = [] + for j in range(i - 1, -1, -1): + stripped = lines[j].strip() + if stripped.startswith("#"): + comments.append(stripped[1:].strip()) + elif stripped == "": + continue + else: + break + + if comments: + config_description = " ".join(reversed(comments)) + break + except Exception: + pass + + fields_info["config"] = { + "type": "dict", + "description": config_description, + "default": "{}", + "required": False, + } + + return { + "docstring": docstring, + "fields": fields_info, + "sample_config": getattr(config_class, "sample_run_config", None), + "accepts_extra_config": accepts_extra_config, + } + except Exception as e: + return { + "error": f"Failed to load config class {config_class_path}: {str(e)}", + "docstring": "", + "fields": {}, + "sample_config": None, + "accepts_extra_config": False, + } + + +def generate_provider_docs(provider_spec: Any, api_name: str) -> str: + """Generate markdown documentation for a provider.""" + provider_type = provider_spec.provider_type + config_class = provider_spec.config_class + + config_info = get_config_class_info(config_class) + + md_lines = [] + md_lines.append(f"# {provider_type}") + md_lines.append("") + + description = "" + if hasattr(provider_spec, "description") and provider_spec.description: + description = provider_spec.description + elif ( + hasattr(provider_spec, "adapter") + and hasattr(provider_spec.adapter, "description") + and provider_spec.adapter.description + ): + description = provider_spec.adapter.description + elif config_info.get("docstring"): + description = config_info["docstring"] + + if description: + md_lines.append("## Description") + md_lines.append("") + md_lines.append(description) + md_lines.append("") + + if config_info.get("fields"): + md_lines.append("## Configuration") + md_lines.append("") + md_lines.append("| Field | Type | Required | Default | Description |") + md_lines.append("|-------|------|----------|---------|-------------|") + + for field_name, field_info in config_info["fields"].items(): + field_type = field_info["type"].replace("|", "\\|") + required = "Yes" if field_info["required"] else "No" + default = str(field_info["default"]) if field_info["default"] is not None else "" + description = field_info["description"] or "" + + md_lines.append(f"| `{field_name}` | `{field_type}` | {required} | {default} | {description} |") + + md_lines.append("") + + if config_info.get("accepts_extra_config"): + 
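+        # For config classes that allow extra fields, add a note that unknown options are forwarded to the underlying provider.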
md_lines.append( + "> **Note**: This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider." + ) + md_lines.append("") + + if config_info.get("sample_config"): + md_lines.append("## Sample Configuration") + md_lines.append("") + md_lines.append("```yaml") + try: + sample_config_func = config_info["sample_config"] + import inspect + + import yaml + + if sample_config_func is not None: + sig = inspect.signature(sample_config_func) + if "__distro_dir__" in sig.parameters: + sample_config = sample_config_func(__distro_dir__="~/.llama/dummy") + else: + sample_config = sample_config_func() + + def convert_pydantic_to_dict(obj): + if hasattr(obj, "model_dump"): + return obj.model_dump() + elif hasattr(obj, "dict"): + return obj.dict() + elif isinstance(obj, dict): + return {k: convert_pydantic_to_dict(v) for k, v in obj.items()} + elif isinstance(obj, list): + return [convert_pydantic_to_dict(item) for item in obj] + else: + return obj + + sample_config_dict = convert_pydantic_to_dict(sample_config) + md_lines.append(yaml.dump(sample_config_dict, default_flow_style=False, sort_keys=False)) + else: + md_lines.append("# No sample configuration available.") + except Exception as e: + md_lines.append(f"# Error generating sample config: {str(e)}") + md_lines.append("```") + md_lines.append("") + + if hasattr(provider_spec, "deprecation_warning") and provider_spec.deprecation_warning: + md_lines.append("## Deprecation Notice") + md_lines.append("") + md_lines.append(f"⚠️ **Warning**: {provider_spec.deprecation_warning}") + md_lines.append("") + + if hasattr(provider_spec, "deprecation_error") and provider_spec.deprecation_error: + md_lines.append("## Deprecation Error") + md_lines.append("") + md_lines.append(f"❌ **Error**: {provider_spec.deprecation_error}") + + return "\n".join(md_lines) + "\n" + + +def process_provider_registry(progress, change_tracker: ChangedPathTracker) -> None: + """Process the complete provider registry.""" + progress.print("Processing provider registry") + + try: + provider_registry = get_provider_registry() + + for api, providers in provider_registry.items(): + api_name = api.value + + doc_output_dir = REPO_ROOT / "docs" / "source" / "providers" / api_name + doc_output_dir.mkdir(parents=True, exist_ok=True) + change_tracker.add_paths(doc_output_dir) + + index_content = [] + index_content.append(f"# {api_name.title()} Providers") + index_content.append("") + index_content.append( + f"This section contains documentation for all available providers for the **{api_name}** API." 
+ ) + index_content.append("") + + for provider_type, provider in sorted(providers.items()): + provider_doc_file = doc_output_dir / f"{provider_type.replace('::', '_').replace(':', '_')}.md" + + provider_docs = generate_provider_docs(provider, api_name) + + provider_doc_file.write_text(provider_docs) + change_tracker.add_paths(provider_doc_file) + + index_content.append(f"- [{provider_type}]({provider_doc_file.name})") + + index_file = doc_output_dir / "index.md" + index_file.write_text("\n".join(index_content)) + change_tracker.add_paths(index_file) + + except Exception as e: + progress.print(f"[red]Error processing provider registry: {str(e)}") + raise e + + +def check_for_changes(change_tracker: ChangedPathTracker) -> bool: + """Check if there are any uncommitted changes, including new files.""" + has_changes = False + for path in change_tracker.changed_paths(): + result = subprocess.run( + ["git", "diff", "--exit-code", path], + cwd=REPO_ROOT, + capture_output=True, + ) + if result.returncode != 0: + print(f"Change detected in '{path}'.", file=sys.stderr) + has_changes = True + status_result = subprocess.run( + ["git", "status", "--porcelain", path], + cwd=REPO_ROOT, + capture_output=True, + text=True, + ) + for line in status_result.stdout.splitlines(): + if line.startswith("??"): + print(f"New file detected: '{path}'.", file=sys.stderr) + has_changes = True + return has_changes + + +def main(): + change_tracker = ChangedPathTracker() + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + ) as progress: + task = progress.add_task("Processing provider registry...", total=1) + + process_provider_registry(progress, change_tracker) + progress.update(task, advance=1) + + if check_for_changes(change_tracker): + print( + "Provider documentation changes detected. Please commit the changes.", + file=sys.stderr, + ) + sys.exit(1) + + sys.exit(0) + + +if __name__ == "__main__": + main()
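For reference, a minimal sketch of the convention this script consumes (a hypothetical provider config, assuming Pydantic v2): the `description` passed to each `Field` is what ends up in the Description column of the generated configuration table, and the script builds each row by walking `model_fields` much like this:

```python
# Hypothetical provider config -- the names below are illustrative, not part of Llama Stack.
from pydantic import BaseModel, Field


class ExampleVectorIOConfig(BaseModel):
    uri: str = Field(description="The URI of the example server")
    api_key: str | None = Field(default=None, description="Optional API key for the example server")


# One table row per field: name, description, and default value.
for name, field in ExampleVectorIOConfig.model_fields.items():
    print(f"| `{name}` | {field.description} | default={field.default!r} |")
```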