From 9d7320769bc89280b3969fd188cbb306d3d8c6f8 Mon Sep 17 00:00:00 2001
From: Young Han <younghan@meta.com>
Date: Tue, 28 Oct 2025 16:34:05 -0700
Subject: [PATCH] fix: apply pre-commit hook auto-fixes and resolve mypy errors

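- Fixed mypy type error by annotating the MongoDB aggregate pipeline as `list[dict[str, Any]]`
- Auto-formatted code with ruff (joined wrapped expressions in starter.py; reordered and collapsed an import in mongodb.py)
- Generated provider documentation (Configuration and Sample Configuration sections in remote_mongodb.mdx)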
- Applied formatting to YAML files
---
 .../providers/vector_io/remote_mongodb.mdx    | 27 +++++++++++++++++++
 .../distributions/starter/starter.py          | 24 +++++------------
 .../remote/vector_io/mongodb/mongodb.py       |  8 ++----
 3 files changed, 35 insertions(+), 24 deletions(-)
diff --git a/docs/docs/providers/vector_io/remote_mongodb.mdx b/docs/docs/providers/vector_io/remote_mongodb.mdx
index 29bdd8f83..8e23919b3 100644
--- a/docs/docs/providers/vector_io/remote_mongodb.mdx
+++ b/docs/docs/providers/vector_io/remote_mongodb.mdx
@@ -239,3 +239,30 @@ See [MongoDB Atlas Vector Search documentation](https://www.mongodb.com/docs/atl
 For general MongoDB documentation, visit [MongoDB Documentation](https://docs.mongodb.com/).
 
 
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `connection_string` | `<class 'str'>` | No |  | MongoDB Atlas connection string (e.g., mongodb+srv://user:pass@cluster.mongodb.net/) |
+| `database_name` | `<class 'str'>` | No | llama_stack | Database name to use for vector collections |
+| `index_name` | `<class 'str'>` | No | vector_index | Name of the vector search index |
+| `path_field` | `<class 'str'>` | No | embedding | Field name for storing embeddings |
+| `similarity_metric` | `<class 'str'>` | No | cosine | Similarity metric: cosine, euclidean, or dotProduct |
+| `max_pool_size` | `<class 'int'>` | No | 100 | Maximum connection pool size |
+| `timeout_ms` | `<class 'int'>` | No | 30000 | Connection timeout in milliseconds |
+| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No |  | Config for KV store backend for metadata storage |
+
+## Sample Configuration
+
+```yaml
+connection_string: ${env.MONGODB_CONNECTION_STRING:=}
+database_name: ${env.MONGODB_DATABASE_NAME:=llama_stack}
+index_name: ${env.MONGODB_INDEX_NAME:=vector_index}
+path_field: ${env.MONGODB_PATH_FIELD:=embedding}
+similarity_metric: ${env.MONGODB_SIMILARITY_METRIC:=cosine}
+max_pool_size: ${env.MONGODB_MAX_POOL_SIZE:=100}
+timeout_ms: ${env.MONGODB_TIMEOUT_MS:=30000}
+persistence:
+  namespace: vector_io::mongodb_atlas
+  backend: kv_default
+```
diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py
index b26e77eff..c3de0aeba 100644
--- a/src/llama_stack/distributions/starter/starter.py
+++ b/src/llama_stack/distributions/starter/starter.py
@@ -86,8 +86,7 @@ def get_remote_inference_providers() -> list[Provider]:
     remote_providers = [
         provider
         for provider in available_providers()
-        if isinstance(provider, RemoteProviderSpec)
-        and provider.adapter_type in ENABLED_INFERENCE_PROVIDERS
+        if isinstance(provider, RemoteProviderSpec) and provider.adapter_type in ENABLED_INFERENCE_PROVIDERS
     ]
 
     inference_providers = []
@@ -114,10 +113,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
     remote_inference_providers = get_remote_inference_providers()
 
     providers = {
-        "inference": [
-            BuildProvider(provider_type=p.provider_type, module=p.module)
-            for p in remote_inference_providers
-        ]
+        "inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in remote_inference_providers]
         + [BuildProvider(provider_type="inline::sentence-transformers")],
         "vector_io": [
             BuildProvider(provider_type="inline::faiss"),
@@ -159,9 +155,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
     files_provider = Provider(
         provider_id="meta-reference-files",
         provider_type="inline::localfs",
-        config=LocalfsFilesImplConfig.sample_run_config(
-            f"~/.llama/distributions/{name}"
-        ),
+        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
     )
     embedding_provider = Provider(
         provider_id="sentence-transformers",
@@ -208,23 +202,17 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
                         Provider(
                             provider_id="faiss",
                             provider_type="inline::faiss",
-                            config=FaissVectorIOConfig.sample_run_config(
-                                f"~/.llama/distributions/{name}"
-                            ),
+                            config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
                         ),
                         Provider(
                             provider_id="sqlite-vec",
                             provider_type="inline::sqlite-vec",
-                            config=SQLiteVectorIOConfig.sample_run_config(
-                                f"~/.llama/distributions/{name}"
-                            ),
+                            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
                         ),
                         Provider(
                             provider_id="${env.MILVUS_URL:+milvus}",
                             provider_type="inline::milvus",
-                            config=MilvusVectorIOConfig.sample_run_config(
-                                f"~/.llama/distributions/{name}"
-                            ),
+                            config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
                         ),
                         Provider(
                             provider_id="${env.CHROMADB_URL:+chromadb}",
diff --git a/src/llama_stack/providers/remote/vector_io/mongodb/mongodb.py b/src/llama_stack/providers/remote/vector_io/mongodb/mongodb.py
index 4feeb88d1..275414746 100644
--- a/src/llama_stack/providers/remote/vector_io/mongodb/mongodb.py
+++ b/src/llama_stack/providers/remote/vector_io/mongodb/mongodb.py
@@ -17,12 +17,8 @@ from pymongo.server_api import ServerApi
 
 from llama_stack.apis.common.errors import VectorStoreNotFoundError
 from llama_stack.apis.inference import InterleavedContent
+from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
 from llama_stack.apis.vector_stores import VectorStore
-from llama_stack.apis.vector_io import (
-    Chunk,
-    QueryChunksResponse,
-    VectorIO,
-)
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
     HealthResponse,
@@ -267,7 +263,7 @@ class MongoDBIndex(EmbeddingIndex):
             # Ensure text index exists
             await self._ensure_text_index()
 
-            pipeline = [
+            pipeline: list[dict[str, Any]] = [
                 {"$match": {"$text": {"$search": query_string}}},
                 {
                     "$project": {