From 9d7320769bc89280b3969fd188cbb306d3d8c6f8 Mon Sep 17 00:00:00 2001 From: Young Han Date: Tue, 28 Oct 2025 16:34:05 -0700 Subject: [PATCH] fix: apply pre-commit hook auto-fixes and resolve mypy errors - Fixed mypy type error in MongoDB aggregate pipeline - Auto-formatted code with ruff - Generated provider documentation - Applied formatting to YAML files --- .../providers/vector_io/remote_mongodb.mdx | 27 +++++++++++++++++++ .../distributions/starter/starter.py | 24 +++++------------ .../remote/vector_io/mongodb/mongodb.py | 8 ++---- 3 files changed, 35 insertions(+), 24 deletions(-) diff --git a/docs/docs/providers/vector_io/remote_mongodb.mdx b/docs/docs/providers/vector_io/remote_mongodb.mdx index 29bdd8f83..8e23919b3 100644 --- a/docs/docs/providers/vector_io/remote_mongodb.mdx +++ b/docs/docs/providers/vector_io/remote_mongodb.mdx @@ -239,3 +239,30 @@ See [MongoDB Atlas Vector Search documentation](https://www.mongodb.com/docs/atl For general MongoDB documentation, visit [MongoDB Documentation](https://docs.mongodb.com/). 
+## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `connection_string` | `<class 'str'>` | No | | MongoDB Atlas connection string (e.g., mongodb+srv://user:pass@cluster.mongodb.net/) | +| `database_name` | `<class 'str'>` | No | llama_stack | Database name to use for vector collections | +| `index_name` | `<class 'str'>` | No | vector_index | Name of the vector search index | +| `path_field` | `<class 'str'>` | No | embedding | Field name for storing embeddings | +| `similarity_metric` | `<class 'str'>` | No | cosine | Similarity metric: cosine, euclidean, or dotProduct | +| `max_pool_size` | `<class 'int'>` | No | 100 | Maximum connection pool size | +| `timeout_ms` | `<class 'int'>` | No | 30000 | Connection timeout in milliseconds | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend for metadata storage | + +## Sample Configuration + +```yaml +connection_string: ${env.MONGODB_CONNECTION_STRING:=} +database_name: ${env.MONGODB_DATABASE_NAME:=llama_stack} +index_name: ${env.MONGODB_INDEX_NAME:=vector_index} +path_field: ${env.MONGODB_PATH_FIELD:=embedding} +similarity_metric: ${env.MONGODB_SIMILARITY_METRIC:=cosine} +max_pool_size: ${env.MONGODB_MAX_POOL_SIZE:=100} +timeout_ms: ${env.MONGODB_TIMEOUT_MS:=30000} +persistence: + namespace: vector_io::mongodb_atlas + backend: kv_default +``` diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index b26e77eff..c3de0aeba 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -86,8 +86,7 @@ def get_remote_inference_providers() -> list[Provider]: remote_providers = [ provider for provider in available_providers() - if isinstance(provider, RemoteProviderSpec) - and provider.adapter_type in ENABLED_INFERENCE_PROVIDERS + if isinstance(provider, RemoteProviderSpec) and provider.adapter_type in ENABLED_INFERENCE_PROVIDERS ] inference_providers = [] 
@@ -114,10 +113,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: remote_inference_providers = get_remote_inference_providers() providers = { - "inference": [ - BuildProvider(provider_type=p.provider_type, module=p.module) - for p in remote_inference_providers - ] + "inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in remote_inference_providers] + [BuildProvider(provider_type="inline::sentence-transformers")], "vector_io": [ BuildProvider(provider_type="inline::faiss"), @@ -159,9 +155,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: files_provider = Provider( provider_id="meta-reference-files", provider_type="inline::localfs", - config=LocalfsFilesImplConfig.sample_run_config( - f"~/.llama/distributions/{name}" - ), + config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), ) embedding_provider = Provider( provider_id="sentence-transformers", @@ -208,23 +202,17 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: Provider( provider_id="faiss", provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}" - ), + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( provider_id="sqlite-vec", provider_type="inline::sqlite-vec", - config=SQLiteVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}" - ), + config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( provider_id="${env.MILVUS_URL:+milvus}", provider_type="inline::milvus", - config=MilvusVectorIOConfig.sample_run_config( - f"~/.llama/distributions/{name}" - ), + config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( provider_id="${env.CHROMADB_URL:+chromadb}", diff --git a/src/llama_stack/providers/remote/vector_io/mongodb/mongodb.py 
b/src/llama_stack/providers/remote/vector_io/mongodb/mongodb.py index 4feeb88d1..275414746 100644 --- a/src/llama_stack/providers/remote/vector_io/mongodb/mongodb.py +++ b/src/llama_stack/providers/remote/vector_io/mongodb/mongodb.py @@ -17,12 +17,8 @@ from pymongo.server_api import ServerApi from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.inference import InterleavedContent +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.apis.vector_stores import VectorStore -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) from llama_stack.log import get_logger from llama_stack.providers.datatypes import ( HealthResponse, @@ -267,7 +263,7 @@ class MongoDBIndex(EmbeddingIndex): # Ensure text index exists await self._ensure_text_index() - pipeline = [ + pipeline: list[dict[str, Any]] = [ {"$match": {"$text": {"$search": query_string}}}, { "$project": {