Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 09:53:45 +00:00)

Merge 503ad16002 into 356f37b1ba (commit fdc9ba2687)

17 changed files with 2066 additions and 3 deletions
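
This change adds a new `remote::mongodb` vector-io provider backed by MongoDB Atlas Vector Search: CI coverage in the vector-io integration test matrix, provider documentation, the provider implementation (`__init__.py`, `config.py`, `mongodb.py`), and registration in the distribution specs and the starter template.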

Integration vector-io tests workflow:

```diff
@@ -31,7 +31,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "remote::chromadb", "remote::pgvector", "remote::weaviate", "remote::qdrant"]
+        vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "remote::chromadb", "remote::pgvector", "remote::weaviate", "remote::qdrant", "remote::mongodb"]
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
       fail-fast: false # we want to run all tests regardless of failure
@@ -101,6 +101,16 @@ jobs:
             -p 6333:6333 \
             qdrant/qdrant

+      - name: Setup MongoDB
+        if: matrix.vector-io-provider == 'remote::mongodb'
+        run: |
+          docker run --rm -d --pull always \
+            --name mongodb \
+            -p 27017:27017 \
+            -e MONGO_INITDB_ROOT_USERNAME=llamastack \
+            -e MONGO_INITDB_ROOT_PASSWORD=llamastack \
+            mongodb/mongodb-atlas-local:latest
+
       - name: Wait for Qdrant to be ready
         if: matrix.vector-io-provider == 'remote::qdrant'
         run: |
@@ -116,6 +126,21 @@ jobs:
           docker logs qdrant
           exit 1

+      - name: Wait for MongoDB to be ready
+        if: matrix.vector-io-provider == 'remote::mongodb'
+        run: |
+          echo "Waiting for MongoDB to be ready..."
+          for i in {1..30}; do
+            if docker exec mongodb mongosh --quiet --eval "db.adminCommand('ping').ok" > /dev/null 2>&1; then
+              echo "MongoDB is ready!"
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "MongoDB failed to start"
+          docker logs mongodb
+          exit 1
+
       - name: Wait for ChromaDB to be ready
         if: matrix.vector-io-provider == 'remote::chromadb'
         run: |
@@ -170,6 +195,11 @@ jobs:
           QDRANT_URL: ${{ matrix.vector-io-provider == 'remote::qdrant' && 'http://localhost:6333' || '' }}
           ENABLE_WEAVIATE: ${{ matrix.vector-io-provider == 'remote::weaviate' && 'true' || '' }}
           WEAVIATE_CLUSTER_URL: ${{ matrix.vector-io-provider == 'remote::weaviate' && 'localhost:8080' || '' }}
+          ENABLE_MONGODB: ${{ matrix.vector-io-provider == 'remote::mongodb' && 'true' || '' }}
+          MONGODB_HOST: ${{ matrix.vector-io-provider == 'remote::mongodb' && 'localhost' || '' }}
+          MONGODB_PORT: ${{ matrix.vector-io-provider == 'remote::mongodb' && '27017' || '' }}
+          MONGODB_USERNAME: ${{ matrix.vector-io-provider == 'remote::mongodb' && 'llamastack' || '' }}
+          MONGODB_PASSWORD: ${{ matrix.vector-io-provider == 'remote::mongodb' && 'llamastack' || '' }}
         run: |
           uv run --no-sync \
             pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
@@ -196,6 +226,11 @@ jobs:
         run: |
           docker logs qdrant > qdrant.log

+      - name: Write MongoDB logs to file
+        if: ${{ always() && matrix.vector-io-provider == 'remote::mongodb' }}
+        run: |
+          docker logs mongodb > mongodb.log
+
       - name: Upload all logs to artifacts
         if: ${{ always() }}
         uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
```
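
To debug a failing `remote::mongodb` matrix job locally, the same service can be stood up with the commands from the steps above (a direct restatement of the workflow's own commands, not additional tooling):

```bash
# Start the same container the CI job uses
docker run --rm -d --pull always \
  --name mongodb \
  -p 27017:27017 \
  -e MONGO_INITDB_ROOT_USERNAME=llamastack \
  -e MONGO_INITDB_ROOT_PASSWORD=llamastack \
  mongodb/mongodb-atlas-local:latest

# Same readiness probe the workflow polls in a loop
docker exec mongodb mongosh --quiet --eval "db.adminCommand('ping').ok"
```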

docs/docs/providers/vector_io/remote_mongodb.mdx (new file, 276 lines)

---
description: |
  [MongoDB Atlas](https://www.mongodb.com/products/platform/atlas-vector-search) is a remote vector database provider for Llama Stack. It
  uses MongoDB Atlas Vector Search to store and query vectors in the cloud.
  That means you get enterprise-grade vector search with MongoDB's scalability and reliability.

  ## Features

  - Cloud-native vector search with MongoDB Atlas
  - Fully integrated with Llama Stack
  - Enterprise-grade security and scalability
  - Supports multiple search modes: vector, keyword, and hybrid search
  - Built-in metadata filtering and text search capabilities
  - Automatic index management

  ## Search Modes

  MongoDB Atlas Vector Search supports three different search modes:

  ### Vector Search
  Vector search uses MongoDB's `$vectorSearch` aggregation stage to perform semantic similarity search using embedding vectors.

  ```python
  # Vector search example
  search_response = client.vector_stores.search(
      vector_store_id=vector_store.id,
      query="What is machine learning?",
      search_mode="vector",
      max_num_results=5,
  )
  ```

  ### Keyword Search
  Keyword search uses MongoDB's text search capabilities with full-text indexes to find chunks containing specific terms.

  ```python
  # Keyword search example
  search_response = client.vector_stores.search(
      vector_store_id=vector_store.id,
      query="Python programming language",
      search_mode="keyword",
      max_num_results=5,
  )
  ```

  ### Hybrid Search
  Hybrid search combines both vector and keyword search methods using configurable reranking algorithms.

  ```python
  # Hybrid search with RRF ranker (default)
  search_response = client.vector_stores.search(
      vector_store_id=vector_store.id,
      query="neural networks in Python",
      search_mode="hybrid",
      max_num_results=5,
  )

  # Hybrid search with weighted ranker
  search_response = client.vector_stores.search(
      vector_store_id=vector_store.id,
      query="neural networks in Python",
      search_mode="hybrid",
      max_num_results=5,
      ranking_options={
          "ranker": {
              "type": "weighted",
              "alpha": 0.7,  # 70% vector search, 30% keyword search
          }
      },
  )
  ```

  ## Usage

  To use MongoDB Atlas in your Llama Stack project, follow these steps:

  1. Create a MongoDB Atlas cluster with Vector Search enabled
  2. Install the necessary dependencies
  3. Configure your Llama Stack project to use MongoDB
  4. Start storing and querying vectors

  ## Configuration

  ### Environment Variables
  Set up the following environment variable for your MongoDB Atlas connection:

  ```bash
  export MONGODB_CONNECTION_STRING="mongodb+srv://username:password@cluster.mongodb.net/?retryWrites=true&w=majority&appName=llama-stack"
  ```

  ### Configuration Example

  ```yaml
  vector_io:
    - provider_id: mongodb_atlas
      provider_type: remote::mongodb
      config:
        connection_string: "${env.MONGODB_CONNECTION_STRING}"
        database_name: "llama_stack"
        index_name: "vector_index"
        similarity_metric: "cosine"
  ```

  ## Installation

  You can install the MongoDB Python driver using pip:

  ```bash
  pip install pymongo
  ```

  ## Documentation

  See [MongoDB Atlas Vector Search documentation](https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-overview/) for more details about MongoDB Atlas Vector Search.

  For general MongoDB documentation, visit [MongoDB Documentation](https://docs.mongodb.com/).
sidebar_label: Remote - Mongodb
title: remote::mongodb
---

# remote::mongodb

## Description

[MongoDB Atlas](https://www.mongodb.com/products/platform/atlas-vector-search) is a remote vector database provider for Llama Stack. It
uses MongoDB Atlas Vector Search to store and query vectors in the cloud.
That means you get enterprise-grade vector search with MongoDB's scalability and reliability.

## Features

- Cloud-native vector search with MongoDB Atlas
- Fully integrated with Llama Stack
- Enterprise-grade security and scalability
- Supports multiple search modes: vector, keyword, and hybrid search
- Built-in metadata filtering and text search capabilities
- Automatic index management

## Search Modes

MongoDB Atlas Vector Search supports three different search modes:

### Vector Search
Vector search uses MongoDB's `$vectorSearch` aggregation stage to perform semantic similarity search using embedding vectors.

```python
# Vector search example
search_response = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="What is machine learning?",
    search_mode="vector",
    max_num_results=5,
)
```

### Keyword Search
Keyword search uses MongoDB's text search capabilities with full-text indexes to find chunks containing specific terms.

```python
# Keyword search example
search_response = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="Python programming language",
    search_mode="keyword",
    max_num_results=5,
)
```

### Hybrid Search
Hybrid search combines both vector and keyword search methods using configurable reranking algorithms.

```python
# Hybrid search with RRF ranker (default)
search_response = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="neural networks in Python",
    search_mode="hybrid",
    max_num_results=5,
)

# Hybrid search with weighted ranker
search_response = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="neural networks in Python",
    search_mode="hybrid",
    max_num_results=5,
    ranking_options={
        "ranker": {
            "type": "weighted",
            "alpha": 0.7,  # 70% vector search, 30% keyword search
        }
    },
)
```

## Usage

To use MongoDB Atlas in your Llama Stack project, follow these steps:

1. Create a MongoDB Atlas cluster with Vector Search enabled
2. Install the necessary dependencies
3. Configure your Llama Stack project to use MongoDB
4. Start storing and querying vectors

## Configuration

### Environment Variables
Set up the following environment variable for your MongoDB Atlas connection:

```bash
export MONGODB_CONNECTION_STRING="mongodb+srv://username:password@cluster.mongodb.net/?retryWrites=true&w=majority&appName=llama-stack"
```

### Configuration Example

```yaml
vector_io:
  - provider_id: mongodb_atlas
    provider_type: remote::mongodb
    config:
      connection_string: "${env.MONGODB_CONNECTION_STRING}"
      database_name: "llama_stack"
      index_name: "vector_index"
      similarity_metric: "cosine"
```

## Installation

You can install the MongoDB Python driver using pip:

```bash
pip install pymongo
```

## Documentation

See [MongoDB Atlas Vector Search documentation](https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-overview/) for more details about MongoDB Atlas Vector Search.

For general MongoDB documentation, visit [MongoDB Documentation](https://docs.mongodb.com/).

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `connection_string` | `str \| None` | No | | MongoDB connection string (e.g., mongodb://user:pass@localhost:27017/ or mongodb+srv://user:pass@cluster.mongodb.net/) |
| `host` | `str \| None` | No | | MongoDB host (used if connection_string is not provided) |
| `port` | `int \| None` | No | | MongoDB port (used if connection_string is not provided) |
| `username` | `str \| None` | No | | MongoDB username (used if connection_string is not provided) |
| `password` | `str \| None` | No | | MongoDB password (used if connection_string is not provided) |
| `database_name` | `<class 'str'>` | No | llama_stack | Database name to use for vector collections |
| `index_name` | `<class 'str'>` | No | vector_index | Name of the vector search index |
| `path_field` | `<class 'str'>` | No | embedding | Field name for storing embeddings |
| `similarity_metric` | `<class 'str'>` | No | cosine | Similarity metric: cosine, euclidean, or dotProduct |
| `max_pool_size` | `<class 'int'>` | No | 100 | Maximum connection pool size |
| `timeout_ms` | `<class 'int'>` | No | 30000 | Connection timeout in milliseconds |
| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend for metadata storage |

## Sample Configuration

```yaml
connection_string: ${env.MONGODB_CONNECTION_STRING:=}
host: ${env.MONGODB_HOST:=localhost}
port: ${env.MONGODB_PORT:=27017}
username: ${env.MONGODB_USERNAME:=}
password: ${env.MONGODB_PASSWORD:=}
database_name: ${env.MONGODB_DATABASE_NAME:=llama_stack}
index_name: ${env.MONGODB_INDEX_NAME:=vector_index}
path_field: ${env.MONGODB_PATH_FIELD:=embedding}
similarity_metric: ${env.MONGODB_SIMILARITY_METRIC:=cosine}
max_pool_size: ${env.MONGODB_MAX_POOL_SIZE:=100}
timeout_ms: ${env.MONGODB_TIMEOUT_MS:=30000}
persistence:
  namespace: vector_io::mongodb_atlas
  backend: kv_default
```
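
The doc above names two hybrid rankers but does not spell out the math; the actual combination lives in Llama Stack's shared `WeightedInMemoryAggregator`, used by the provider code later in this diff. A minimal standalone sketch of both schemes, assuming per-chunk-id score dictionaries (the RRF constant 60 is the conventional default and an assumption here):

```python
def weighted_combine(
    vector_scores: dict[str, float],
    keyword_scores: dict[str, float],
    alpha: float = 0.7,
) -> dict[str, float]:
    """Linear blend: alpha weights the vector score, (1 - alpha) the keyword score."""
    ids = set(vector_scores) | set(keyword_scores)
    return {
        i: alpha * vector_scores.get(i, 0.0) + (1.0 - alpha) * keyword_scores.get(i, 0.0)
        for i in ids
    }


def rrf_combine(
    vector_scores: dict[str, float],
    keyword_scores: dict[str, float],
    k: int = 60,
) -> dict[str, float]:
    """Reciprocal Rank Fusion: combines by rank, so raw score scales don't matter."""
    combined: dict[str, float] = {}
    for scores in (vector_scores, keyword_scores):
        ranked = sorted(scores, key=scores.__getitem__, reverse=True)
        for rank, chunk_id in enumerate(ranked, start=1):
            combined[chunk_id] = combined.get(chunk_id, 0.0) + 1.0 / (k + rank)
    return combined
```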

llama_stack/providers/remote/vector_io/mongodb/__init__.py (new file, 20 lines)

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.providers.datatypes import Api, ProviderSpec

from .config import MongoDBVectorIOConfig


async def get_adapter_impl(config: MongoDBVectorIOConfig, deps: dict[Api, ProviderSpec]):
    from .mongodb import MongoDBVectorIOAdapter

    # Handle the deps resolution - if files API exists, pass it, otherwise None
    files_api = deps.get(Api.files)
    models_api = deps.get(Api.models)
    impl = MongoDBVectorIOAdapter(config, deps[Api.inference], files_api, models_api)
    await impl.initialize()
    return impl
```

llama_stack/providers/remote/vector_io/mongodb/config.py (new file, 102 lines)

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from pydantic import BaseModel, Field

from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.schema_utils import json_schema_type


@json_schema_type
class MongoDBVectorIOConfig(BaseModel):
    """Configuration for MongoDB Atlas Vector Search provider.

    This provider connects to MongoDB Atlas and uses Vector Search for RAG operations.
    """

    # MongoDB connection details - either connection_string or individual parameters
    connection_string: str | None = Field(
        default=None,
        description="MongoDB connection string (e.g., mongodb://user:pass@localhost:27017/ or mongodb+srv://user:pass@cluster.mongodb.net/)",
    )
    host: str | None = Field(default=None, description="MongoDB host (used if connection_string is not provided)")
    port: int | None = Field(default=None, description="MongoDB port (used if connection_string is not provided)")
    username: str | None = Field(
        default=None, description="MongoDB username (used if connection_string is not provided)"
    )
    password: str | None = Field(
        default=None, description="MongoDB password (used if connection_string is not provided)"
    )
    database_name: str = Field(default="llama_stack", description="Database name to use for vector collections")

    # Vector search configuration
    index_name: str = Field(default="vector_index", description="Name of the vector search index")
    path_field: str = Field(default="embedding", description="Field name for storing embeddings")
    similarity_metric: str = Field(
        default="cosine",
        description="Similarity metric: cosine, euclidean, or dotProduct",
    )

    # Connection options
    max_pool_size: int = Field(default=100, description="Maximum connection pool size")
    timeout_ms: int = Field(default=30000, description="Connection timeout in milliseconds")

    # KV store configuration
    persistence: KVStoreReference | None = Field(
        description="Config for KV store backend for metadata storage", default=None
    )

    def get_connection_string(self) -> str | None:
        """Build connection string from individual parameters if not provided directly.

        If both connection_string and individual parameters (host/port) are provided,
        individual parameters take precedence to allow test environment overrides.
        """
        # Prioritize individual connection parameters over connection_string
        # This allows test environments to override with MONGODB_HOST/PORT/etc
        if self.host and self.port:
            auth_part = ""
            if self.username and self.password:
                auth_part = f"{self.username}:{self.password}@"
            return f"mongodb://{auth_part}{self.host}:{self.port}/"

        # Fall back to connection_string if provided
        if self.connection_string:
            return self.connection_string

        return None

    @classmethod
    def sample_run_config(
        cls,
        __distro_dir__: str,
        connection_string: str = "${env.MONGODB_CONNECTION_STRING:=}",
        host: str = "${env.MONGODB_HOST:=localhost}",
        port: int = "${env.MONGODB_PORT:=27017}",
        username: str = "${env.MONGODB_USERNAME:=}",
        password: str = "${env.MONGODB_PASSWORD:=}",
        database_name: str = "${env.MONGODB_DATABASE_NAME:=llama_stack}",
        **kwargs: Any,
    ) -> dict[str, Any]:
        return {
            "connection_string": connection_string,
            "host": host,
            "port": port,
            "username": username,
            "password": password,
            "database_name": database_name,
            "index_name": "${env.MONGODB_INDEX_NAME:=vector_index}",
            "path_field": "${env.MONGODB_PATH_FIELD:=embedding}",
            "similarity_metric": "${env.MONGODB_SIMILARITY_METRIC:=cosine}",
            "max_pool_size": "${env.MONGODB_MAX_POOL_SIZE:=100}",
            "timeout_ms": "${env.MONGODB_TIMEOUT_MS:=30000}",
            "persistence": KVStoreReference(
                backend="kv_default",
                namespace="vector_io::mongodb_atlas",
            ).model_dump(exclude_none=True),
        }
```
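
One behavior worth seeing concretely: `get_connection_string()` gives `host`/`port` precedence over an explicit `connection_string`, so environment-style overrides (as in the CI job above) win. A quick sketch of that precedence using the class as defined:

```python
from llama_stack.providers.remote.vector_io.mongodb.config import MongoDBVectorIOConfig

cfg = MongoDBVectorIOConfig(
    connection_string="mongodb+srv://user:pass@cluster.mongodb.net/",
    host="localhost",
    port=27017,
)
# host/port win over connection_string by design, so a test environment
# can redirect an Atlas-style config at a local container:
assert cfg.get_connection_string() == "mongodb://localhost:27017/"

# Without host/port, the configured URI is used as-is:
cfg_atlas = MongoDBVectorIOConfig(connection_string="mongodb+srv://user:pass@cluster.mongodb.net/")
assert cfg_atlas.get_connection_string() == "mongodb+srv://user:pass@cluster.mongodb.net/"
```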

llama_stack/providers/remote/vector_io/mongodb/mongodb.py (new file, 609 lines)

```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import heapq
import time
from typing import Any

from numpy.typing import NDArray
from pymongo import MongoClient
from pymongo.collection import Collection
from pymongo.database import Database
from pymongo.operations import SearchIndexModel
from pymongo.server_api import ServerApi

from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
    HealthResponse,
    HealthStatus,
    VectorStoresProtocolPrivate,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
    interleaved_content_as_str,
)
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import (
    OpenAIVectorStoreMixin,
)
from llama_stack.providers.utils.memory.vector_store import (
    ChunkForDeletion,
    EmbeddingIndex,
    VectorStoreWithIndex,
)
from llama_stack.providers.utils.vector_io.vector_utils import (
    WeightedInMemoryAggregator,
    sanitize_collection_name,
)

from .config import MongoDBVectorIOConfig

logger = get_logger(name=__name__, category="vector_io::mongodb")

VERSION = "v1"
VECTOR_DBS_PREFIX = f"vector_dbs:mongodb:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:mongodb:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:mongodb:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:mongodb:{VERSION}::"


class MongoDBIndex(EmbeddingIndex):
    """MongoDB Atlas Vector Search index implementation optimized for RAG."""

    def __init__(
        self,
        vector_store: VectorStore,
        collection: Collection,
        config: MongoDBVectorIOConfig,
    ):
        self.vector_store = vector_store
        self.collection = collection
        self.config = config
        self.dimension = vector_store.embedding_dimension

    async def initialize(self) -> None:
        """Initialize the MongoDB collection and ensure vector search index exists."""
        try:
            # Create the collection if it doesn't exist
            collection_names = self.collection.database.list_collection_names()
            if self.collection.name not in collection_names:
                logger.info(f"Creating collection '{self.collection.name}'")
                # Create collection by inserting a dummy document
                dummy_doc = {"_id": "__dummy__", "dummy": True}
                self.collection.insert_one(dummy_doc)
                # Remove the dummy document
                self.collection.delete_one({"_id": "__dummy__"})
                logger.info(f"Collection '{self.collection.name}' created successfully")

            # Create optimized vector search index for RAG
            await self._create_vector_search_index()

            # Create text index for hybrid search
            await self._ensure_text_index()

        except Exception as e:
            logger.exception(
                f"Failed to initialize MongoDB index for vector_store: {self.vector_store.identifier}. "
                f"Collection name: {self.collection.name}. Error: {str(e)}"
            )
            # Don't fail completely - just log the error and continue
            logger.warning(
                "Continuing without complete index initialization. "
                "You may need to create indexes manually in MongoDB Atlas dashboard."
            )

    async def _create_vector_search_index(self) -> None:
        """Create optimized vector search index based on MongoDB RAG best practices."""
        try:
            # Check if vector search index exists
            indexes = list(self.collection.list_search_indexes())
            index_exists = any(idx.get("name") == self.config.index_name for idx in indexes)

            if not index_exists:
                # Create vector search index optimized for RAG
                # Based on MongoDB's RAG example using new vectorSearch format
                search_index_model = SearchIndexModel(
                    definition={
                        "fields": [
                            {
                                "type": "vector",
                                "numDimensions": self.dimension,
                                "path": self.config.path_field,
                                "similarity": self._convert_similarity_metric(self.config.similarity_metric),
                            }
                        ]
                    },
                    name=self.config.index_name,
                    type="vectorSearch",
                )

                logger.info(
                    f"Creating vector search index '{self.config.index_name}' for RAG on collection '{self.collection.name}'"
                )

                self.collection.create_search_index(model=search_index_model)

                # Wait for index to be ready (like in MongoDB RAG example)
                await self._wait_for_index_ready()

                logger.info("Vector search index created and ready for RAG queries")

        except Exception as e:
            logger.warning(f"Failed to create vector search index: {e}")

    def _convert_similarity_metric(self, metric: str) -> str:
        """Convert internal similarity metric to MongoDB Atlas format."""
        metric_map = {
            "cosine": "cosine",
            "euclidean": "euclidean",
            "dotProduct": "dotProduct",
            "dot_product": "dotProduct",
        }
        return metric_map.get(metric, "cosine")

    async def _wait_for_index_ready(self) -> None:
        """Wait for the vector search index to be ready, based on MongoDB RAG example."""
        logger.info("Waiting for vector search index to be ready...")

        max_wait_time = 300  # 5 minutes max wait
        wait_interval = 5
        elapsed_time = 0

        while elapsed_time < max_wait_time:
            try:
                indices = list(self.collection.list_search_indexes(self.config.index_name))
                if len(indices) and indices[0].get("queryable") is True:
                    logger.info(f"Vector search index '{self.config.index_name}' is ready for querying")
                    return
            except Exception:
                pass

            time.sleep(wait_interval)
            elapsed_time += wait_interval

        logger.warning(f"Vector search index may not be fully ready after {max_wait_time}s")

    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray) -> None:
        """Add chunks with embeddings to MongoDB collection optimized for RAG."""
        if len(chunks) != len(embeddings):
            raise ValueError(f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}")

        documents = []
        for i, chunk in enumerate(chunks):
            # Structure document for optimal RAG retrieval
            doc = {
                "_id": chunk.chunk_id,
                "chunk_id": chunk.chunk_id,
                "text": interleaved_content_as_str(chunk.content),  # Key field for RAG context
                "content": interleaved_content_as_str(chunk.content),  # Backward compatibility
                "metadata": chunk.metadata or {},
                "chunk_metadata": (chunk.chunk_metadata.model_dump() if chunk.chunk_metadata else {}),
                self.config.path_field: embeddings[i].tolist(),  # Vector embeddings
                "document": chunk.model_dump(),  # Full chunk data
            }
            documents.append(doc)

        try:
            # Use upsert behavior for chunks
            for doc in documents:
                self.collection.replace_one({"_id": doc["_id"]}, doc, upsert=True)

            logger.debug(f"Successfully added {len(chunks)} chunks optimized for RAG to MongoDB collection")
        except Exception as e:
            logger.exception(f"Failed to add chunks to MongoDB collection: {e}")
            raise

    async def query_vector(
        self,
        embedding: NDArray,
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
        """Perform vector similarity search optimized for RAG based on MongoDB example."""
        try:
            # Use MongoDB's vector search aggregation pipeline optimized for RAG
            pipeline = [
                {
                    "$vectorSearch": {
                        "index": self.config.index_name,
                        "queryVector": embedding.tolist(),
                        "path": self.config.path_field,
                        "numCandidates": min(k * 10, 1000),  # Cap at 1000 to prevent excessive candidates
                        "limit": k,
                    }
                },
                {
                    "$project": {
                        "_id": 0,
                        "text": 1,  # Primary field for RAG context
                        "content": 1,  # Backward compatibility
                        "metadata": 1,
                        "chunk_metadata": 1,
                        "document": 1,
                        "score": {"$meta": "vectorSearchScore"},
                    }
                },
                {"$match": {"score": {"$gte": score_threshold}}},
            ]

            results = list(self.collection.aggregate(pipeline))

            chunks = []
            scores = []
            for result in results:
                score = result.get("score", 0.0)
                if score >= score_threshold:
                    chunk_data = result.get("document", {})
                    if chunk_data:
                        chunks.append(Chunk(**chunk_data))
                        scores.append(float(score))

            logger.debug(f"Vector search for RAG returned {len(chunks)} results")
            return QueryChunksResponse(chunks=chunks, scores=scores)

        except Exception as e:
            logger.exception(f"Vector search for RAG failed: {e}")
            raise RuntimeError(f"Vector search for RAG failed: {e}") from e

    async def query_keyword(
        self,
        query_string: str,
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
        """Perform text search using MongoDB's text search for RAG context retrieval."""
        try:
            # Ensure text index exists
            await self._ensure_text_index()

            pipeline: list[dict[str, Any]] = [
                {"$match": {"$text": {"$search": query_string}}},
                {
                    "$project": {
                        "_id": 0,
                        "text": 1,  # Primary field for RAG context
                        "content": 1,  # Backward compatibility
                        "metadata": 1,
                        "chunk_metadata": 1,
                        "document": 1,
                        "score": {"$meta": "textScore"},
                    }
                },
                {"$match": {"score": {"$gte": score_threshold}}},
                {"$sort": {"score": {"$meta": "textScore"}}},
                {"$limit": k},
            ]

            results = list(self.collection.aggregate(pipeline))

            chunks = []
            scores = []
            for result in results:
                score = result.get("score", 0.0)
                if score >= score_threshold:
                    chunk_data = result.get("document", {})
                    if chunk_data:
                        chunks.append(Chunk(**chunk_data))
                        scores.append(float(score))

            logger.debug(f"Keyword search for RAG returned {len(chunks)} results")
            return QueryChunksResponse(chunks=chunks, scores=scores)

        except Exception as e:
            logger.exception(f"Keyword search for RAG failed: {e}")
            raise RuntimeError(f"Keyword search for RAG failed: {e}") from e

    async def query_hybrid(
        self,
        embedding: NDArray,
        query_string: str,
        k: int,
        score_threshold: float,
        reranker_type: str,
        reranker_params: dict[str, Any] | None = None,
    ) -> QueryChunksResponse:
        """Perform hybrid search for enhanced RAG context retrieval."""
        if reranker_params is None:
            reranker_params = {}

        # Get results from both search methods
        vector_response = await self.query_vector(embedding, k, 0.0)
        keyword_response = await self.query_keyword(query_string, k, 0.0)

        # Convert responses to score dictionaries
        vector_scores = {
            chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
        }
        keyword_scores = {
            chunk.chunk_id: score
            for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
        }

        # Combine scores using the reranking utility
        combined_scores = WeightedInMemoryAggregator.combine_search_results(
            vector_scores, keyword_scores, reranker_type, reranker_params
        )

        # Get top-k results
        top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1])

        # Filter by score threshold
        filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]

        # Create chunk map
        chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}

        # Build final results
        chunks = []
        scores = []
        for doc_id, score in filtered_items:
            if doc_id in chunk_map:
                chunks.append(chunk_map[doc_id])
                scores.append(score)

        logger.debug(f"Hybrid search for RAG returned {len(chunks)} results")
        return QueryChunksResponse(chunks=chunks, scores=scores)

    async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete chunks from MongoDB collection."""
        chunk_ids = [c.chunk_id for c in chunks_for_deletion]
        try:
            result = self.collection.delete_many({"_id": {"$in": chunk_ids}})
            logger.debug(f"Deleted {result.deleted_count} chunks from MongoDB collection")
        except Exception as e:
            logger.exception(f"Failed to delete chunks: {e}")
            raise

    async def delete(self) -> None:
        """Delete the entire collection."""
        try:
            self.collection.drop()
            logger.debug(f"Dropped MongoDB collection: {self.collection.name}")
        except Exception as e:
            logger.exception(f"Failed to drop collection: {e}")
            raise

    async def _ensure_text_index(self) -> None:
        """Ensure text search index exists on content fields for RAG."""
        try:
            indexes = list(self.collection.list_indexes())
            text_index_exists = any(
                any(key.startswith(("content", "text")) for key in idx.get("key", {}).keys())
                and idx.get("textIndexVersion") is not None
                for idx in indexes
            )

            if not text_index_exists:
                logger.info("Creating text search index on content fields for RAG")
                # Index both 'text' and 'content' fields for comprehensive text search
                self.collection.create_index([("text", "text"), ("content", "text")])
                logger.info("Text search index created successfully for RAG")

        except Exception as e:
            logger.warning(f"Failed to create text index for RAG: {e}")


class MongoDBVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
    """MongoDB Atlas Vector Search adapter for Llama Stack optimized for RAG workflows."""

    def __init__(
        self,
        config: MongoDBVectorIOConfig,
        inference_api,
        files_api=None,
        models_api=None,
    ) -> None:
        # Handle the case where files_api might be a ProviderSpec that needs resolution
        resolved_files_api = files_api
        super().__init__(files_api=resolved_files_api, kvstore=None)
        self.config = config
        self.inference_api = inference_api
        self.models_api = models_api
        self.client: MongoClient | None = None
        self.database: Database | None = None
        self.cache: dict[str, VectorStoreWithIndex] = {}
        self.kvstore: KVStore | None = None

    async def initialize(self) -> None:
        """Initialize MongoDB connection optimized for RAG workflows."""
        logger.info("Initializing MongoDB Atlas Vector IO adapter for RAG")

        try:
            # Initialize KV store for metadata
            if self.config.persistence:
                self.kvstore = await kvstore_impl(self.config.persistence)

            # Skip MongoDB connection if no connection string provided
            # This allows other providers to work without MongoDB credentials
            if not self.config.connection_string:
                logger.warning(
                    "MongoDB connection_string not provided. "
                    "MongoDB vector store will not be available until credentials are configured."
                )
                return

            # Connect to MongoDB with optimized settings for RAG
            self.client = MongoClient(
                self.config.connection_string,
                server_api=ServerApi("1"),
                maxPoolSize=self.config.max_pool_size,
                serverSelectionTimeoutMS=self.config.timeout_ms,
                # Additional settings for RAG performance
                retryWrites=True,
                readPreference="primaryPreferred",
            )

            # Test connection
            self.client.admin.command("ping")
            logger.info("Successfully connected to MongoDB Atlas for RAG")

            # Get database
            self.database = self.client[self.config.database_name]

            # Initialize OpenAI vector stores
            await self.initialize_openai_vector_stores()

            # Load existing vector databases
            await self._load_existing_vector_dbs()

            logger.info("MongoDB Atlas Vector IO adapter for RAG initialized successfully")

        except Exception as e:
            logger.exception("Failed to initialize MongoDB Atlas Vector IO adapter for RAG")
            raise RuntimeError("Failed to initialize MongoDB Atlas Vector IO adapter for RAG") from e

    async def shutdown(self) -> None:
        """Shutdown MongoDB connection."""
        if self.client:
            self.client.close()
            logger.info("MongoDB Atlas RAG connection closed")

    async def health(self) -> HealthResponse:
        """Perform health check on MongoDB connection."""
        try:
            if self.client:
                self.client.admin.command("ping")
                return HealthResponse(status=HealthStatus.OK)
            else:
                return HealthResponse(status=HealthStatus.ERROR, message="MongoDB client not initialized")
        except Exception as e:
            return HealthResponse(
                status=HealthStatus.ERROR,
                message=f"MongoDB RAG health check failed: {str(e)}",
            )

    async def register_vector_store(self, vector_store: VectorStore) -> None:
        """Register a new vector store optimized for RAG."""
        if self.database is None:
            raise RuntimeError("MongoDB database not initialized")

        # Create collection name from vector store identifier
        collection_name = sanitize_collection_name(vector_store.identifier)
        collection = self.database[collection_name]

        # Create and initialize MongoDB index optimized for RAG
        mongodb_index = MongoDBIndex(vector_store, collection, self.config)
        await mongodb_index.initialize()

        # Create vector store with index wrapper
        vector_store_with_index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=mongodb_index,
            inference_api=self.inference_api,
        )

        # Cache the vector store
        self.cache[vector_store.identifier] = vector_store_with_index

        # Save vector store info to KVStore for persistence
        if self.kvstore:
            await self.kvstore.set(
                f"{VECTOR_DBS_PREFIX}{vector_store.identifier}",
                vector_store.model_dump_json(),
            )

        logger.info(f"Registered vector store for RAG: {vector_store.identifier}")

    async def unregister_vector_store(self, vector_store_id: str) -> None:
        """Unregister a vector store."""
        if vector_store_id in self.cache:
            await self.cache[vector_store_id].index.delete()
            del self.cache[vector_store_id]

        # Clean up from KV store
        if self.kvstore:
            await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")

        logger.info(f"Unregistered vector store: {vector_store_id}")

    async def insert_chunks(
        self,
        vector_db_id: str,
        chunks: list[Chunk],
        ttl_seconds: int | None = None,
    ) -> None:
        """Insert chunks into the vector database optimized for RAG."""
        vector_db_with_index = await self._get_vector_db_index(vector_db_id)
        await vector_db_with_index.insert_chunks(chunks)

    async def query_chunks(
        self,
        vector_db_id: str,
        query: InterleavedContent,
        params: dict[str, Any] | None = None,
    ) -> QueryChunksResponse:
        """Query chunks from the vector database optimized for RAG context retrieval."""
        vector_db_with_index = await self._get_vector_db_index(vector_db_id)
        return await vector_db_with_index.query_chunks(query, params)

    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete chunks from the vector database."""
        vector_db_with_index = await self._get_vector_db_index(store_id)
        await vector_db_with_index.index.delete_chunks(chunks_for_deletion)

    async def _get_vector_db_index(self, vector_db_id: str) -> VectorStoreWithIndex:
        """Get vector store index from cache."""
        if vector_db_id in self.cache:
            return self.cache[vector_db_id]

        raise VectorStoreNotFoundError(vector_db_id)

    async def _load_existing_vector_dbs(self) -> None:
        """Load existing vector databases from KVStore."""
        if not self.kvstore:
            return

        try:
            # Use keys_in_range to get all vector database keys from KVStore
            # This searches for keys with the prefix by using range scan
            start_key = VECTOR_DBS_PREFIX
            # Create an end key by incrementing the last character
            end_key = VECTOR_DBS_PREFIX[:-1] + chr(ord(VECTOR_DBS_PREFIX[-1]) + 1)

            vector_db_keys = await self.kvstore.keys_in_range(start_key, end_key)

            for key in vector_db_keys:
                try:
                    vector_store_data = await self.kvstore.get(key)
                    if vector_store_data:
                        import json

                        vector_store = VectorStore(**json.loads(vector_store_data))
                        # Register the vector store without re-initializing
                        await self._register_existing_vector_store(vector_store)
                        logger.info(f"Loaded existing RAG-optimized vector store: {vector_store.identifier}")
                except Exception as e:
                    logger.warning(f"Failed to load vector store from key {key}: {e}")
                    continue

        except Exception as e:
            logger.warning(f"Failed to load existing vector stores: {e}")

    async def _register_existing_vector_store(self, vector_store: VectorStore) -> None:
        """Register an existing vector store without re-initialization."""
        if self.database is None:
            raise RuntimeError("MongoDB database not initialized")

        # Create collection name from vector store identifier
        collection_name = sanitize_collection_name(vector_store.identifier)
        collection = self.database[collection_name]

        # Create MongoDB index without initialization (collection already exists)
        mongodb_index = MongoDBIndex(vector_store, collection, self.config)

        # Create vector store with index wrapper
        vector_store_with_index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=mongodb_index,
            inference_api=self.inference_api,
        )

        # Cache the vector store
        self.cache[vector_store.identifier] = vector_store_with_index
```
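
For readers new to Atlas Vector Search, the retrieval path above reduces to a single aggregation pipeline. A standalone sketch of an equivalent query with plain pymongo (hypothetical collection, database, and vector values; it assumes a queryable `vectorSearch` index named `vector_index` over an `embedding` field, as the adapter creates):

```python
from pymongo import MongoClient

# Hypothetical connection; mirrors the CI container credentials above.
client = MongoClient("mongodb://llamastack:llamastack@localhost:27017/")
coll = client["llama_stack"]["demo_chunks"]

k = 5
query_vector = [0.01] * 384  # one entry per embedding dimension (384 is an assumption)

pipeline = [
    {
        "$vectorSearch": {
            "index": "vector_index",
            "queryVector": query_vector,
            "path": "embedding",
            "numCandidates": min(k * 10, 1000),  # same cap the adapter applies
            "limit": k,
        }
    },
    {"$project": {"_id": 0, "text": 1, "score": {"$meta": "vectorSearchScore"}}},
]

for doc in coll.aggregate(pipeline):
    print(f"{doc['score']:.3f}  {doc.get('text', '')[:80]}")
```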
Distribution configs (the same change is applied to three distributions' build and run specs):

```diff
@@ -25,6 +25,7 @@ distribution_spec:
     - provider_type: inline::milvus
     - provider_type: remote::chromadb
     - provider_type: remote::pgvector
+    - provider_type: remote::mongodb
     - provider_type: remote::qdrant
     - provider_type: remote::weaviate
   files:
@@ -131,6 +131,23 @@ providers:
       persistence:
         namespace: vector_io::pgvector
         backend: kv_default
+  - provider_id: ${env.MONGODB_CONNECTION_STRING:+mongodb_atlas}
+    provider_type: remote::mongodb
+    config:
+      connection_string: ${env.MONGODB_CONNECTION_STRING:=}
+      host: ${env.MONGODB_HOST:=localhost}
+      port: ${env.MONGODB_PORT:=27017}
+      username: ${env.MONGODB_USERNAME:=}
+      password: ${env.MONGODB_PASSWORD:=}
+      database_name: ${env.MONGODB_DATABASE_NAME:=llama_stack}
+      index_name: ${env.MONGODB_INDEX_NAME:=vector_index}
+      path_field: ${env.MONGODB_PATH_FIELD:=embedding}
+      similarity_metric: ${env.MONGODB_SIMILARITY_METRIC:=cosine}
+      max_pool_size: ${env.MONGODB_MAX_POOL_SIZE:=100}
+      timeout_ms: ${env.MONGODB_TIMEOUT_MS:=30000}
+      persistence:
+        namespace: vector_io::mongodb_atlas
+        backend: kv_default
   - provider_id: ${env.QDRANT_URL:+qdrant}
     provider_type: remote::qdrant
     config:
```

The same two hunks, offset by one line in the build spec (`@@ -26,6 +26,7 @@ distribution_spec:` and `@@ -131,6 +131,23 @@ providers:`), are applied verbatim in two further distribution configs.
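
A note on the `${env.VAR:+value}` / `${env.VAR:=default}` syntax used throughout these configs: it mirrors shell parameter expansion, where the `:=` form supplies a default when the variable is unset and the `:+` form yields the provider_id only when the variable is set. That is what keeps the MongoDB provider effectively disabled until `MONGODB_CONNECTION_STRING` is exported. The shell analogue, for intuition:

```bash
unset MONGODB_CONNECTION_STRING
echo "provider_id=${MONGODB_CONNECTION_STRING:+mongodb_atlas}"
# provider_id=              (unset -> empty -> provider disabled)

export MONGODB_CONNECTION_STRING="mongodb+srv://user:pass@cluster.mongodb.net/"
echo "provider_id=${MONGODB_CONNECTION_STRING:+mongodb_atlas}"
# provider_id=mongodb_atlas

echo "host=${MONGODB_HOST:=localhost}"
# host=localhost            (:= substitutes the default when unset)
```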
@ -36,11 +36,14 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
|
||||||
)
|
)
|
||||||
from llama_stack.providers.registry.inference import available_providers
|
from llama_stack.providers.registry.inference import available_providers
|
||||||
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
|
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
|
||||||
|
from llama_stack.providers.remote.vector_io.mongodb.config import MongoDBVectorIOConfig
|
||||||
from llama_stack.providers.remote.vector_io.pgvector.config import (
|
from llama_stack.providers.remote.vector_io.pgvector.config import (
|
||||||
PGVectorVectorIOConfig,
|
PGVectorVectorIOConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
|
from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
|
||||||
from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
|
from llama_stack.providers.remote.vector_io.weaviate.config import (
|
||||||
|
WeaviateVectorIOConfig,
|
||||||
|
)
|
||||||
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
|
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
|
||||||
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
|
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
|
||||||
|
|
||||||
|
|
@ -124,6 +127,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
||||||
BuildProvider(provider_type="inline::milvus"),
|
BuildProvider(provider_type="inline::milvus"),
|
||||||
BuildProvider(provider_type="remote::chromadb"),
|
BuildProvider(provider_type="remote::chromadb"),
|
||||||
BuildProvider(provider_type="remote::pgvector"),
|
BuildProvider(provider_type="remote::pgvector"),
|
||||||
|
BuildProvider(provider_type="remote::mongodb"),
|
||||||
BuildProvider(provider_type="remote::qdrant"),
|
BuildProvider(provider_type="remote::qdrant"),
|
||||||
BuildProvider(provider_type="remote::weaviate"),
|
BuildProvider(provider_type="remote::weaviate"),
|
||||||
],
|
],
|
||||||
|
|
@@ -254,7 +258,70 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
         additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())),
         run_configs={
             "run.yaml": RunConfigSettings(
-                provider_overrides=default_overrides,
+                provider_overrides={
+                    "inference": remote_inference_providers + [embedding_provider],
+                    "vector_io": [
+                        Provider(
+                            provider_id="faiss",
+                            provider_type="inline::faiss",
+                            config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+                        ),
+                        Provider(
+                            provider_id="sqlite-vec",
+                            provider_type="inline::sqlite-vec",
+                            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+                        ),
+                        Provider(
+                            provider_id="${env.MILVUS_URL:+milvus}",
+                            provider_type="inline::milvus",
+                            config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+                        ),
+                        Provider(
+                            provider_id="${env.CHROMADB_URL:+chromadb}",
+                            provider_type="remote::chromadb",
+                            config=ChromaVectorIOConfig.sample_run_config(
+                                f"~/.llama/distributions/{name}/",
+                                url="${env.CHROMADB_URL:=}",
+                            ),
+                        ),
+                        Provider(
+                            provider_id="${env.PGVECTOR_DB:+pgvector}",
+                            provider_type="remote::pgvector",
+                            config=PGVectorVectorIOConfig.sample_run_config(
+                                f"~/.llama/distributions/{name}",
+                                db="${env.PGVECTOR_DB:=}",
+                                user="${env.PGVECTOR_USER:=}",
+                                password="${env.PGVECTOR_PASSWORD:=}",
+                            ),
+                        ),
+                        Provider(
+                            provider_id="${env.MONGODB_CONNECTION_STRING:+mongodb_atlas}",
+                            provider_type="remote::mongodb",
+                            config=MongoDBVectorIOConfig.sample_run_config(
+                                f"~/.llama/distributions/{name}",
+                                connection_string="${env.MONGODB_CONNECTION_STRING:=}",
+                                database_name="${env.MONGODB_DATABASE_NAME:=llama_stack}",
+                            ),
+                        ),
+                        Provider(
+                            provider_id="${env.QDRANT_URL:+qdrant}",
+                            provider_type="remote::qdrant",
+                            config=QdrantVectorIOConfig.sample_run_config(
+                                f"~/.llama/distributions/{name}",
+                                url="${env.QDRANT_URL:=}",
+                            ),
+                        ),
+                        Provider(
+                            provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
+                            provider_type="remote::weaviate",
+                            config=WeaviateVectorIOConfig.sample_run_config(
+                                f"~/.llama/distributions/{name}",
+                                cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
+                            ),
+                        ),
+                    ],
+                    "files": [files_provider],
+                },
                 default_models=[],
                 default_tool_groups=default_tool_groups,
                 default_shields=default_shields,
@@ -384,5 +451,13 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
             "azure",
             "Azure API Type",
         ),
+        "MONGODB_CONNECTION_STRING": (
+            "",
+            "MongoDB Atlas connection string (e.g., mongodb+srv://user:pass@cluster.mongodb.net/)",
+        ),
+        "MONGODB_DATABASE_NAME": (
+            "llama_stack",
+            "MongoDB database name",
+        ),
     },
 )
@@ -823,6 +823,132 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
         optional_api_dependencies=[Api.files, Api.models],
         description="""
 Please refer to the remote provider documentation.
+""",
+    ),
+    RemoteProviderSpec(
+        api=Api.vector_io,
+        adapter_type="mongodb",
+        provider_type="remote::mongodb",
+        pip_packages=["pymongo>=4.0.0"],
+        module="llama_stack.providers.remote.vector_io.mongodb",
+        config_class="llama_stack.providers.remote.vector_io.mongodb.MongoDBVectorIOConfig",
+        api_dependencies=[Api.inference],
+        optional_api_dependencies=[Api.files],
+        description="""
+[MongoDB Atlas](https://www.mongodb.com/products/platform/atlas-vector-search) is a remote vector database provider for Llama Stack. It
+uses MongoDB Atlas Vector Search to store and query vectors in the cloud,
+giving you enterprise-grade vector search with MongoDB's scalability and reliability.
+
+## Features
+
+- Cloud-native vector search with MongoDB Atlas
+- Fully integrated with Llama Stack
+- Enterprise-grade security and scalability
+- Supports multiple search modes: vector, keyword, and hybrid search
+- Built-in metadata filtering and text search capabilities
+- Automatic index management
+
+## Search Modes
+
+MongoDB Atlas Vector Search supports three search modes:
+
+### Vector Search
+Vector search uses MongoDB's `$vectorSearch` aggregation stage to perform semantic similarity search over embedding vectors.
+
+```python
+# Vector search example
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="What is machine learning?",
+    search_mode="vector",
+    max_num_results=5,
+)
+```
+
+### Keyword Search
+Keyword search uses MongoDB's full-text indexes to find chunks containing specific terms.
+
+```python
+# Keyword search example
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="Python programming language",
+    search_mode="keyword",
+    max_num_results=5,
+)
+```
+
+### Hybrid Search
+Hybrid search combines vector and keyword search using configurable reranking algorithms.
+
+```python
+# Hybrid search with RRF ranker (default)
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="neural networks in Python",
+    search_mode="hybrid",
+    max_num_results=5,
+)
+
+# Hybrid search with weighted ranker
+search_response = client.vector_stores.search(
+    vector_store_id=vector_store.id,
+    query="neural networks in Python",
+    search_mode="hybrid",
+    max_num_results=5,
+    ranking_options={
+        "ranker": {
+            "type": "weighted",
+            "alpha": 0.7,  # 70% vector search, 30% keyword search
+        }
+    },
+)
+```
+
+## Usage
+
+To use MongoDB Atlas in your Llama Stack project, follow these steps:
+
+1. Create a MongoDB Atlas cluster with Vector Search enabled.
+2. Install the necessary dependencies.
+3. Configure your Llama Stack project to use MongoDB.
+4. Start storing and querying vectors.
+
+## Configuration
+
+### Environment Variables
+Set the following environment variable for your MongoDB Atlas connection:
+
+```bash
+export MONGODB_CONNECTION_STRING="mongodb+srv://username:password@cluster.mongodb.net/?retryWrites=true&w=majority&appName=llama-stack"
+```
+
+### Configuration Example
+
+```yaml
+vector_io:
+  - provider_id: mongodb_atlas
+    provider_type: remote::mongodb
+    config:
+      connection_string: "${env.MONGODB_CONNECTION_STRING}"
+      database_name: "llama_stack"
+      index_name: "vector_index"
+      similarity_metric: "cosine"
+```
+
+## Installation
+
+Install the MongoDB Python driver with pip:
+
+```bash
+pip install pymongo
+```
+
+## Documentation
+
+See the [MongoDB Atlas Vector Search documentation](https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-overview/) for more details.
+
+For general MongoDB documentation, visit the [MongoDB Documentation](https://docs.mongodb.com/).
 """,
     ),
 ]
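The weighted ranker shown in the hybrid-search example above is, in effect, a convex combination of the two score sets. A minimal sketch of that combination, assuming plain per-document scores (a hypothetical helper for illustration; the adapter itself delegates to `WeightedInMemoryAggregator.combine_search_results`, which may also normalize scores first):

```python
def combine_weighted(
    vector_scores: dict[str, float],
    keyword_scores: dict[str, float],
    alpha: float = 0.7,
) -> dict[str, float]:
    """alpha weights the vector score; (1 - alpha) weights the keyword score."""
    all_ids = set(vector_scores) | set(keyword_scores)
    return {
        doc_id: alpha * vector_scores.get(doc_id, 0.0) + (1 - alpha) * keyword_scores.get(doc_id, 0.0)
        for doc_id in all_ids
    }


# With alpha=0.7, a chunk scoring 0.9 on vector and 0.4 on keyword combines to ~0.75.
print(combine_weighted({"c1": 0.9}, {"c1": 0.4})["c1"])
```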
src/llama_stack/providers/remote/vector_io/mongodb/__init__.py (new file)

@@ -0,0 +1,20 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.providers.datatypes import Api, ProviderSpec

from .config import MongoDBVectorIOConfig


async def get_adapter_impl(config: MongoDBVectorIOConfig, deps: dict[Api, ProviderSpec]):
    from .mongodb import MongoDBVectorIOAdapter

    # Handle the deps resolution - if files API exists, pass it, otherwise None
    files_api = deps.get(Api.files)
    models_api = deps.get(Api.models)
    impl = MongoDBVectorIOAdapter(config, deps[Api.inference], files_api, models_api)
    await impl.initialize()
    return impl
src/llama_stack/providers/remote/vector_io/mongodb/config.py (new file, 110 lines)

@@ -0,0 +1,110 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any

from pydantic import BaseModel, Field

from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.schema_utils import json_schema_type


@json_schema_type
class MongoDBVectorIOConfig(BaseModel):
    """Configuration for the MongoDB Atlas Vector Search provider.

    This provider connects to MongoDB Atlas and uses Vector Search for RAG operations.
    """

    # MongoDB connection details - either connection_string or individual parameters
    connection_string: str | None = Field(
        default=None,
        description="MongoDB connection string (e.g., mongodb://user:pass@localhost:27017/ or mongodb+srv://user:pass@cluster.mongodb.net/)",
    )
    host: str | None = Field(
        default=None,
        description="MongoDB host (used if connection_string is not provided)",
    )
    port: int | None = Field(
        default=None,
        description="MongoDB port (used if connection_string is not provided)",
    )
    username: str | None = Field(
        default=None,
        description="MongoDB username (used if connection_string is not provided)",
    )
    password: str | None = Field(
        default=None,
        description="MongoDB password (used if connection_string is not provided)",
    )
    database_name: str = Field(default="llama_stack", description="Database name to use for vector collections")

    # Vector search configuration
    index_name: str = Field(default="vector_index", description="Name of the vector search index")
    path_field: str = Field(default="embedding", description="Field name for storing embeddings")
    similarity_metric: str = Field(
        default="cosine",
        description="Similarity metric: cosine, euclidean, or dotProduct",
    )

    # Connection options
    max_pool_size: int = Field(default=100, description="Maximum connection pool size")
    timeout_ms: int = Field(default=30000, description="Connection timeout in milliseconds")

    # KV store configuration
    persistence: KVStoreReference | None = Field(
        description="Config for KV store backend for metadata storage", default=None
    )

    def get_connection_string(self) -> str | None:
        """Build a connection string from individual parameters if one is not provided directly.

        If both connection_string and individual parameters (host/port) are provided,
        the individual parameters take precedence to allow test environment overrides.
        """
        # Prioritize individual connection parameters over connection_string
        # This allows test environments to override with MONGODB_HOST/PORT/etc
        if self.host and self.port:
            auth_part = ""
            if self.username and self.password:
                auth_part = f"{self.username}:{self.password}@"
            return f"mongodb://{auth_part}{self.host}:{self.port}/"

        # Fall back to connection_string if provided
        if self.connection_string:
            return self.connection_string

        return None

    @classmethod
    def sample_run_config(
        cls,
        __distro_dir__: str,
        connection_string: str = "${env.MONGODB_CONNECTION_STRING:=}",
        host: str = "${env.MONGODB_HOST:=localhost}",
        port: str = "${env.MONGODB_PORT:=27017}",
        username: str = "${env.MONGODB_USERNAME:=}",
        password: str = "${env.MONGODB_PASSWORD:=}",
        database_name: str = "${env.MONGODB_DATABASE_NAME:=llama_stack}",
        **kwargs: Any,
    ) -> dict[str, Any]:
        return {
            "connection_string": connection_string,
            "host": host,
            "port": port,
            "username": username,
            "password": password,
            "database_name": database_name,
            "index_name": "${env.MONGODB_INDEX_NAME:=vector_index}",
            "path_field": "${env.MONGODB_PATH_FIELD:=embedding}",
            "similarity_metric": "${env.MONGODB_SIMILARITY_METRIC:=cosine}",
            "max_pool_size": "${env.MONGODB_MAX_POOL_SIZE:=100}",
            "timeout_ms": "${env.MONGODB_TIMEOUT_MS:=30000}",
            "persistence": KVStoreReference(
                backend="kv_default",
                namespace="vector_io::mongodb_atlas",
            ).model_dump(exclude_none=True),
        }
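A quick sketch of the precedence rule implemented by `get_connection_string()` above, runnable against the config class as defined in this diff (credentials are placeholders):

```python
from llama_stack.providers.remote.vector_io.mongodb.config import MongoDBVectorIOConfig

# Individual host/port parameters win over connection_string, so a test
# environment can override a baked-in Atlas URI:
cfg = MongoDBVectorIOConfig(
    connection_string="mongodb+srv://user:pass@cluster.mongodb.net/",
    host="localhost",
    port=27017,
    username="myuser",
    password="mypassword",
)
assert cfg.get_connection_string() == "mongodb://myuser:mypassword@localhost:27017/"

# Without host/port, the connection string is used as-is:
cfg = MongoDBVectorIOConfig(connection_string="mongodb+srv://user:pass@cluster.mongodb.net/")
assert cfg.get_connection_string() == "mongodb+srv://user:pass@cluster.mongodb.net/"

# With neither, the adapter skips the MongoDB connection entirely:
assert MongoDBVectorIOConfig().get_connection_string() is None
```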
src/llama_stack/providers/remote/vector_io/mongodb/mongodb.py (new file, 631 lines)

@@ -0,0 +1,631 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import asyncio
import heapq
import json
from typing import Any

from numpy.typing import NDArray
from pymongo import MongoClient
from pymongo.collection import Collection
from pymongo.database import Database
from pymongo.operations import SearchIndexModel
from pymongo.server_api import ServerApi

from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
    HealthResponse,
    HealthStatus,
    VectorStoresProtocolPrivate,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
    interleaved_content_as_str,
)
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import (
    OpenAIVectorStoreMixin,
)
from llama_stack.providers.utils.memory.vector_store import (
    ChunkForDeletion,
    EmbeddingIndex,
    VectorStoreWithIndex,
)
from llama_stack.providers.utils.vector_io.vector_utils import (
    WeightedInMemoryAggregator,
    sanitize_collection_name,
)

from .config import MongoDBVectorIOConfig

logger = get_logger(name=__name__, category="vector_io::mongodb")

VERSION = "v1"
VECTOR_DBS_PREFIX = f"vector_dbs:mongodb:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:mongodb:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:mongodb:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:mongodb:{VERSION}::"


class MongoDBIndex(EmbeddingIndex):
    """MongoDB Atlas Vector Search index implementation optimized for RAG."""

    def __init__(
        self,
        vector_store: VectorStore,
        collection: Collection,
        config: MongoDBVectorIOConfig,
    ):
        self.vector_store = vector_store
        self.collection = collection
        self.config = config
        self.dimension = vector_store.embedding_dimension

    async def initialize(self) -> None:
        """Initialize the MongoDB collection and ensure the vector search index exists."""
        try:
            # Create the collection if it doesn't exist
            collection_names = self.collection.database.list_collection_names()
            if self.collection.name not in collection_names:
                logger.info(f"Creating collection '{self.collection.name}'")
                # Create collection by inserting a dummy document
                dummy_doc = {"_id": "__dummy__", "dummy": True}
                self.collection.insert_one(dummy_doc)
                # Remove the dummy document
                self.collection.delete_one({"_id": "__dummy__"})
                logger.info(f"Collection '{self.collection.name}' created successfully")

            # Create optimized vector search index for RAG
            await self._create_vector_search_index()

            # Create text index for hybrid search
            await self._ensure_text_index()

        except Exception as e:
            logger.exception(
                f"Failed to initialize MongoDB index for vector_store: {self.vector_store.identifier}. "
                f"Collection name: {self.collection.name}. Error: {str(e)}"
            )
            # Don't fail completely - just log the error and continue
            logger.warning(
                "Continuing without complete index initialization. "
                "You may need to create indexes manually in the MongoDB Atlas dashboard."
            )

    async def _create_vector_search_index(self) -> None:
        """Create an optimized vector search index based on MongoDB RAG best practices."""
        try:
            # Check if the vector search index exists
            indexes = list(self.collection.list_search_indexes())
            index_exists = any(idx.get("name") == self.config.index_name for idx in indexes)

            if not index_exists:
                # Create vector search index optimized for RAG
                # Based on MongoDB's RAG example using the new vectorSearch format
                search_index_model = SearchIndexModel(
                    definition={
                        "fields": [
                            {
                                "type": "vector",
                                "numDimensions": self.dimension,
                                "path": self.config.path_field,
                                "similarity": self._convert_similarity_metric(self.config.similarity_metric),
                            }
                        ]
                    },
                    name=self.config.index_name,
                    type="vectorSearch",
                )

                logger.info(
                    f"Creating vector search index '{self.config.index_name}' for RAG on collection '{self.collection.name}'"
                )

                self.collection.create_search_index(model=search_index_model)

                # Wait for the index to be ready (like in the MongoDB RAG example)
                await self._wait_for_index_ready()

                logger.info("Vector search index created and ready for RAG queries")

        except Exception as e:
            logger.warning(f"Failed to create vector search index: {e}")

    def _convert_similarity_metric(self, metric: str) -> str:
        """Convert the internal similarity metric to MongoDB Atlas format."""
        metric_map = {
            "cosine": "cosine",
            "euclidean": "euclidean",
            "dotProduct": "dotProduct",
            "dot_product": "dotProduct",
        }
        return metric_map.get(metric, "cosine")

    async def _wait_for_index_ready(self) -> None:
        """Wait for the vector search index to become queryable, based on the MongoDB RAG example."""
        logger.info("Waiting for vector search index to be ready...")

        max_wait_time = 300  # 5 minutes max wait
        wait_interval = 5
        elapsed_time = 0

        while elapsed_time < max_wait_time:
            try:
                indices = list(self.collection.list_search_indexes(self.config.index_name))
                if len(indices) and indices[0].get("queryable") is True:
                    logger.info(f"Vector search index '{self.config.index_name}' is ready for querying")
                    return
            except Exception:
                pass

            # Non-blocking sleep so the event loop is not stalled while polling
            await asyncio.sleep(wait_interval)
            elapsed_time += wait_interval

        logger.warning(f"Vector search index may not be fully ready after {max_wait_time}s")

    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray) -> None:
        """Add chunks with embeddings to the MongoDB collection, structured for RAG."""
        if len(chunks) != len(embeddings):
            raise ValueError(f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}")

        documents = []
        for i, chunk in enumerate(chunks):
            # Structure document for optimal RAG retrieval
            doc = {
                "_id": chunk.chunk_id,
                "chunk_id": chunk.chunk_id,
                "text": interleaved_content_as_str(chunk.content),  # Key field for RAG context
                "content": interleaved_content_as_str(chunk.content),  # Backward compatibility
                "metadata": chunk.metadata or {},
                "chunk_metadata": (chunk.chunk_metadata.model_dump() if chunk.chunk_metadata else {}),
                self.config.path_field: embeddings[i].tolist(),  # Vector embeddings
                "document": chunk.model_dump(),  # Full chunk data
            }
            documents.append(doc)

        try:
            # Use upsert behavior for chunks
            for doc in documents:
                self.collection.replace_one({"_id": doc["_id"]}, doc, upsert=True)

            logger.debug(f"Successfully added {len(chunks)} chunks optimized for RAG to MongoDB collection")
        except Exception as e:
            logger.exception(f"Failed to add chunks to MongoDB collection: {e}")
            raise

    async def query_vector(
        self,
        embedding: NDArray,
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
        """Perform vector similarity search optimized for RAG, based on the MongoDB example."""
        try:
            # Use MongoDB's vector search aggregation pipeline optimized for RAG
            pipeline = [
                {
                    "$vectorSearch": {
                        "index": self.config.index_name,
                        "queryVector": embedding.tolist(),
                        "path": self.config.path_field,
                        "numCandidates": min(k * 10, 1000),  # Cap at 1000 to prevent excessive candidates
                        "limit": k,
                    }
                },
                {
                    "$project": {
                        "_id": 0,
                        "text": 1,  # Primary field for RAG context
                        "content": 1,  # Backward compatibility
                        "metadata": 1,
                        "chunk_metadata": 1,
                        "document": 1,
                        "score": {"$meta": "vectorSearchScore"},
                    }
                },
                {"$match": {"score": {"$gte": score_threshold}}},
            ]

            results = list(self.collection.aggregate(pipeline))

            chunks = []
            scores = []
            for result in results:
                score = result.get("score", 0.0)
                if score >= score_threshold:
                    chunk_data = result.get("document", {})
                    if chunk_data:
                        chunks.append(Chunk(**chunk_data))
                        scores.append(float(score))

            logger.debug(f"Vector search for RAG returned {len(chunks)} results")
            return QueryChunksResponse(chunks=chunks, scores=scores)

        except Exception as e:
            logger.exception(f"Vector search for RAG failed: {e}")
            raise RuntimeError(f"Vector search for RAG failed: {e}") from e

    async def query_keyword(
        self,
        query_string: str,
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
        """Perform text search using MongoDB's text search for RAG context retrieval."""
        try:
            # Ensure the text index exists
            await self._ensure_text_index()

            pipeline: list[dict[str, Any]] = [
                {"$match": {"$text": {"$search": query_string}}},
                {
                    "$project": {
                        "_id": 0,
                        "text": 1,  # Primary field for RAG context
                        "content": 1,  # Backward compatibility
                        "metadata": 1,
                        "chunk_metadata": 1,
                        "document": 1,
                        "score": {"$meta": "textScore"},
                    }
                },
                {"$match": {"score": {"$gte": score_threshold}}},
                {"$sort": {"score": {"$meta": "textScore"}}},
                {"$limit": k},
            ]

            results = list(self.collection.aggregate(pipeline))

            chunks = []
            scores = []
            for result in results:
                score = result.get("score", 0.0)
                if score >= score_threshold:
                    chunk_data = result.get("document", {})
                    if chunk_data:
                        chunks.append(Chunk(**chunk_data))
                        scores.append(float(score))

            logger.debug(f"Keyword search for RAG returned {len(chunks)} results")
            return QueryChunksResponse(chunks=chunks, scores=scores)

        except Exception as e:
            logger.exception(f"Keyword search for RAG failed: {e}")
            raise RuntimeError(f"Keyword search for RAG failed: {e}") from e

    async def query_hybrid(
        self,
        embedding: NDArray,
        query_string: str,
        k: int,
        score_threshold: float,
        reranker_type: str,
        reranker_params: dict[str, Any] | None = None,
    ) -> QueryChunksResponse:
        """Perform hybrid search for enhanced RAG context retrieval."""
        if reranker_params is None:
            reranker_params = {}

        # Get results from both search methods
        vector_response = await self.query_vector(embedding, k, 0.0)
        keyword_response = await self.query_keyword(query_string, k, 0.0)

        # Convert responses to score dictionaries
        vector_scores = {
            chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
        }
        keyword_scores = {
            chunk.chunk_id: score
            for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
        }

        # Combine scores using the reranking utility
        combined_scores = WeightedInMemoryAggregator.combine_search_results(
            vector_scores, keyword_scores, reranker_type, reranker_params
        )

        # Get top-k results
        top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1])

        # Filter by score threshold
        filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]

        # Create chunk map
        chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}

        # Build final results
        chunks = []
        scores = []
        for doc_id, score in filtered_items:
            if doc_id in chunk_map:
                chunks.append(chunk_map[doc_id])
                scores.append(score)

        logger.debug(f"Hybrid search for RAG returned {len(chunks)} results")
        return QueryChunksResponse(chunks=chunks, scores=scores)

    async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete chunks from the MongoDB collection."""
        chunk_ids = [c.chunk_id for c in chunks_for_deletion]
        try:
            result = self.collection.delete_many({"_id": {"$in": chunk_ids}})
            logger.debug(f"Deleted {result.deleted_count} chunks from MongoDB collection")
        except Exception as e:
            logger.exception(f"Failed to delete chunks: {e}")
            raise

    async def delete(self) -> None:
        """Delete the entire collection."""
        try:
            self.collection.drop()
            logger.debug(f"Dropped MongoDB collection: {self.collection.name}")
        except Exception as e:
            logger.exception(f"Failed to drop collection: {e}")
            raise

    async def _ensure_text_index(self) -> None:
        """Ensure a text search index exists on the content fields for RAG."""
        try:
            indexes = list(self.collection.list_indexes())
            text_index_exists = any(
                any(key.startswith(("content", "text")) for key in idx.get("key", {}).keys())
                and idx.get("textIndexVersion") is not None
                for idx in indexes
            )

            if not text_index_exists:
                logger.info("Creating text search index on content fields for RAG")
                # Index both 'text' and 'content' fields for comprehensive text search
                self.collection.create_index([("text", "text"), ("content", "text")])
                logger.info("Text search index created successfully for RAG")

        except Exception as e:
            logger.warning(f"Failed to create text index for RAG: {e}")


class MongoDBVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
    """MongoDB Atlas Vector Search adapter for Llama Stack, optimized for RAG workflows."""

    def __init__(
        self,
        config: MongoDBVectorIOConfig,
        inference_api,
        files_api=None,
        models_api=None,
    ) -> None:
        # Handle the case where files_api might be a ProviderSpec that needs resolution
        resolved_files_api = files_api
        super().__init__(files_api=resolved_files_api, kvstore=None)
        self.config = config
        self.inference_api = inference_api
        self.models_api = models_api
        self.client: MongoClient | None = None
        self.database: Database | None = None
        self.cache: dict[str, VectorStoreWithIndex] = {}
        self.kvstore: KVStore | None = None

    async def initialize(self) -> None:
        """Initialize the MongoDB connection, optimized for RAG workflows."""
        logger.info("Initializing MongoDB Atlas Vector IO adapter for RAG")

        try:
            # Initialize KV store for metadata
            if self.config.persistence:
                self.kvstore = await kvstore_impl(self.config.persistence)

            # Get connection string from config (either direct or built from parameters)
            connection_string = self.config.get_connection_string()

            # Skip the MongoDB connection if no connection parameters are provided
            # This allows other providers to work without MongoDB credentials
            if not connection_string:
                logger.warning(
                    "MongoDB connection parameters not provided. "
                    "MongoDB vector store will not be available until credentials are configured."
                )
                return

            # Connect to MongoDB with optimized settings for RAG
            self.client = MongoClient(
                connection_string,
                server_api=ServerApi("1"),
                maxPoolSize=self.config.max_pool_size,
                serverSelectionTimeoutMS=self.config.timeout_ms,
                # Additional settings for RAG performance
                retryWrites=True,
                readPreference="primaryPreferred",
            )

            # Test connection
            try:
                self.client.admin.command("ping")
                logger.info("Successfully connected to MongoDB Atlas for RAG")
            except Exception as conn_error:
                # Extract just the basic error type without the full traceback
                error_type = type(conn_error).__name__
                logger.warning(
                    f"MongoDB connection failed ({error_type}). "
                    "MongoDB vector store will not be available. "
                    f"Attempted to connect to: {self.config.host or 'connection_string'}:{self.config.port or '(from connection_string)'}"
                )
                # Close the client and clear it
                if self.client:
                    self.client.close()
                self.client = None
                return

            # Get database
            self.database = self.client[self.config.database_name]

            # Initialize OpenAI vector stores
            await self.initialize_openai_vector_stores()

            # Load existing vector databases
            await self._load_existing_vector_dbs()

            logger.info("MongoDB Atlas Vector IO adapter for RAG initialized successfully")

        except Exception as e:
            logger.exception("Failed to initialize MongoDB Atlas Vector IO adapter for RAG")
            # Close the client if it was created
            if self.client:
                self.client.close()
                self.client = None
            # Log a warning instead of raising to allow tests to skip gracefully
            logger.warning(f"MongoDB initialization failed: {e}. MongoDB vector store will not be available.")

    async def shutdown(self) -> None:
        """Shut down the MongoDB connection."""
        if self.client:
            self.client.close()
            logger.info("MongoDB Atlas RAG connection closed")

    async def health(self) -> HealthResponse:
        """Perform a health check on the MongoDB connection."""
        try:
            if self.client:
                self.client.admin.command("ping")
                return HealthResponse(status=HealthStatus.OK)
            else:
                return HealthResponse(status=HealthStatus.ERROR, message="MongoDB client not initialized")
        except Exception as e:
            return HealthResponse(
                status=HealthStatus.ERROR,
                message=f"MongoDB RAG health check failed: {str(e)}",
            )

    async def register_vector_store(self, vector_store: VectorStore) -> None:
        """Register a new vector store optimized for RAG."""
        if self.database is None:
            raise RuntimeError("MongoDB database not initialized")

        # Create collection name from vector store identifier
        collection_name = sanitize_collection_name(vector_store.identifier)
        collection = self.database[collection_name]

        # Create and initialize MongoDB index optimized for RAG
        mongodb_index = MongoDBIndex(vector_store, collection, self.config)
        await mongodb_index.initialize()

        # Create vector store with index wrapper
        vector_store_with_index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=mongodb_index,
            inference_api=self.inference_api,
        )

        # Cache the vector store
        self.cache[vector_store.identifier] = vector_store_with_index

        # Save vector store info to KVStore for persistence
        if self.kvstore:
            await self.kvstore.set(
                f"{VECTOR_DBS_PREFIX}{vector_store.identifier}",
                vector_store.model_dump_json(),
            )

        logger.info(f"Registered vector store for RAG: {vector_store.identifier}")

    async def unregister_vector_store(self, vector_store_id: str) -> None:
        """Unregister a vector store."""
        if vector_store_id in self.cache:
            await self.cache[vector_store_id].index.delete()
            del self.cache[vector_store_id]

        # Clean up from KV store
        if self.kvstore:
            await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")

        logger.info(f"Unregistered vector store: {vector_store_id}")

    async def insert_chunks(
        self,
        vector_store_id: str,
        chunks: list[Chunk],
        ttl_seconds: int | None = None,
    ) -> None:
        """Insert chunks into the vector database, optimized for RAG."""
        vector_db_with_index = await self._get_vector_db_index(vector_store_id)
        await vector_db_with_index.insert_chunks(chunks)

    async def query_chunks(
        self,
        vector_store_id: str,
        query: InterleavedContent,
        params: dict[str, Any] | None = None,
    ) -> QueryChunksResponse:
        """Query chunks from the vector database, optimized for RAG context retrieval."""
        vector_db_with_index = await self._get_vector_db_index(vector_store_id)
        return await vector_db_with_index.query_chunks(query, params)

    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete chunks from the vector database."""
        vector_db_with_index = await self._get_vector_db_index(store_id)
        await vector_db_with_index.index.delete_chunks(chunks_for_deletion)

    async def _get_vector_db_index(self, vector_db_id: str) -> VectorStoreWithIndex:
        """Get a vector store index from the cache."""
        if vector_db_id in self.cache:
            return self.cache[vector_db_id]

        raise VectorStoreNotFoundError(vector_db_id)

    async def _load_existing_vector_dbs(self) -> None:
        """Load existing vector databases from the KVStore."""
        if not self.kvstore:
            return

        try:
            # Use keys_in_range to get all vector database keys from the KVStore
            # This searches for keys with the prefix by using a range scan
            start_key = VECTOR_DBS_PREFIX
            # Create an end key by incrementing the last character
            end_key = VECTOR_DBS_PREFIX[:-1] + chr(ord(VECTOR_DBS_PREFIX[-1]) + 1)

            vector_db_keys = await self.kvstore.keys_in_range(start_key, end_key)

            for key in vector_db_keys:
                try:
                    vector_store_data = await self.kvstore.get(key)
                    if vector_store_data:
                        vector_store = VectorStore(**json.loads(vector_store_data))
                        # Register the vector store without re-initializing
                        await self._register_existing_vector_store(vector_store)
                        logger.info(f"Loaded existing RAG-optimized vector store: {vector_store.identifier}")
                except Exception as e:
                    logger.warning(f"Failed to load vector store from key {key}: {e}")
                    continue

        except Exception as e:
            logger.warning(f"Failed to load existing vector stores: {e}")

    async def _register_existing_vector_store(self, vector_store: VectorStore) -> None:
        """Register an existing vector store without re-initialization."""
        if self.database is None:
            raise RuntimeError("MongoDB database not initialized")

        # Create collection name from vector store identifier
        collection_name = sanitize_collection_name(vector_store.identifier)
        collection = self.database[collection_name]

        # Create MongoDB index without initialization (collection already exists)
        mongodb_index = MongoDBIndex(vector_store, collection, self.config)

        # Create vector store with index wrapper
        vector_store_with_index = VectorStoreWithIndex(
            vector_store=vector_store,
            index=mongodb_index,
            inference_api=self.inference_api,
        )

        # Cache the vector store
        self.cache[vector_store.identifier] = vector_store_with_index
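One detail worth calling out in `_load_existing_vector_dbs` above: the prefix scan over the KV store is expressed as a half-open key range whose end key is the prefix with its last character incremented. A standalone sketch of why that bound captures exactly the prefixed keys:

```python
prefix = "vector_dbs:mongodb:v1::"
start_key = prefix
end_key = prefix[:-1] + chr(ord(prefix[-1]) + 1)  # ":" -> ";", one past the prefix

# Every key that starts with the prefix sorts inside [start_key, end_key) ...
assert start_key <= "vector_dbs:mongodb:v1::my_store" < end_key
# ... and keys with a different prefix fall outside it.
assert not (start_key <= "vector_dbs:qdrant:v1::my_store" < end_key)
```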
tests/unit/providers/vector_io/__init__.py (new file, 5 lines)

@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
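The test package marker above ships empty. As a hedged sketch, a unit test of the pure config logic from this diff could sit next to it (hypothetical file name and tests, not part of the commit):

```python
# tests/unit/providers/vector_io/test_mongodb_config.py (hypothetical)
from llama_stack.providers.remote.vector_io.mongodb.config import MongoDBVectorIOConfig


def test_connection_string_requires_both_host_and_port():
    # host alone is not enough to build a URI
    assert MongoDBVectorIOConfig(host="localhost").get_connection_string() is None


def test_auth_part_requires_both_username_and_password():
    # username without password is ignored in the URI
    cfg = MongoDBVectorIOConfig(host="localhost", port=27017, username="u")
    assert cfg.get_connection_string() == "mongodb://localhost:27017/"
```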