chore!: BREAKING CHANGE: vector_db_id -> vector_store_id (#3923)

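# What does this PR do?

Renames the `vector_db_id` / `vector_db_ids` parameters and fields to `vector_store_id` / `vector_store_ids` across the agents, RAG tool, and vector_io APIs and their providers. This is a breaking change for API clients.

## Test Plan

CI. The vector_io tests will fail until the next client sync; they passed with https://github.com/llamastack/llama-stack-client-python/pull/286 checked out locally.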
2025-12-03 09:53:45 +00:00 · 2025-10-27 14:26:06 -07:00 · 2025-10-27 14:26:06 -07:00 · b7dd3f5c56
commit b7dd3f5c56
parent b6954c9882
29 changed files with 175 additions and 175 deletions
--- a/src/llama_stack/apis/agents/agents.py
+++ b/src/llama_stack/apis/agents/agents.py
@ -149,13 +149,13 @@ class ShieldCallStep(StepCommon):
 class MemoryRetrievalStep(StepCommon):
    """A memory retrieval step in an agent turn.

-    :param vector_db_ids: The IDs of the vector databases to retrieve context from.
+    :param vector_store_ids: The IDs of the vector databases to retrieve context from.
    :param inserted_context: The context retrieved from the vector databases.
    """

    step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval
    # TODO: should this be List[str]?
-    vector_db_ids: str
+    vector_store_ids: str
    inserted_context: InterleavedContent


--- a/src/llama_stack/apis/tools/rag_tool.py
+++ b/src/llama_stack/apis/tools/rag_tool.py
@ -190,13 +190,13 @@ class RAGToolRuntime(Protocol):
    async def insert(
        self,
        documents: list[RAGDocument],
-        vector_db_id: str,
+        vector_store_id: str,
        chunk_size_in_tokens: int = 512,
    ) -> None:
        """Index documents so they can be used by the RAG system.

        :param documents: List of documents to index in the RAG system
-        :param vector_db_id: ID of the vector database to store the document embeddings
+        :param vector_store_id: ID of the vector database to store the document embeddings
        :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing
        """
        ...
@ -205,13 +205,13 @@ class RAGToolRuntime(Protocol):
    async def query(
        self,
        content: InterleavedContent,
-        vector_db_ids: list[str],
+        vector_store_ids: list[str],
        query_config: RAGQueryConfig | None = None,
    ) -> RAGQueryResult:
        """Query the RAG system for context; typically invoked by the agent.

        :param content: The query content to search for in the indexed documents
-        :param vector_db_ids: List of vector database IDs to search within
+        :param vector_store_ids: List of vector database IDs to search within
        :param query_config: (Optional) Configuration parameters for the query operation
        :returns: RAGQueryResult containing the retrieved content and metadata
        """
--- a/src/llama_stack/apis/vector_io/vector_io.py
+++ b/src/llama_stack/apis/vector_io/vector_io.py
@ -529,17 +529,17 @@ class VectorIO(Protocol):

    # this will just block now until chunks are inserted, but it should
    # probably return a Job instance which can be polled for completion
-    # TODO: rename vector_db_id to vector_store_id once Stainless is working
+    # NOTE: parameter renamed from vector_db_id to vector_store_id (breaking change; clients need the matching SDK sync)
    @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
    async def insert_chunks(
        self,
-        vector_db_id: str,
+        vector_store_id: str,
        chunks: list[Chunk],
        ttl_seconds: int | None = None,
    ) -> None:
        """Insert chunks into a vector database.

-        :param vector_db_id: The identifier of the vector database to insert the chunks into.
+        :param vector_store_id: The identifier of the vector database to insert the chunks into.
        :param chunks: The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types.
            `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional.
            If `metadata` is provided, you configure how Llama Stack formats the chunk during generation.
@ -548,17 +548,17 @@ class VectorIO(Protocol):
        """
        ...

-    # TODO: rename vector_db_id to vector_store_id once Stainless is working
+    # NOTE: parameter renamed from vector_db_id to vector_store_id (breaking change; clients need the matching SDK sync)
    @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
    async def query_chunks(
        self,
-        vector_db_id: str,
+        vector_store_id: str,
        query: InterleavedContent,
        params: dict[str, Any] | None = None,
    ) -> QueryChunksResponse:
        """Query chunks from a vector database.

-        :param vector_db_id: The identifier of the vector database to query.
+        :param vector_store_id: The identifier of the vector database to query.
        :param query: The query to search for.
        :param params: The parameters of the query.
        :returns: A QueryChunksResponse.
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@ -73,27 +73,27 @@ class VectorIORouter(VectorIO):

    async def insert_chunks(
        self,
-        vector_db_id: str,
+        vector_store_id: str,
        chunks: list[Chunk],
        ttl_seconds: int | None = None,
    ) -> None:
        doc_ids = [chunk.document_id for chunk in chunks[:3]]
        logger.debug(
-            f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, "
+            f"VectorIORouter.insert_chunks: {vector_store_id}, {len(chunks)} chunks, "
            f"ttl_seconds={ttl_seconds}, chunk_ids={doc_ids}{' and more...' if len(chunks) > 3 else ''}"
        )
-        provider = await self.routing_table.get_provider_impl(vector_db_id)
-        return await provider.insert_chunks(vector_db_id, chunks, ttl_seconds)
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.insert_chunks(vector_store_id, chunks, ttl_seconds)

    async def query_chunks(
        self,
-        vector_db_id: str,
+        vector_store_id: str,
        query: InterleavedContent,
        params: dict[str, Any] | None = None,
    ) -> QueryChunksResponse:
-        logger.debug(f"VectorIORouter.query_chunks: {vector_db_id}")
-        provider = await self.routing_table.get_provider_impl(vector_db_id)
-        return await provider.query_chunks(vector_db_id, query, params)
+        logger.debug(f"VectorIORouter.query_chunks: {vector_store_id}")
+        provider = await self.routing_table.get_provider_impl(vector_store_id)
+        return await provider.query_chunks(vector_store_id, query, params)

    # OpenAI Vector Stores API endpoints
    async def openai_create_vector_store(
--- a/src/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@ -488,13 +488,13 @@ class ChatAgent(ShieldRunnerMixin):

        session_info = await self.storage.get_session_info(session_id)
        # if the session has a memory bank id, let the memory tool use it
-        if session_info and session_info.vector_db_id:
+        if session_info and session_info.vector_store_id:
            for tool_name in self.tool_name_to_args.keys():
                if tool_name == MEMORY_QUERY_TOOL:
-                    if "vector_db_ids" not in self.tool_name_to_args[tool_name]:
-                        self.tool_name_to_args[tool_name]["vector_db_ids"] = [session_info.vector_db_id]
+                    if "vector_store_ids" not in self.tool_name_to_args[tool_name]:
+                        self.tool_name_to_args[tool_name]["vector_store_ids"] = [session_info.vector_store_id]
                    else:
-                        self.tool_name_to_args[tool_name]["vector_db_ids"].append(session_info.vector_db_id)
+                        self.tool_name_to_args[tool_name]["vector_store_ids"].append(session_info.vector_store_id)

        output_attachments = []

--- a/src/llama_stack/providers/inline/agents/meta_reference/persistence.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/persistence.py
@ -22,7 +22,7 @@ log = get_logger(name=__name__, category="agents::meta_reference")

 class AgentSessionInfo(Session):
    # TODO: is this used anywhere?
-    vector_db_id: str | None = None
+    vector_store_id: str | None = None
    started_at: datetime
    owner: User | None = None
    identifier: str | None = None
@ -93,12 +93,12 @@ class AgentPersistence:

        return session_info

-    async def add_vector_db_to_session(self, session_id: str, vector_db_id: str):
+    async def add_vector_db_to_session(self, session_id: str, vector_store_id: str):
        session_info = await self.get_session_if_accessible(session_id)
        if session_info is None:
            raise SessionNotFoundError(session_id)

-        session_info.vector_db_id = vector_db_id
+        session_info.vector_store_id = vector_store_id
        await self.kvstore.set(
            key=f"session:{self.agent_id}:{session_id}",
            value=session_info.model_dump_json(),
--- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
@ -119,7 +119,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
    async def insert(
        self,
        documents: list[RAGDocument],
-        vector_db_id: str,
+        vector_store_id: str,
        chunk_size_in_tokens: int = 512,
    ) -> None:
        if not documents:
@ -158,14 +158,14 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti

                try:
                    await self.vector_io_api.openai_attach_file_to_vector_store(
-                        vector_store_id=vector_db_id,
+                        vector_store_id=vector_store_id,
                        file_id=created_file.id,
                        attributes=doc.metadata,
                        chunking_strategy=chunking_strategy,
                    )
                except Exception as e:
                    log.error(
-                        f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}"
+                        f"Failed to attach file {created_file.id} to vector store {vector_store_id} for document {doc.document_id}: {e}"
                    )
                    continue

@ -176,10 +176,10 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
    async def query(
        self,
        content: InterleavedContent,
-        vector_db_ids: list[str],
+        vector_store_ids: list[str],
        query_config: RAGQueryConfig | None = None,
    ) -> RAGQueryResult:
-        if not vector_db_ids:
+        if not vector_store_ids:
            raise ValueError(
                "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID."
            )
@ -192,7 +192,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
        )
        tasks = [
            self.vector_io_api.query_chunks(
-                vector_db_id=vector_db_id,
+                vector_store_id=vector_store_id,
                query=query,
                params={
                    "mode": query_config.mode,
@ -201,18 +201,18 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
                    "ranker": query_config.ranker,
                },
            )
-            for vector_db_id in vector_db_ids
+            for vector_store_id in vector_store_ids
        ]
        results: list[QueryChunksResponse] = await asyncio.gather(*tasks)

        chunks = []
        scores = []

-        for vector_db_id, result in zip(vector_db_ids, results, strict=False):
+        for vector_store_id, result in zip(vector_store_ids, results, strict=False):
            for chunk, score in zip(result.chunks, result.scores, strict=False):
                if not hasattr(chunk, "metadata") or chunk.metadata is None:
                    chunk.metadata = {}
-                chunk.metadata["vector_db_id"] = vector_db_id
+                chunk.metadata["vector_store_id"] = vector_store_id

                chunks.append(chunk)
                scores.append(score)
@ -250,7 +250,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
            metadata_keys_to_exclude_from_context = [
                "token_count",
                "metadata_token_count",
-                "vector_db_id",
+                "vector_store_id",
            ]
            metadata_for_context = {}
            for k in chunk_metadata_keys_to_include_from_context:
@ -275,7 +275,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
                "document_ids": [c.document_id for c in chunks[: len(picked)]],
                "chunks": [c.content for c in chunks[: len(picked)]],
                "scores": scores[: len(picked)],
-                "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]],
+                "vector_store_ids": [c.metadata["vector_store_id"] for c in chunks[: len(picked)]],
            },
        )

@ -309,7 +309,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
        )

    async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
-        vector_db_ids = kwargs.get("vector_db_ids", [])
+        vector_store_ids = kwargs.get("vector_store_ids", [])
        query_config = kwargs.get("query_config")
        if query_config:
            query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config)
@ -319,7 +319,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
        query = kwargs["query"]
        result = await self.query(
            content=query,
-            vector_db_ids=vector_db_ids,
+            vector_store_ids=vector_store_ids,
            query_config=query_config,
        )

--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@ -248,19 +248,19 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
        del self.cache[vector_store_id]
        await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = self.cache.get(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = self.cache.get(vector_store_id)
        if index is None:
-            raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}")
+            raise ValueError(f"Vector DB {vector_store_id} not found. found: {self.cache.keys()}")

        await index.insert_chunks(chunks)

    async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
-        index = self.cache.get(vector_db_id)
+        index = self.cache.get(vector_store_id)
        if index is None:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)

        return await index.query_chunks(query, params)

--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@ -447,20 +447,20 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
        await self.cache[vector_store_id].index.delete()
        del self.cache[vector_store_id]

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)
        # The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api
        # and then call our index's add_chunks.
        await index.insert_chunks(chunks)

    async def query_chunks(
-        self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)
        return await index.query_chunks(query, params)

    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
--- a/src/llama_stack/providers/registry/vector_io.py
+++ b/src/llama_stack/providers/registry/vector_io.py
@ -163,14 +163,14 @@ The SQLite-vec provider supports three search modes:
 Example with hybrid search:
 ```python
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
 )

 # Using RRF ranker
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={
        "mode": "hybrid",
@ -182,7 +182,7 @@ response = await vector_io.query_chunks(

 # Using weighted ranker
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={
        "mode": "hybrid",
@ -196,7 +196,7 @@ response = await vector_io.query_chunks(
 Example with explicit vector search:
 ```python
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
 )
@ -205,7 +205,7 @@ response = await vector_io.query_chunks(
 Example with keyword search:
 ```python
 response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
 )
--- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py
@ -169,20 +169,20 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
        await self.cache[vector_store_id].index.delete()
        del self.cache[vector_store_id]

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if index is None:
-            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
+            raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")

        await index.insert_chunks(chunks)

    async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)

        if index is None:
-            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
+            raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")

        return await index.query_chunks(query, params)

--- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py
@ -348,19 +348,19 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
            await self.cache[vector_store_id].index.delete()
            del self.cache[vector_store_id]

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)

        await index.insert_chunks(chunks)

    async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)
        return await index.query_chunks(query, params)

    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
--- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
+++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
@ -399,14 +399,14 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
        assert self.kvstore is not None
        await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
        await index.insert_chunks(chunks)

    async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
        return await index.query_chunks(query, params)

    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
--- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@ -222,19 +222,19 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
        self.cache[vector_store_id] = index
        return index

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)

        await index.insert_chunks(chunks)

    async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)

        return await index.query_chunks(query, params)

--- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@ -366,19 +366,19 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
        self.cache[vector_store_id] = index
        return index

-    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+    async def insert_chunks(self, vector_store_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)

        await index.insert_chunks(chunks)

    async def query_chunks(
-        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
        if not index:
-            raise VectorStoreNotFoundError(vector_db_id)
+            raise VectorStoreNotFoundError(vector_store_id)

        return await index.query_chunks(query, params)

--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@ -333,7 +333,7 @@ class OpenAIVectorStoreMixin(ABC):
    @abstractmethod
    async def insert_chunks(
        self,
-        vector_db_id: str,
+        vector_store_id: str,
        chunks: list[Chunk],
        ttl_seconds: int | None = None,
    ) -> None:
@ -342,7 +342,7 @@ class OpenAIVectorStoreMixin(ABC):

    @abstractmethod
    async def query_chunks(
-        self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
+        self, vector_store_id: str, query: Any, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
        """Query chunks from a vector database (provider-specific implementation)."""
        pass
@ -609,7 +609,7 @@ class OpenAIVectorStoreMixin(ABC):
            # TODO: Add support for ranking_options.ranker

            response = await self.query_chunks(
-                vector_db_id=vector_store_id,
+                vector_store_id=vector_store_id,
                query=search_query,
                params=params,
            )
@ -803,7 +803,7 @@ class OpenAIVectorStoreMixin(ABC):
                )
            else:
                await self.insert_chunks(
-                    vector_db_id=vector_store_id,
+                    vector_store_id=vector_store_id,
                    chunks=chunks,
                )
                vector_store_file_object.status = "completed"