diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml index 3929df09c..ac600d570 100644 --- a/.github/actions/run-and-record-tests/action.yml +++ b/.github/actions/run-and-record-tests/action.yml @@ -86,10 +86,9 @@ runs: if: ${{ always() }} shell: bash run: | - sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true - distro_name=$(echo "${{ inputs.stack-config }}" | sed 's/^docker://' | sed 's/^server://') - stack_container_name="llama-stack-test-$distro_name" - sudo docker logs $stack_container_name > docker-${distro_name}-${{ inputs.inference-mode }}.log || true + # Ollama logs (if ollama container exists) + sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true + # Note: distro container logs are now dumped in integration-tests.sh before container is removed - name: Upload logs if: ${{ always() }} diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index b5845be53..0fdd50acc 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -37,7 +37,7 @@ jobs: .pre-commit-config.yaml - name: Set up Node.js - uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0 + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 with: node-version: '20' cache: 'npm' diff --git a/.github/workflows/precommit-trigger.yml b/.github/workflows/precommit-trigger.yml index 0c23b57de..b05898d29 100644 --- a/.github/workflows/precommit-trigger.yml +++ b/.github/workflows/precommit-trigger.yml @@ -99,7 +99,7 @@ jobs: owner: context.repo.owner, repo: context.repo.repo, issue_number: ${{ steps.check_author.outputs.pr_number }}, - body: `⏳ Running pre-commit hooks on PR #${{ steps.check_author.outputs.pr_number }}...` + body: `⏳ Running [pre-commit hooks](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) on PR #${{ steps.check_author.outputs.pr_number }}...` }); - name: Checkout PR branch (same-repo) @@ -141,7 +141,7 @@ jobs: - name: Set up Node.js if: steps.check_author.outputs.authorized == 'true' - uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0 + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 with: node-version: '20' cache: 'npm' diff --git a/.github/workflows/providers-list-deps.yml b/.github/workflows/providers-list-deps.yml index df491b680..e30e1e5fb 100644 --- a/.github/workflows/providers-list-deps.yml +++ b/.github/workflows/providers-list-deps.yml @@ -36,7 +36,7 @@ jobs: distros: ${{ steps.set-matrix.outputs.distros }} steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Generate Distribution List id: set-matrix @@ -55,7 +55,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner @@ -79,7 +79,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner @@ -92,7 +92,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml index dfa844175..96243285f 100644 --- a/.github/workflows/python-build-test.yml +++ b/.github/workflows/python-build-test.yml @@ -24,7 +24,7 @@ jobs: uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install uv - uses: astral-sh/setup-uv@eb1897b8dc4b5d5bfe39a428a8f2304605e0983c # v7.0.0 + uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/ui-unit-tests.yml b/.github/workflows/ui-unit-tests.yml index c16f512d1..e8f318b8e 100644 --- a/.github/workflows/ui-unit-tests.yml +++ b/.github/workflows/ui-unit-tests.yml @@ -29,7 +29,7 @@ jobs: uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Setup Node.js - uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0 + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 with: node-version: ${{ matrix.node-version }} cache: 'npm' diff --git a/client-sdks/stainless/openapi.stainless.yml b/client-sdks/stainless/openapi.stainless.yml index 0a5dfc044..9461be996 100644 --- a/client-sdks/stainless/openapi.stainless.yml +++ b/client-sdks/stainless/openapi.stainless.yml @@ -208,19 +208,6 @@ resources: type: http endpoint: post /v1/conversations/{conversation_id}/items - datasets: - models: - list_datasets_response: ListDatasetsResponse - methods: - register: post /v1beta/datasets - retrieve: get /v1beta/datasets/{dataset_id} - list: - endpoint: get /v1beta/datasets - paginated: false - unregister: delete /v1beta/datasets/{dataset_id} - iterrows: get /v1beta/datasetio/iterrows/{dataset_id} - appendrows: post /v1beta/datasetio/append-rows/{dataset_id} - inspect: models: healthInfo: HealthInfo @@ -521,6 +508,21 @@ resources: stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk param_discriminator: stream + beta: + subresources: + datasets: + models: + list_datasets_response: ListDatasetsResponse + methods: + register: post /v1beta/datasets + retrieve: get /v1beta/datasets/{dataset_id} + list: + endpoint: get /v1beta/datasets + paginated: false + unregister: delete /v1beta/datasets/{dataset_id} + iterrows: get /v1beta/datasetio/iterrows/{dataset_id} + appendrows: post /v1beta/datasetio/append-rows/{dataset_id} + settings: license: MIT diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index eff01931f..98a309f12 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -2039,69 +2039,6 @@ paths: schema: $ref: '#/components/schemas/URL' deprecated: false - /v1/tool-runtime/rag-tool/insert: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Index documents so they can be used by the RAG system. - description: >- - Index documents so they can be used by the RAG system. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertRequest' - required: true - deprecated: false - /v1/tool-runtime/rag-tool/query: - post: - responses: - '200': - description: >- - RAGQueryResult containing the retrieved content and metadata - content: - application/json: - schema: - $ref: '#/components/schemas/RAGQueryResult' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Query the RAG system for context; typically invoked by the agent. - description: >- - Query the RAG system for context; typically invoked by the agent. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryRequest' - required: true - deprecated: false /v1/toolgroups: get: responses: @@ -6440,7 +6377,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9132,7 +9069,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9440,7 +9377,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9921,274 +9858,6 @@ components: title: ListToolDefsResponse description: >- Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. - InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_db_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_db_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. 
- LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. - RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 
0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_db_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_db_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. ToolGroup: type: object properties: @@ -10203,7 +9872,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -11325,7 +10994,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -12652,7 +12321,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md index b7134b3e1..666850976 100644 --- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md @@ -21,7 +21,7 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo | inference | `inline::meta-reference` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::model-context-protocol` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | diff --git a/docs/docs/distributions/self_hosted_distro/nvidia.md b/docs/docs/distributions/self_hosted_distro/nvidia.md index 4a7d99ff5..b1de9ddb8 100644 --- a/docs/docs/distributions/self_hosted_distro/nvidia.md +++ b/docs/docs/distributions/self_hosted_distro/nvidia.md @@ -16,7 +16,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov | post_training | `remote::nvidia` | | safety | `remote::nvidia` | | scoring | `inline::basic` | -| tool_runtime | `inline::rag-runtime` | +| tool_runtime | | | vector_io | `inline::faiss` | diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx index 98a372250..459498a59 100644 --- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx @@ -28,7 +28,7 @@ description: | #### 
Empirical Example Consider the histogram below in which 10,000 randomly generated strings were inserted - in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`. + in batches of 100 into both Faiss and sqlite-vec. ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png :alt: Comparison of SQLite-Vec and Faiss write times @@ -233,7 +233,7 @@ Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, i #### Empirical Example Consider the histogram below in which 10,000 randomly generated strings were inserted -in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`. +in batches of 100 into both Faiss and sqlite-vec. ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png :alt: Comparison of SQLite-Vec and Faiss write times diff --git a/docs/docs/references/llama_stack_client_cli_reference.md b/docs/docs/references/llama_stack_client_cli_reference.md index 9bb514a2d..a4321938a 100644 --- a/docs/docs/references/llama_stack_client_cli_reference.md +++ b/docs/docs/references/llama_stack_client_cli_reference.md @@ -32,7 +32,6 @@ Commands: scoring_functions Manage scoring functions. shields Manage safety shield services. toolgroups Manage available tool groups. - vector_dbs Manage vector databases. ``` ### `llama-stack-client configure` @@ -211,53 +210,6 @@ Unregister a model from distribution endpoint llama-stack-client models unregister ``` -## Vector DB Management -Manage vector databases. - - -### `llama-stack-client vector_dbs list` -Show available vector dbs on distribution endpoint -```bash -llama-stack-client vector_dbs list -``` -``` -┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ -┃ identifier ┃ provider_id ┃ provider_resource_id ┃ vector_db_type ┃ params ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -│ my_demo_vector_db │ faiss │ my_demo_vector_db │ │ embedding_dimension: 768 │ -│ │ │ │ │ embedding_model: nomic-embed-text-v1.5 │ -│ │ │ │ │ type: vector_db │ -│ │ │ │ │ │ -└──────────────────────────┴─────────────┴──────────────────────────┴────────────────┴───────────────────────────────────┘ -``` - -### `llama-stack-client vector_dbs register` -Create a new vector db -```bash -llama-stack-client vector_dbs register [--provider-id ] [--provider-vector-db-id ] [--embedding-model ] [--embedding-dimension ] -``` - - -Required arguments: -- `VECTOR_DB_ID`: Vector DB ID - -Optional arguments: -- `--provider-id`: Provider ID for the vector db -- `--provider-vector-db-id`: Provider's vector db ID -- `--embedding-model`: Embedding model to use. Default: `nomic-embed-text-v1.5` -- `--embedding-dimension`: Dimension of embeddings. Default: 768 - -### `llama-stack-client vector_dbs unregister` -Delete a vector db -```bash -llama-stack-client vector_dbs unregister -``` - - -Required arguments: -- `VECTOR_DB_ID`: Vector DB ID - - ## Shield Management Manage safety shield services. 
### `llama-stack-client shields list` diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py index 2970d7e53..e5f33f13d 100644 --- a/docs/openapi_generator/pyopenapi/operations.py +++ b/docs/openapi_generator/pyopenapi/operations.py @@ -196,16 +196,10 @@ def _get_endpoint_functions( def _get_defining_class(member_fn: str, derived_cls: type) -> type: "Find the class in which a member function is first defined in a class inheritance hierarchy." - # This import must be dynamic here - from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime - # iterate in reverse member resolution order to find most specific class first for cls in reversed(inspect.getmro(derived_cls)): for name, _ in inspect.getmembers(cls, inspect.isfunction): if name == member_fn: - # HACK ALERT - if cls == RAGToolRuntime: - return ToolRuntime return cls raise ValidationError( diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 98ed50c4f..d920317cf 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -5547,7 +5547,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -5798,7 +5798,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 99c8dd03e..66b2caeca 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -4114,7 +4114,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -4303,7 +4303,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/static/experimental-llama-stack-spec.html b/docs/static/experimental-llama-stack-spec.html index 7d572f89f..ab474180e 100644 --- a/docs/static/experimental-llama-stack-spec.html +++ b/docs/static/experimental-llama-stack-spec.html @@ -1850,7 +1850,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -3983,7 +3983,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index fee20814c..dd9e43cc5 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -1320,7 +1320,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -2927,7 +2927,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 1091a1cb6..7dfb2ed13 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -2624,89 +2624,6 @@ "deprecated": false } }, - "/v1/tool-runtime/rag-tool/insert": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, 
- "tags": [ - "ToolRuntime" - ], - "summary": "Index documents so they can be used by the RAG system.", - "description": "Index documents so they can be used by the RAG system.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InsertRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1/tool-runtime/rag-tool/query": { - "post": { - "responses": { - "200": { - "description": "RAGQueryResult containing the retrieved content and metadata", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RAGQueryResult" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ToolRuntime" - ], - "summary": "Query the RAG system for context; typically invoked by the agent.", - "description": "Query the RAG system for context; typically invoked by the agent.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, "/v1/toolgroups": { "get": { "responses": { @@ -6800,7 +6717,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -10205,7 +10122,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -10687,7 +10604,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -11383,346 +11300,6 @@ "title": "ListToolDefsResponse", "description": "Response containing a list of tool definitions." }, - "RAGDocument": { - "type": "object", - "properties": { - "document_id": { - "type": "string", - "description": "The unique identifier for the document." - }, - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/InterleavedContentItem" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/InterleavedContentItem" - } - }, - { - "$ref": "#/components/schemas/URL" - } - ], - "description": "The content of the document." - }, - "mime_type": { - "type": "string", - "description": "The MIME type of the document." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata for the document." - } - }, - "additionalProperties": false, - "required": [ - "document_id", - "content", - "metadata" - ], - "title": "RAGDocument", - "description": "A document to be used for document ingestion in the RAG Tool." 
- }, - "InsertRequest": { - "type": "object", - "properties": { - "documents": { - "type": "array", - "items": { - "$ref": "#/components/schemas/RAGDocument" - }, - "description": "List of documents to index in the RAG system" - }, - "vector_db_id": { - "type": "string", - "description": "ID of the vector database to store the document embeddings" - }, - "chunk_size_in_tokens": { - "type": "integer", - "description": "(Optional) Size in tokens for document chunking during indexing" - } - }, - "additionalProperties": false, - "required": [ - "documents", - "vector_db_id", - "chunk_size_in_tokens" - ], - "title": "InsertRequest" - }, - "DefaultRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "default", - "default": "default", - "description": "Type of query generator, always 'default'" - }, - "separator": { - "type": "string", - "default": " ", - "description": "String separator used to join query terms" - } - }, - "additionalProperties": false, - "required": [ - "type", - "separator" - ], - "title": "DefaultRAGQueryGeneratorConfig", - "description": "Configuration for the default RAG query generator." - }, - "LLMRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "llm", - "default": "llm", - "description": "Type of query generator, always 'llm'" - }, - "model": { - "type": "string", - "description": "Name of the language model to use for query generation" - }, - "template": { - "type": "string", - "description": "Template string for formatting the query generation prompt" - } - }, - "additionalProperties": false, - "required": [ - "type", - "model", - "template" - ], - "title": "LLMRAGQueryGeneratorConfig", - "description": "Configuration for the LLM-based RAG query generator." - }, - "RAGQueryConfig": { - "type": "object", - "properties": { - "query_generator_config": { - "oneOf": [ - { - "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" - }, - { - "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", - "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - }, - "description": "Configuration for the query generator." - }, - "max_tokens_in_context": { - "type": "integer", - "default": 4096, - "description": "Maximum number of tokens in the context." - }, - "max_chunks": { - "type": "integer", - "default": 5, - "description": "Maximum number of chunks to retrieve." - }, - "chunk_template": { - "type": "string", - "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", - "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" - }, - "mode": { - "$ref": "#/components/schemas/RAGSearchMode", - "default": "vector", - "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"." - }, - "ranker": { - "$ref": "#/components/schemas/Ranker", - "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker." 
- } - }, - "additionalProperties": false, - "required": [ - "query_generator_config", - "max_tokens_in_context", - "max_chunks", - "chunk_template" - ], - "title": "RAGQueryConfig", - "description": "Configuration for the RAG query generation." - }, - "RAGSearchMode": { - "type": "string", - "enum": [ - "vector", - "keyword", - "hybrid" - ], - "title": "RAGSearchMode", - "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results" - }, - "RRFRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "rrf", - "default": "rrf", - "description": "The type of ranker, always \"rrf\"" - }, - "impact_factor": { - "type": "number", - "default": 60.0, - "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0" - } - }, - "additionalProperties": false, - "required": [ - "type", - "impact_factor" - ], - "title": "RRFRanker", - "description": "Reciprocal Rank Fusion (RRF) ranker configuration." - }, - "Ranker": { - "oneOf": [ - { - "$ref": "#/components/schemas/RRFRanker" - }, - { - "$ref": "#/components/schemas/WeightedRanker" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "rrf": "#/components/schemas/RRFRanker", - "weighted": "#/components/schemas/WeightedRanker" - } - } - }, - "WeightedRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "weighted", - "default": "weighted", - "description": "The type of ranker, always \"weighted\"" - }, - "alpha": { - "type": "number", - "default": 0.5, - "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores." - } - }, - "additionalProperties": false, - "required": [ - "type", - "alpha" - ], - "title": "WeightedRanker", - "description": "Weighted ranker configuration that combines vector and keyword scores." - }, - "QueryRequest": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The query content to search for in the indexed documents" - }, - "vector_db_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of vector database IDs to search within" - }, - "query_config": { - "$ref": "#/components/schemas/RAGQueryConfig", - "description": "(Optional) Configuration parameters for the query operation" - } - }, - "additionalProperties": false, - "required": [ - "content", - "vector_db_ids" - ], - "title": "QueryRequest" - }, - "RAGQueryResult": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "(Optional) The retrieved content from the query" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata about the query result" - } - }, - "additionalProperties": false, - "required": [ - "metadata" - ], - "title": "RAGQueryResult", - "description": "Result of a RAG query containing retrieved content and metadata." 
- }, "ToolGroup": { "type": "object", "properties": { @@ -11740,7 +11317,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 6c3702374..1b0fefe55 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -2036,69 +2036,6 @@ paths: schema: $ref: '#/components/schemas/URL' deprecated: false - /v1/tool-runtime/rag-tool/insert: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Index documents so they can be used by the RAG system. - description: >- - Index documents so they can be used by the RAG system. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertRequest' - required: true - deprecated: false - /v1/tool-runtime/rag-tool/query: - post: - responses: - '200': - description: >- - RAGQueryResult containing the retrieved content and metadata - content: - application/json: - schema: - $ref: '#/components/schemas/RAGQueryResult' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Query the RAG system for context; typically invoked by the agent. - description: >- - Query the RAG system for context; typically invoked by the agent. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryRequest' - required: true - deprecated: false /v1/toolgroups: get: responses: @@ -5227,7 +5164,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -7919,7 +7856,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -8227,7 +8164,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -8708,274 +8645,6 @@ components: title: ListToolDefsResponse description: >- Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. 
- InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_db_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_db_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. - LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. 
- RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_db_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_db_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. 
ToolGroup: type: object properties: @@ -8990,7 +8659,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index ee0a265d3..7930b28e6 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -2624,89 +2624,6 @@ "deprecated": false } }, - "/v1/tool-runtime/rag-tool/insert": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ToolRuntime" - ], - "summary": "Index documents so they can be used by the RAG system.", - "description": "Index documents so they can be used by the RAG system.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/InsertRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1/tool-runtime/rag-tool/query": { - "post": { - "responses": { - "200": { - "description": "RAGQueryResult containing the retrieved content and metadata", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RAGQueryResult" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ToolRuntime" - ], - "summary": "Query the RAG system for context; typically invoked by the agent.", - "description": "Query the RAG system for context; typically invoked by the agent.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, "/v1/toolgroups": { "get": { "responses": { @@ -8472,7 +8389,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -11877,7 +11794,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -12359,7 +12276,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -13055,346 +12972,6 @@ "title": "ListToolDefsResponse", "description": "Response containing a list of tool definitions." }, - "RAGDocument": { - "type": "object", - "properties": { - "document_id": { - "type": "string", - "description": "The unique identifier for the document." - }, - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/InterleavedContentItem" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/InterleavedContentItem" - } - }, - { - "$ref": "#/components/schemas/URL" - } - ], - "description": "The content of the document." - }, - "mime_type": { - "type": "string", - "description": "The MIME type of the document." 
- }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata for the document." - } - }, - "additionalProperties": false, - "required": [ - "document_id", - "content", - "metadata" - ], - "title": "RAGDocument", - "description": "A document to be used for document ingestion in the RAG Tool." - }, - "InsertRequest": { - "type": "object", - "properties": { - "documents": { - "type": "array", - "items": { - "$ref": "#/components/schemas/RAGDocument" - }, - "description": "List of documents to index in the RAG system" - }, - "vector_db_id": { - "type": "string", - "description": "ID of the vector database to store the document embeddings" - }, - "chunk_size_in_tokens": { - "type": "integer", - "description": "(Optional) Size in tokens for document chunking during indexing" - } - }, - "additionalProperties": false, - "required": [ - "documents", - "vector_db_id", - "chunk_size_in_tokens" - ], - "title": "InsertRequest" - }, - "DefaultRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "default", - "default": "default", - "description": "Type of query generator, always 'default'" - }, - "separator": { - "type": "string", - "default": " ", - "description": "String separator used to join query terms" - } - }, - "additionalProperties": false, - "required": [ - "type", - "separator" - ], - "title": "DefaultRAGQueryGeneratorConfig", - "description": "Configuration for the default RAG query generator." - }, - "LLMRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "llm", - "default": "llm", - "description": "Type of query generator, always 'llm'" - }, - "model": { - "type": "string", - "description": "Name of the language model to use for query generation" - }, - "template": { - "type": "string", - "description": "Template string for formatting the query generation prompt" - } - }, - "additionalProperties": false, - "required": [ - "type", - "model", - "template" - ], - "title": "LLMRAGQueryGeneratorConfig", - "description": "Configuration for the LLM-based RAG query generator." - }, - "RAGQueryConfig": { - "type": "object", - "properties": { - "query_generator_config": { - "oneOf": [ - { - "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" - }, - { - "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", - "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - }, - "description": "Configuration for the query generator." - }, - "max_tokens_in_context": { - "type": "integer", - "default": 4096, - "description": "Maximum number of tokens in the context." - }, - "max_chunks": { - "type": "integer", - "default": 5, - "description": "Maximum number of chunks to retrieve." - }, - "chunk_template": { - "type": "string", - "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", - "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). 
Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" - }, - "mode": { - "$ref": "#/components/schemas/RAGSearchMode", - "default": "vector", - "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"." - }, - "ranker": { - "$ref": "#/components/schemas/Ranker", - "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker." - } - }, - "additionalProperties": false, - "required": [ - "query_generator_config", - "max_tokens_in_context", - "max_chunks", - "chunk_template" - ], - "title": "RAGQueryConfig", - "description": "Configuration for the RAG query generation." - }, - "RAGSearchMode": { - "type": "string", - "enum": [ - "vector", - "keyword", - "hybrid" - ], - "title": "RAGSearchMode", - "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results" - }, - "RRFRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "rrf", - "default": "rrf", - "description": "The type of ranker, always \"rrf\"" - }, - "impact_factor": { - "type": "number", - "default": 60.0, - "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0" - } - }, - "additionalProperties": false, - "required": [ - "type", - "impact_factor" - ], - "title": "RRFRanker", - "description": "Reciprocal Rank Fusion (RRF) ranker configuration." - }, - "Ranker": { - "oneOf": [ - { - "$ref": "#/components/schemas/RRFRanker" - }, - { - "$ref": "#/components/schemas/WeightedRanker" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "rrf": "#/components/schemas/RRFRanker", - "weighted": "#/components/schemas/WeightedRanker" - } - } - }, - "WeightedRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "weighted", - "default": "weighted", - "description": "The type of ranker, always \"weighted\"" - }, - "alpha": { - "type": "number", - "default": 0.5, - "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores." - } - }, - "additionalProperties": false, - "required": [ - "type", - "alpha" - ], - "title": "WeightedRanker", - "description": "Weighted ranker configuration that combines vector and keyword scores." 
- }, - "QueryRequest": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The query content to search for in the indexed documents" - }, - "vector_db_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of vector database IDs to search within" - }, - "query_config": { - "$ref": "#/components/schemas/RAGQueryConfig", - "description": "(Optional) Configuration parameters for the query operation" - } - }, - "additionalProperties": false, - "required": [ - "content", - "vector_db_ids" - ], - "title": "QueryRequest" - }, - "RAGQueryResult": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "(Optional) The retrieved content from the query" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata about the query result" - } - }, - "additionalProperties": false, - "required": [ - "metadata" - ], - "title": "RAGQueryResult", - "description": "Result of a RAG query containing retrieved content and metadata." - }, "ToolGroup": { "type": "object", "properties": { @@ -13412,7 +12989,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -14959,7 +14536,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -16704,7 +16281,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index eff01931f..98a309f12 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2039,69 +2039,6 @@ paths: schema: $ref: '#/components/schemas/URL' deprecated: false - /v1/tool-runtime/rag-tool/insert: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Index documents so they can be used by the RAG system. - description: >- - Index documents so they can be used by the RAG system. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InsertRequest' - required: true - deprecated: false - /v1/tool-runtime/rag-tool/query: - post: - responses: - '200': - description: >- - RAGQueryResult containing the retrieved content and metadata - content: - application/json: - schema: - $ref: '#/components/schemas/RAGQueryResult' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ToolRuntime - summary: >- - Query the RAG system for context; typically invoked by the agent. - description: >- - Query the RAG system for context; typically invoked by the agent. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryRequest' - required: true - deprecated: false /v1/toolgroups: get: responses: @@ -6440,7 +6377,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9132,7 +9069,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9440,7 +9377,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9921,274 +9858,6 @@ components: title: ListToolDefsResponse description: >- Response containing a list of tool definitions. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. - InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_db_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_db_id - - chunk_size_in_tokens - title: InsertRequest - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. - LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - description: Configuration for the query generator. 
- max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. - RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. 
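As an editorial aside, the two ranker schemas above reduce to simple arithmetic. The sketch below is not part of the spec or this patch; the helper names are hypothetical. It shows the standard Reciprocal Rank Fusion formula 1/(k + rank), reading impact_factor as the constant k, and the weighted ranker's convex blend of the two scores as described above.

# Hypothetical helpers illustrating the removed RRFRanker / WeightedRanker schemas.
def rrf_score(ranks: list[int], impact_factor: float = 60.0) -> float:
    # Standard RRF: each ranked list contributes 1 / (impact_factor + rank).
    return sum(1.0 / (impact_factor + rank) for rank in ranks)

def weighted_score(vector_score: float, keyword_score: float, alpha: float = 0.5) -> float:
    # Per the schema: alpha=0 keeps only the keyword score, alpha=1 only the
    # vector score; values in between blend the two.
    return alpha * vector_score + (1.0 - alpha) * keyword_score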
- QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_db_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_db_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. ToolGroup: type: object properties: @@ -10203,7 +9872,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -11325,7 +10994,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -12652,7 +12321,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py index 5777f3d04..948ec615f 100644 --- a/llama_stack/apis/datatypes.py +++ b/llama_stack/apis/datatypes.py @@ -121,7 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta): models = "models" shields = "shields" - vector_dbs = "vector_dbs" # only used for routing + vector_stores = "vector_stores" # only used for routing table datasets = "datasets" scoring_functions = "scoring_functions" benchmarks = "benchmarks" diff --git a/llama_stack/apis/resource.py b/llama_stack/apis/resource.py index 7c4130f7d..dafdb28b0 100644 --- a/llama_stack/apis/resource.py +++ b/llama_stack/apis/resource.py @@ -13,7 +13,7 @@ from pydantic import BaseModel, Field class ResourceType(StrEnum): model = "model" shield = "shield" - vector_db = "vector_db" + vector_store = "vector_store" dataset = "dataset" scoring_function = "scoring_function" benchmark = "benchmark" @@ -34,4 +34,4 @@ class Resource(BaseModel): provider_id: str = Field(description="ID of the provider that owns this resource") - type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)") + type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)") diff --git a/llama_stack/apis/tools/__init__.py b/llama_stack/apis/tools/__init__.py index b25310ecf..2908d1c62 100644 --- a/llama_stack/apis/tools/__init__.py +++ b/llama_stack/apis/tools/__init__.py @@ -4,5 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .rag_tool import * from .tools import * diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py deleted file mode 100644 index ed7847e23..000000000 --- a/llama_stack/apis/tools/rag_tool.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum, StrEnum -from typing import Annotated, Any, Literal, Protocol - -from pydantic import BaseModel, Field, field_validator -from typing_extensions import runtime_checkable - -from llama_stack.apis.common.content_types import URL, InterleavedContent -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod - - -@json_schema_type -class RRFRanker(BaseModel): - """ - Reciprocal Rank Fusion (RRF) ranker configuration. - - :param type: The type of ranker, always "rrf" - :param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. - Must be greater than 0 - """ - - type: Literal["rrf"] = "rrf" - impact_factor: float = Field(default=60.0, gt=0.0) # default of 60 for optimal performance - - -@json_schema_type -class WeightedRanker(BaseModel): - """ - Weighted ranker configuration that combines vector and keyword scores. - - :param type: The type of ranker, always "weighted" - :param alpha: Weight factor between 0 and 1. - 0 means only use keyword scores, - 1 means only use vector scores, - values in between blend both scores. - """ - - type: Literal["weighted"] = "weighted" - alpha: float = Field( - default=0.5, - ge=0.0, - le=1.0, - description="Weight factor between 0 and 1. 0 means only keyword scores, 1 means only vector scores.", - ) - - -Ranker = Annotated[ - RRFRanker | WeightedRanker, - Field(discriminator="type"), -] -register_schema(Ranker, name="Ranker") - - -@json_schema_type -class RAGDocument(BaseModel): - """ - A document to be used for document ingestion in the RAG Tool. - - :param document_id: The unique identifier for the document. - :param content: The content of the document. - :param mime_type: The MIME type of the document. - :param metadata: Additional metadata for the document. - """ - - document_id: str - content: InterleavedContent | URL - mime_type: str | None = None - metadata: dict[str, Any] = Field(default_factory=dict) - - -@json_schema_type -class RAGQueryResult(BaseModel): - """Result of a RAG query containing retrieved content and metadata. - - :param content: (Optional) The retrieved content from the query - :param metadata: Additional metadata about the query result - """ - - content: InterleavedContent | None = None - metadata: dict[str, Any] = Field(default_factory=dict) - - -@json_schema_type -class RAGQueryGenerator(Enum): - """Types of query generators for RAG systems. - - :cvar default: Default query generator using simple text processing - :cvar llm: LLM-based query generator for enhanced query understanding - :cvar custom: Custom query generator implementation - """ - - default = "default" - llm = "llm" - custom = "custom" - - -@json_schema_type -class RAGSearchMode(StrEnum): - """ - Search modes for RAG query retrieval: - - VECTOR: Uses vector similarity search for semantic matching - - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - """ - - VECTOR = "vector" - KEYWORD = "keyword" - HYBRID = "hybrid" - - -@json_schema_type -class DefaultRAGQueryGeneratorConfig(BaseModel): - """Configuration for the default RAG query generator. 
- - :param type: Type of query generator, always 'default' - :param separator: String separator used to join query terms - """ - - type: Literal["default"] = "default" - separator: str = " " - - -@json_schema_type -class LLMRAGQueryGeneratorConfig(BaseModel): - """Configuration for the LLM-based RAG query generator. - - :param type: Type of query generator, always 'llm' - :param model: Name of the language model to use for query generation - :param template: Template string for formatting the query generation prompt - """ - - type: Literal["llm"] = "llm" - model: str - template: str - - -RAGQueryGeneratorConfig = Annotated[ - DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig, - Field(discriminator="type"), -] -register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig") - - -@json_schema_type -class RAGQueryConfig(BaseModel): - """ - Configuration for the RAG query generation. - - :param query_generator_config: Configuration for the query generator. - :param max_tokens_in_context: Maximum number of tokens in the context. - :param max_chunks: Maximum number of chunks to retrieve. - :param chunk_template: Template for formatting each retrieved chunk in the context. - Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). - Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n" - :param mode: Search mode for retrieval—either "vector", "keyword", or "hybrid". Default "vector". - :param ranker: Configuration for the ranker to use in hybrid search. Defaults to RRF ranker. - """ - - # This config defines how a query is generated using the messages - # for memory bank retrieval. - query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig()) - max_tokens_in_context: int = 4096 - max_chunks: int = 5 - chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" - mode: RAGSearchMode | None = RAGSearchMode.VECTOR - ranker: Ranker | None = Field(default=None) # Only used for hybrid mode - - @field_validator("chunk_template") - def validate_chunk_template(cls, v: str) -> str: - if "{chunk.content}" not in v: - raise ValueError("chunk_template must contain {chunk.content}") - if "{index}" not in v: - raise ValueError("chunk_template must contain {index}") - if len(v) == 0: - raise ValueError("chunk_template must not be empty") - return v - - -@runtime_checkable -@trace_protocol -class RAGToolRuntime(Protocol): - @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1) - async def insert( - self, - documents: list[RAGDocument], - vector_db_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - """Index documents so they can be used by the RAG system. - - :param documents: List of documents to index in the RAG system - :param vector_db_id: ID of the vector database to store the document embeddings - :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing - """ - ... - - @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1) - async def query( - self, - content: InterleavedContent, - vector_db_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - """Query the RAG system for context; typically invoked by the agent. 
- - :param content: The query content to search for in the indexed documents - :param vector_db_ids: List of vector database IDs to search within - :param query_config: (Optional) Configuration parameters for the query operation - :returns: RAGQueryResult containing the retrieved content and metadata - """ - ... diff --git a/llama_stack/apis/tools/tools.py b/llama_stack/apis/tools/tools.py index b6a1a2543..feac0d33e 100644 --- a/llama_stack/apis/tools/tools.py +++ b/llama_stack/apis/tools/tools.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum from typing import Any, Literal, Protocol from pydantic import BaseModel @@ -16,8 +15,6 @@ from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.schema_utils import json_schema_type, webmethod -from .rag_tool import RAGToolRuntime - @json_schema_type class ToolDef(BaseModel): @@ -181,22 +178,11 @@ class ToolGroups(Protocol): ... -class SpecialToolGroup(Enum): - """Special tool groups with predefined functionality. - - :cvar rag_tool: Retrieval-Augmented Generation tool group for document search and retrieval - """ - - rag_tool = "rag_tool" - - @runtime_checkable @trace_protocol class ToolRuntime(Protocol): tool_store: ToolStore | None = None - rag_tool: RAGToolRuntime | None = None - # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed. @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1) async def list_runtime_tools( diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py deleted file mode 100644 index 0368095cb..000000000 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Literal, Protocol, runtime_checkable - -from pydantic import BaseModel - -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class VectorDB(Resource): - """Vector database resource for storing and querying vector embeddings. - - :param type: Type of resource, always 'vector_db' for vector databases - :param embedding_model: Name of the embedding model to use for vector generation - :param embedding_dimension: Dimension of the embedding vectors - """ - - type: Literal[ResourceType.vector_db] = ResourceType.vector_db - - embedding_model: str - embedding_dimension: int - vector_db_name: str | None = None - - @property - def vector_db_id(self) -> str: - return self.identifier - - @property - def provider_vector_db_id(self) -> str | None: - return self.provider_resource_id - - -class VectorDBInput(BaseModel): - """Input parameters for creating or configuring a vector database. 
- - :param vector_db_id: Unique identifier for the vector database - :param embedding_model: Name of the embedding model to use for vector generation - :param embedding_dimension: Dimension of the embedding vectors - :param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database - """ - - vector_db_id: str - embedding_model: str - embedding_dimension: int - provider_id: str | None = None - provider_vector_db_id: str | None = None - - -class ListVectorDBsResponse(BaseModel): - """Response from listing vector databases. - - :param data: List of vector databases - """ - - data: list[VectorDB] - - -@runtime_checkable -class VectorDBs(Protocol): - """Internal protocol for vector_dbs routing - no public API endpoints.""" - - async def list_vector_dbs(self) -> ListVectorDBsResponse: - """Internal method to list vector databases.""" - ... - - async def get_vector_db( - self, - vector_db_id: str, - ) -> VectorDB: - """Internal method to get a vector database by ID.""" - ... - - async def register_vector_db( - self, - vector_db_id: str, - embedding_model: str, - embedding_dimension: int | None = 384, - provider_id: str | None = None, - vector_db_name: str | None = None, - provider_vector_db_id: str | None = None, - ) -> VectorDB: - """Internal method to register a vector database.""" - ... - - async def unregister_vector_db(self, vector_db_id: str) -> None: - """Internal method to unregister a vector database.""" - ... diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index a309c47f9..49e4df039 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -15,7 +15,7 @@ from fastapi import Body from pydantic import BaseModel, Field from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB +from llama_stack.apis.vector_stores import VectorStore from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id @@ -140,6 +140,7 @@ class VectorStoreFileCounts(BaseModel): total: int +# TODO: rename this as OpenAIVectorStore @json_schema_type class VectorStoreObject(BaseModel): """OpenAI Vector Store object. @@ -517,17 +518,18 @@ class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="all chunking_strategy: VectorStoreChunkingStrategy | None = None -class VectorDBStore(Protocol): - def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ... +class VectorStoreTable(Protocol): + def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ... @runtime_checkable @trace_protocol class VectorIO(Protocol): - vector_db_store: VectorDBStore | None = None + vector_store_table: VectorStoreTable | None = None # this will just block now until chunks are inserted, but it should # probably return a Job instance which can be polled for completion + # TODO: rename vector_db_id to vector_store_id once Stainless is working @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1) async def insert_chunks( self, @@ -546,6 +548,7 @@ class VectorIO(Protocol): """ ... 
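The VectorStoreTable protocol introduced above is the router-facing lookup that replaces VectorDBStore. As a hedged sketch, not part of this patch and assuming only the VectorStore model added under llama_stack/apis/vector_stores, a minimal conforming implementation could look like:

# Hypothetical in-memory implementation of the VectorStoreTable protocol.
from llama_stack.apis.vector_stores import VectorStore

class InMemoryVectorStoreTable:
    def __init__(self) -> None:
        self._stores: dict[str, VectorStore] = {}

    def register(self, store: VectorStore) -> None:
        self._stores[store.vector_store_id] = store

    def get_vector_store(self, vector_store_id: str) -> VectorStore | None:
        # The protocol returns None for unknown ids, leaving miss handling to the caller.
        return self._stores.get(vector_store_id)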
+    # TODO: rename vector_db_id to vector_store_id once Stainless is working
     @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
     async def query_chunks(
         self,
diff --git a/llama_stack/apis/vector_dbs/__init__.py b/llama_stack/apis/vector_stores/__init__.py
similarity index 87%
rename from llama_stack/apis/vector_dbs/__init__.py
rename to llama_stack/apis/vector_stores/__init__.py
index af34ba9d4..8fc34058a 100644
--- a/llama_stack/apis/vector_dbs/__init__.py
+++ b/llama_stack/apis/vector_stores/__init__.py
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .vector_dbs import *
+from .vector_stores import *
diff --git a/llama_stack/apis/vector_stores/vector_stores.py b/llama_stack/apis/vector_stores/vector_stores.py
new file mode 100644
index 000000000..524624028
--- /dev/null
+++ b/llama_stack/apis/vector_stores/vector_stores.py
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Literal
+
+from pydantic import BaseModel
+
+from llama_stack.apis.resource import Resource, ResourceType
+
+
+# Internal resource type for storing vector store routing and related information
+class VectorStore(Resource):
+    """Vector store resource for storing and querying vector embeddings.
+
+    :param type: Type of resource, always 'vector_store' for vector stores
+    :param embedding_model: Name of the embedding model to use for vector generation
+    :param embedding_dimension: Dimension of the embedding vectors
+    """
+
+    type: Literal[ResourceType.vector_store] = ResourceType.vector_store
+
+    embedding_model: str
+    embedding_dimension: int
+    vector_store_name: str | None = None
+
+    @property
+    def vector_store_id(self) -> str:
+        return self.identifier
+
+    @property
+    def provider_vector_store_id(self) -> str | None:
+        return self.provider_resource_id
+
+
+class VectorStoreInput(BaseModel):
+    """Input parameters for creating or configuring a vector store.
+
+    :param vector_store_id: Unique identifier for the vector store
+    :param embedding_model: Name of the embedding model to use for vector generation
+    :param embedding_dimension: Dimension of the embedding vectors
+    :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
+    """
+
+    vector_store_id: str
+    embedding_model: str
+    embedding_dimension: int
+    provider_id: str | None = None
+    provider_vector_store_id: str | None = None
diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py
deleted file mode 100644
index 2a30ff394..000000000
--- a/llama_stack/cli/stack/_build.py
+++ /dev/null
@@ -1,519 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
- -import argparse -import importlib.resources -import json -import os -import shutil -import sys -import textwrap -from functools import lru_cache -from importlib.abc import Traversable -from pathlib import Path - -import yaml -from prompt_toolkit import prompt -from prompt_toolkit.completion import WordCompleter -from prompt_toolkit.validation import Validator -from termcolor import colored, cprint - -from llama_stack.cli.stack.utils import ImageType -from llama_stack.cli.table import print_table -from llama_stack.core.build import ( - SERVER_DEPENDENCIES, - build_image, - get_provider_dependencies, -) -from llama_stack.core.configure import parse_and_maybe_upgrade_config -from llama_stack.core.datatypes import ( - BuildConfig, - BuildProvider, - DistributionSpec, - Provider, - StackRunConfig, -) -from llama_stack.core.distribution import get_provider_registry -from llama_stack.core.external import load_external_apis -from llama_stack.core.resolver import InvalidProviderError -from llama_stack.core.stack import replace_env_vars -from llama_stack.core.storage.datatypes import ( - InferenceStoreReference, - KVStoreReference, - ServerStoresConfig, - SqliteKVStoreConfig, - SqliteSqlStoreConfig, - SqlStoreReference, - StorageConfig, -) -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR -from llama_stack.core.utils.dynamic import instantiate_class_type -from llama_stack.core.utils.exec import formulate_run_args, run_command -from llama_stack.core.utils.image_types import LlamaStackImageType -from llama_stack.providers.datatypes import Api - -DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions" - - -@lru_cache -def available_distros_specs() -> dict[str, BuildConfig]: - import yaml - - distro_specs = {} - for p in DISTRIBS_PATH.rglob("*build.yaml"): - distro_name = p.parent.name - with open(p) as f: - build_config = BuildConfig(**yaml.safe_load(f)) - distro_specs[distro_name] = build_config - return distro_specs - - -def run_stack_build_command(args: argparse.Namespace) -> None: - if args.list_distros: - return _run_distro_list_cmd() - - if args.image_type == ImageType.VENV.value: - current_venv = os.environ.get("VIRTUAL_ENV") - image_name = args.image_name or current_venv - else: - image_name = args.image_name - - if args.template: - cprint( - "The --template argument is deprecated. Please use --distro instead.", - color="red", - file=sys.stderr, - ) - distro_name = args.template - else: - distro_name = args.distribution - - if distro_name: - available_distros = available_distros_specs() - if distro_name not in available_distros: - cprint( - f"Could not find distribution {distro_name}. Please run `llama stack build --list-distros` to check out the available distributions", - color="red", - file=sys.stderr, - ) - sys.exit(1) - build_config = available_distros[distro_name] - if args.image_type: - build_config.image_type = args.image_type - else: - cprint( - f"Please specify a image-type ({' | '.join(e.value for e in ImageType)}) for {distro_name}", - color="red", - file=sys.stderr, - ) - sys.exit(1) - elif args.providers: - provider_list: dict[str, list[BuildProvider]] = dict() - for api_provider in args.providers.split(","): - if "=" not in api_provider: - cprint( - "Could not parse `--providers`. 
Please ensure the list is in the format api1=provider1,api2=provider2",
-                    color="red",
-                    file=sys.stderr,
-                )
-                sys.exit(1)
-            api, provider_type = api_provider.split("=")
-            providers_for_api = get_provider_registry().get(Api(api), None)
-            if providers_for_api is None:
-                cprint(
-                    f"{api} is not a valid API.",
-                    color="red",
-                    file=sys.stderr,
-                )
-                sys.exit(1)
-            if provider_type in providers_for_api:
-                provider = BuildProvider(
-                    provider_type=provider_type,
-                    module=None,
-                )
-                provider_list.setdefault(api, []).append(provider)
-            else:
-                cprint(
-                    f"{provider} is not a valid provider for the {api} API.",
-                    color="red",
-                    file=sys.stderr,
-                )
-                sys.exit(1)
-        distribution_spec = DistributionSpec(
-            providers=provider_list,
-            description=",".join(args.providers),
-        )
-        if not args.image_type:
-            cprint(
-                f"Please specify a image-type (container | venv) for {args.template}",
-                color="red",
-                file=sys.stderr,
-            )
-            sys.exit(1)
-
-        build_config = BuildConfig(image_type=args.image_type, distribution_spec=distribution_spec)
-    elif not args.config and not distro_name:
-        name = prompt(
-            "> Enter a name for your Llama Stack (e.g. my-local-stack): ",
-            validator=Validator.from_callable(
-                lambda x: len(x) > 0,
-                error_message="Name cannot be empty, please enter a name",
-            ),
-        )
-
-        image_type = prompt(
-            "> Enter the image type you want your Llama Stack to be built as (use <TAB> to see options): ",
-            completer=WordCompleter([e.value for e in ImageType]),
-            complete_while_typing=True,
-            validator=Validator.from_callable(
-                lambda x: x in [e.value for e in ImageType],
-                error_message="Invalid image type. Use <TAB> to see options",
-            ),
-        )
-
-        image_name = f"llamastack-{name}"
-
-        cprint(
-            textwrap.dedent(
-                """
-                Llama Stack is composed of several APIs working together. Let's select
-                the provider types (implementations) you want to use for these APIs.
-                """,
-            ),
-            color="green",
-            file=sys.stderr,
-        )
-
-        cprint("Tip: use <TAB> to see options for the providers.\n", color="green", file=sys.stderr)
-
-        providers: dict[str, list[BuildProvider]] = dict()
-        for api, providers_for_api in get_provider_registry().items():
-            available_providers = [x for x in providers_for_api.keys() if x not in ("remote", "remote::sample")]
-            if not available_providers:
-                continue
-            api_provider = prompt(
-                f"> Enter provider for API {api.value}: ",
-                completer=WordCompleter(available_providers),
-                complete_while_typing=True,
-                validator=Validator.from_callable(
-                    lambda x: x in available_providers,  # noqa: B023 - see https://github.com/astral-sh/ruff/issues/7847
-                    error_message="Invalid provider, use <TAB> to see options",
-                ),
-            )
-
-            string_providers = api_provider.split(" ")
-
-            for provider in string_providers:
-                providers.setdefault(api.value, []).append(BuildProvider(provider_type=provider))
-
-        description = prompt(
-            "\n > (Optional) Enter a short description for your Llama Stack: ",
-            default="",
-        )
-
-        distribution_spec = DistributionSpec(
-            providers=providers,
-            description=description,
-        )
-
-        build_config = BuildConfig(image_type=image_type, distribution_spec=distribution_spec)
-    else:
-        with open(args.config) as f:
-            try:
-                contents = yaml.safe_load(f)
-                contents = replace_env_vars(contents)
-                build_config = BuildConfig(**contents)
-                if args.image_type:
-                    build_config.image_type = args.image_type
-            except Exception as e:
-                cprint(
-                    f"Could not parse config file {args.config}: {e}",
-                    color="red",
-                    file=sys.stderr,
-                )
-                sys.exit(1)
-
-    if args.print_deps_only:
-        print(f"# Dependencies for {distro_name or args.config or image_name}")
-        normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
-        normal_deps += SERVER_DEPENDENCIES
-        print(f"uv pip install {' '.join(normal_deps)}")
-        for special_dep in special_deps:
-            print(f"uv pip install {special_dep}")
-        for external_dep in external_provider_dependencies:
-            print(f"uv pip install {external_dep}")
-        return
-
-    try:
-        run_config = _run_stack_build_command_from_build_config(
-            build_config,
-            image_name=image_name,
-            config_path=args.config,
-            distro_name=distro_name,
-        )
-
-    except (Exception, RuntimeError) as exc:
-        import traceback
-
-        cprint(
-            f"Error building stack: {exc}",
-            color="red",
-            file=sys.stderr,
-        )
-        cprint("Stack trace:", color="red", file=sys.stderr)
-        traceback.print_exc()
-        sys.exit(1)
-
-    if run_config is None:
-        cprint(
-            "Run config path is empty",
-            color="red",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-
-    if args.run:
-        config_dict = yaml.safe_load(run_config.read_text())
-        config = parse_and_maybe_upgrade_config(config_dict)
-        if config.external_providers_dir and not config.external_providers_dir.exists():
-            config.external_providers_dir.mkdir(exist_ok=True)
-        run_args = formulate_run_args(args.image_type, image_name or config.image_name)
-        run_args.extend([str(os.getenv("LLAMA_STACK_PORT", 8321)), "--config", str(run_config)])
-        run_command(run_args)
-
-
-def _generate_run_config(
-    build_config: BuildConfig,
-    build_dir: Path,
-    image_name: str,
-) -> Path:
-    """
-    Generate a run.yaml template file for user to edit from a build.yaml file
-    """
-    apis = list(build_config.distribution_spec.providers.keys())
-    distro_dir = DISTRIBS_BASE_DIR / image_name
-    storage = StorageConfig(
-        backends={
-            "kv_default": SqliteKVStoreConfig(
-                db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
-            ),
-            "sql_default": SqliteSqlStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db", - ), - }, - stores=ServerStoresConfig( - metadata=KVStoreReference( - backend="kv_default", - namespace="registry", - ), - inference=InferenceStoreReference( - backend="sql_default", - table_name="inference_store", - ), - conversations=SqlStoreReference( - backend="sql_default", - table_name="openai_conversations", - ), - ), - ) - - run_config = StackRunConfig( - container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), - image_name=image_name, - apis=apis, - providers={}, - storage=storage, - external_providers_dir=build_config.external_providers_dir - if build_config.external_providers_dir - else EXTERNAL_PROVIDERS_DIR, - ) - # build providers dict - provider_registry = get_provider_registry(build_config) - for api in apis: - run_config.providers[api] = [] - providers = build_config.distribution_spec.providers[api] - - for provider in providers: - pid = provider.provider_type.split("::")[-1] - - p = provider_registry[Api(api)][provider.provider_type] - if p.deprecation_error: - raise InvalidProviderError(p.deprecation_error) - - try: - config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class) - except (ModuleNotFoundError, ValueError) as exc: - # HACK ALERT: - # This code executes after building is done, the import cannot work since the - # package is either available in the venv or container - not available on the host. - # TODO: use a "is_external" flag in ProviderSpec to check if the provider is - # external - cprint( - f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}", - color="yellow", - file=sys.stderr, - ) - # Set config_type to None to avoid UnboundLocalError - config_type = None - - if config_type is not None and hasattr(config_type, "sample_run_config"): - config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}") - else: - config = {} - - p_spec = Provider( - provider_id=pid, - provider_type=provider.provider_type, - config=config, - module=provider.module, - ) - run_config.providers[api].append(p_spec) - - run_config_file = build_dir / f"{image_name}-run.yaml" - - with open(run_config_file, "w") as f: - to_write = json.loads(run_config.model_dump_json()) - f.write(yaml.dump(to_write, sort_keys=False)) - - # Only print this message for non-container builds since it will be displayed before the - # container is built - # For non-container builds, the run.yaml is generated at the very end of the build process so it - # makes sense to display this message - if build_config.image_type != LlamaStackImageType.CONTAINER.value: - cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr) - return run_config_file - - -def _run_stack_build_command_from_build_config( - build_config: BuildConfig, - image_name: str | None = None, - distro_name: str | None = None, - config_path: str | None = None, -) -> Path | Traversable: - image_name = image_name or build_config.image_name - if build_config.image_type == LlamaStackImageType.CONTAINER.value: - if distro_name: - image_name = f"distribution-{distro_name}" - else: - if not image_name: - raise ValueError("Please specify an image name when building a container image without a template") - else: - if not image_name and os.environ.get("UV_SYSTEM_PYTHON"): - image_name = "__system__" - if not image_name: - raise ValueError("Please specify an image 
name when building a venv image") - - # At this point, image_name should be guaranteed to be a string - if image_name is None: - raise ValueError("image_name should not be None after validation") - - if distro_name: - build_dir = DISTRIBS_BASE_DIR / distro_name - build_file_path = build_dir / f"{distro_name}-build.yaml" - else: - if image_name is None: - raise ValueError("image_name cannot be None") - build_dir = DISTRIBS_BASE_DIR / image_name - build_file_path = build_dir / f"{image_name}-build.yaml" - - os.makedirs(build_dir, exist_ok=True) - run_config_file = None - # Generate the run.yaml so it can be included in the container image with the proper entrypoint - # Only do this if we're building a container image and we're not using a template - if build_config.image_type == LlamaStackImageType.CONTAINER.value and not distro_name and config_path: - cprint("Generating run.yaml file", color="yellow", file=sys.stderr) - run_config_file = _generate_run_config(build_config, build_dir, image_name) - - with open(build_file_path, "w") as f: - to_write = json.loads(build_config.model_dump_json(exclude_none=True)) - f.write(yaml.dump(to_write, sort_keys=False)) - - # We first install the external APIs so that the build process can use them and discover the - # providers dependencies - if build_config.external_apis_dir: - cprint("Installing external APIs", color="yellow", file=sys.stderr) - external_apis = load_external_apis(build_config) - if external_apis: - # install the external APIs - packages = [] - for _, api_spec in external_apis.items(): - if api_spec.pip_packages: - packages.extend(api_spec.pip_packages) - cprint( - f"Installing {api_spec.name} with pip packages {api_spec.pip_packages}", - color="yellow", - file=sys.stderr, - ) - return_code = run_command(["uv", "pip", "install", *packages]) - if return_code != 0: - packages_str = ", ".join(packages) - raise RuntimeError( - f"Failed to install external APIs packages: {packages_str} (return code: {return_code})" - ) - - return_code = build_image( - build_config, - image_name, - distro_or_config=distro_name or config_path or str(build_file_path), - run_config=run_config_file.as_posix() if run_config_file else None, - ) - if return_code != 0: - raise RuntimeError(f"Failed to build image {image_name}") - - if distro_name: - # copy run.yaml from distribution to build_dir instead of generating it again - distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro_name}/run.yaml" - run_config_file = build_dir / f"{distro_name}-run.yaml" - - with importlib.resources.as_file(distro_path) as path: - shutil.copy(path, run_config_file) - - cprint("Build Successful!", color="green", file=sys.stderr) - cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr) - if build_config.image_type == LlamaStackImageType.VENV: - cprint( - "You can run the new Llama Stack distro (after activating " - + colored(image_name, "cyan") - + ") via: " - + colored(f"llama stack run {run_config_file}", "blue"), - color="green", - file=sys.stderr, - ) - elif build_config.image_type == LlamaStackImageType.CONTAINER: - cprint( - "You can run the container with: " - + colored( - f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue" - ), - color="green", - file=sys.stderr, - ) - return distro_path - else: - return _generate_run_config(build_config, build_dir, image_name) - - -def _run_distro_list_cmd() -> None: - headers = [ - "Distribution Name", - # "Providers", - 
"Description", - ] - - rows = [] - for distro_name, spec in available_distros_specs().items(): - rows.append( - [ - distro_name, - # json.dumps(spec.distribution_spec.providers, indent=2), - spec.distribution_spec.description, - ] - ) - print_table( - rows, - headers, - separate_rows=True, - ) diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py deleted file mode 100644 index cbe8ed881..000000000 --- a/llama_stack/cli/stack/build.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -import argparse -import textwrap - -from llama_stack.cli.stack.utils import ImageType -from llama_stack.cli.subcommand import Subcommand -from llama_stack.log import get_logger - -logger = get_logger(__name__, category="cli") - - -class StackBuild(Subcommand): - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "build", - prog="llama stack build", - description="[DEPRECATED] Build a Llama stack container. This command is deprecated and will be removed in a future release. Use `llama stack list-deps ' instead.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - self._add_arguments() - self.parser.set_defaults(func=self._run_stack_build_command) - - def _add_arguments(self): - self.parser.add_argument( - "--config", - type=str, - default=None, - help="Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively", - ) - - self.parser.add_argument( - "--template", - type=str, - default=None, - help="""(deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions""", - ) - self.parser.add_argument( - "--distro", - "--distribution", - dest="distribution", - type=str, - default=None, - help="""Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions""", - ) - - self.parser.add_argument( - "--list-distros", - "--list-distributions", - action="store_true", - dest="list_distros", - default=False, - help="Show the available distributions for building a Llama Stack distribution", - ) - - self.parser.add_argument( - "--image-type", - type=str, - help="Image Type to use for the build. If not specified, will use the image type from the template config.", - choices=[e.value for e in ImageType], - default=None, # no default so we can detect if a user specified --image-type and override image_type in the config - ) - - self.parser.add_argument( - "--image-name", - type=str, - help=textwrap.dedent( - f"""[for image-type={"|".join(e.value for e in ImageType)}] Name of the virtual environment to use for -the build. If not specified, currently active environment will be used if found. 
- """ - ), - default=None, - ) - self.parser.add_argument( - "--print-deps-only", - default=False, - action="store_true", - help="Print the dependencies for the stack only, without building the stack", - ) - - self.parser.add_argument( - "--run", - action="store_true", - default=False, - help="Run the stack after building using the same image type, name, and other applicable arguments", - ) - self.parser.add_argument( - "--providers", - type=str, - default=None, - help="Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.", - ) - - def _run_stack_build_command(self, args: argparse.Namespace) -> None: - logger.warning( - "The 'llama stack build' command is deprecated and will be removed in a future release. Please use 'llama stack list-deps'" - ) - # always keep implementation completely silo-ed away from CLI so CLI - # can be fast to load and reduces dependencies - from ._build import run_stack_build_command - - return run_stack_build_command(args) diff --git a/llama_stack/cli/stack/stack.py b/llama_stack/cli/stack/stack.py index fd0a4edf5..351da972f 100644 --- a/llama_stack/cli/stack/stack.py +++ b/llama_stack/cli/stack/stack.py @@ -11,7 +11,6 @@ from llama_stack.cli.stack.list_stacks import StackListBuilds from llama_stack.cli.stack.utils import print_subcommand_description from llama_stack.cli.subcommand import Subcommand -from .build import StackBuild from .list_apis import StackListApis from .list_deps import StackListDeps from .list_providers import StackListProviders @@ -41,7 +40,6 @@ class StackParser(Subcommand): # Add sub-commands StackListDeps.create(subparsers) - StackBuild.create(subparsers) StackListApis.create(subparsers) StackListProviders.create(subparsers) StackRun.create(subparsers) diff --git a/llama_stack/core/access_control/datatypes.py b/llama_stack/core/access_control/datatypes.py index c833ed51b..84beb8e15 100644 --- a/llama_stack/core/access_control/datatypes.py +++ b/llama_stack/core/access_control/datatypes.py @@ -41,7 +41,7 @@ class AccessRule(BaseModel): A rule defines a list of action either to permit or to forbid. It may specify a principal or a resource that must match for the rule to take effect. The resource to match should be specified in the form of a type qualified identifier, e.g. - model::my-model or vector_db::some-db, or a wildcard for all resources of a type, + model::my-model or vector_store::some-db, or a wildcard for all resources of a type, e.g. model::*. If the principal or resource are not specified, they will match all requests. @@ -79,9 +79,9 @@ class AccessRule(BaseModel): description: any user has read access to any resource created by a member of their team - forbid: actions: [create, read, delete] - resource: vector_db::* + resource: vector_store::* unless: user with admin in roles - description: only user with admin role can use vector_db resources + description: only user with admin role can use vector_store resources """ diff --git a/llama_stack/core/build_container.sh b/llama_stack/core/build_container.sh deleted file mode 100755 index 03ed846d9..000000000 --- a/llama_stack/core/build_container.sh +++ /dev/null @@ -1,410 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
-
-LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
-LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
-
-TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
-PYPI_VERSION=${PYPI_VERSION:-}
-BUILD_PLATFORM=${BUILD_PLATFORM:-}
-# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
-# Reference: https://github.com/astral-sh/uv/pull/1694
-UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
-
-# mounting is not supported by docker buildx, so we use COPY instead
-USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-}
-# Path to the run.yaml file in the container
-RUN_CONFIG_PATH=/app/run.yaml
-
-BUILD_CONTEXT_DIR=$(pwd)
-
-set -euo pipefail
-
-# Define color codes
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-# Usage function
-usage() {
-    echo "Usage: $0 --image-name <image_name> --container-base <container_base> --normal-deps <pip_dependencies> [--run-config <run_config>] [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
-    echo "Example: $0 --image-name llama-stack-img --container-base python:3.12-slim --normal-deps 'numpy pandas' --run-config ./run.yaml --external-provider-deps 'foo' --optional-deps 'bar'"
-    exit 1
-}
-
-# Parse arguments
-image_name=""
-container_base=""
-normal_deps=""
-external_provider_deps=""
-optional_deps=""
-run_config=""
-distro_or_config=""
-
-while [[ $# -gt 0 ]]; do
-    key="$1"
-    case "$key" in
-    --image-name)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --image-name requires a string value" >&2
-            usage
-        fi
-        image_name="$2"
-        shift 2
-        ;;
-    --container-base)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --container-base requires a string value" >&2
-            usage
-        fi
-        container_base="$2"
-        shift 2
-        ;;
-    --normal-deps)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --normal-deps requires a string value" >&2
-            usage
-        fi
-        normal_deps="$2"
-        shift 2
-        ;;
-    --external-provider-deps)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --external-provider-deps requires a string value" >&2
-            usage
-        fi
-        external_provider_deps="$2"
-        shift 2
-        ;;
-    --optional-deps)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --optional-deps requires a string value" >&2
-            usage
-        fi
-        optional_deps="$2"
-        shift 2
-        ;;
-    --run-config)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --run-config requires a string value" >&2
-            usage
-        fi
-        run_config="$2"
-        shift 2
-        ;;
-    --distro-or-config)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --distro-or-config requires a string value" >&2
-            usage
-        fi
-        distro_or_config="$2"
-        shift 2
-        ;;
-    *)
-        echo "Unknown option: $1" >&2
-        usage
-        ;;
-    esac
-done
-
-# Check required arguments
-if [[ -z "$image_name" || -z "$container_base" || -z "$normal_deps" ]]; then
-    echo "Error: --image-name, --container-base, and --normal-deps are required." >&2
-    usage
-fi
-
-CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
-CONTAINER_OPTS=${CONTAINER_OPTS:---progress=plain}
-TEMP_DIR=$(mktemp -d)
-SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
-source "$SCRIPT_DIR/common.sh"
-
-add_to_container() {
-    output_file="$TEMP_DIR/Containerfile"
-    if [ -t 0 ]; then
-        printf '%s\n' "$1" >>"$output_file"
-    else
-        cat >>"$output_file"
-    fi
-}
-
-if ! is_command_available "$CONTAINER_BINARY"; then
-    printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
-    exit 1
-fi
-
-if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
-    add_to_container << EOF
-FROM $container_base
-WORKDIR /app
-
-# We install the Python 3.12 dev headers and build tools so that any
-# C-extension wheels (e.g. polyleven, faiss-cpu) can compile successfully.
-
-RUN dnf -y update && dnf install -y iputils git net-tools wget \
-    vim-minimal python3.12 python3.12-pip python3.12-wheel \
-    python3.12-setuptools python3.12-devel gcc gcc-c++ make && \
-    ln -s /bin/pip3.12 /bin/pip && ln -s /bin/python3.12 /bin/python && dnf clean all
-
-ENV UV_SYSTEM_PYTHON=1
-RUN pip install uv
-EOF
-else
-    add_to_container << EOF
-FROM $container_base
-WORKDIR /app
-
-RUN apt-get update && apt-get install -y \
-       iputils-ping net-tools iproute2 dnsutils telnet \
-       curl wget telnet git\
-       procps psmisc lsof \
-       traceroute \
-       bubblewrap \
-       gcc g++ \
-       && rm -rf /var/lib/apt/lists/*
-
-ENV UV_SYSTEM_PYTHON=1
-RUN pip install uv
-EOF
-fi
-
-# Add pip dependencies first since llama-stack is what will change most often
-# so we can reuse layers.
-if [ -n "$normal_deps" ]; then
-    read -ra pip_args <<< "$normal_deps"
-    quoted_deps=$(printf " %q" "${pip_args[@]}")
-    add_to_container << EOF
-RUN uv pip install --no-cache $quoted_deps
-EOF
-fi
-
-if [ -n "$optional_deps" ]; then
-    IFS='#' read -ra parts <<<"$optional_deps"
-    for part in "${parts[@]}"; do
-        read -ra pip_args <<< "$part"
-        quoted_deps=$(printf " %q" "${pip_args[@]}")
-        add_to_container <<EOF
-RUN uv pip install --no-cache $quoted_deps
-EOF
-    done
-fi
-
-if [ -n "$external_provider_deps" ]; then
-    IFS='#' read -ra parts <<<"$external_provider_deps"
-    for part in "${parts[@]}"; do
-        read -ra pip_args <<< "$part"
-        quoted_deps=$(printf " %q" "${pip_args[@]}")
-        add_to_container <<EOF
-RUN uv pip install --no-cache $quoted_deps
-RUN python3 - <<PYTHON | uv pip install --no-cache -r -
-import importlib
-import sys
-
-try:
-    package_name = '$part'.split('==')[0].split('>=')[0].split('<=')[0].split('!=')[0].split('<')[0].split('>')[0]
-    module = importlib.import_module(f'{package_name}.provider')
-    spec = module.get_provider_spec()
-    if hasattr(spec, 'pip_packages') and spec.pip_packages:
-        if isinstance(spec.pip_packages, (list, tuple)):
-            print('\n'.join(spec.pip_packages))
-except Exception as e:
-    print(f'Error getting provider spec for {package_name}: {e}', file=sys.stderr)
-PYTHON
-EOF
-    done
-fi
-
-get_python_cmd() {
-    if is_command_available python; then
-        echo "python"
-    elif is_command_available python3; then
-        echo "python3"
-    else
-        echo "Error: Neither python nor python3 is installed. Please install Python to continue."
>&2 - exit 1 - fi -} - -if [ -n "$run_config" ]; then - # Copy the run config to the build context since it's an absolute path - cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml" - - # Parse the run.yaml configuration to identify external provider directories - # If external providers are specified, copy their directory to the container - # and update the configuration to reference the new container path - python_cmd=$(get_python_cmd) - external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')") - external_providers_dir=$(eval echo "$external_providers_dir") - if [ -n "$external_providers_dir" ]; then - if [ -d "$external_providers_dir" ]; then - echo "Copying external providers directory: $external_providers_dir" - cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d" - add_to_container << EOF -COPY providers.d /.llama/providers.d -EOF - fi - - # Edit the run.yaml file to change the external_providers_dir to /.llama/providers.d - if [ "$(uname)" = "Darwin" ]; then - sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" - rm -f "$BUILD_CONTEXT_DIR/run.yaml.bak" - else - sed -i 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" - fi - fi - - # Copy run config into docker image - add_to_container << EOF -COPY run.yaml $RUN_CONFIG_PATH -EOF -fi - -stack_mount="/app/llama-stack-source" -client_mount="/app/llama-stack-client-source" - -install_local_package() { - local dir="$1" - local mount_point="$2" - local name="$3" - - if [ ! -d "$dir" ]; then - echo "${RED}Warning: $name is set but directory does not exist: $dir${NC}" >&2 - exit 1 - fi - - if [ "$USE_COPY_NOT_MOUNT" = "true" ]; then - add_to_container << EOF -COPY $dir $mount_point -EOF - fi - add_to_container << EOF -RUN uv pip install --no-cache -e $mount_point -EOF -} - - -if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - install_local_package "$LLAMA_STACK_CLIENT_DIR" "$client_mount" "LLAMA_STACK_CLIENT_DIR" -fi - -if [ -n "$LLAMA_STACK_DIR" ]; then - install_local_package "$LLAMA_STACK_DIR" "$stack_mount" "LLAMA_STACK_DIR" -else - if [ -n "$TEST_PYPI_VERSION" ]; then - # these packages are damaged in test-pypi, so install them first - add_to_container << EOF -RUN uv pip install --no-cache fastapi libcst -EOF - add_to_container << EOF -RUN uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \ - --index-strategy unsafe-best-match \ - llama-stack==$TEST_PYPI_VERSION - -EOF - else - if [ -n "$PYPI_VERSION" ]; then - SPEC_VERSION="llama-stack==${PYPI_VERSION}" - else - SPEC_VERSION="llama-stack" - fi - add_to_container << EOF -RUN uv pip install --no-cache $SPEC_VERSION -EOF - fi -fi - -# remove uv after installation - add_to_container << EOF -RUN pip uninstall -y uv -EOF - -# If a run config is provided, we use the llama stack CLI -if [[ -n "$run_config" ]]; then - add_to_container << EOF -ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"] -EOF -elif [[ "$distro_or_config" != *.yaml ]]; then - add_to_container << EOF -ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"] -EOF -fi - -# Add other require item commands genearic to all containers -add_to_container << EOF - -RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true) -EOF - -printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR" -cat "$TEMP_DIR"/Containerfile 
-printf "\n" - -# Start building the CLI arguments -CLI_ARGS=() - -# Read CONTAINER_OPTS and put it in an array -read -ra CLI_ARGS <<< "$CONTAINER_OPTS" - -if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then - if [ -n "$LLAMA_STACK_DIR" ]; then - CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount") - fi - if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount") - fi -fi - -if is_command_available selinuxenabled && selinuxenabled; then - # Disable SELinux labels -- we don't want to relabel the llama-stack source dir - CLI_ARGS+=("--security-opt" "label=disable") -fi - -# Set version tag based on PyPI version -if [ -n "$PYPI_VERSION" ]; then - version_tag="$PYPI_VERSION" -elif [ -n "$TEST_PYPI_VERSION" ]; then - version_tag="test-$TEST_PYPI_VERSION" -elif [[ -n "$LLAMA_STACK_DIR" || -n "$LLAMA_STACK_CLIENT_DIR" ]]; then - version_tag="dev" -else - URL="https://pypi.org/pypi/llama-stack/json" - version_tag=$(curl -s $URL | jq -r '.info.version') -fi - -# Add version tag to image name -image_tag="$image_name:$version_tag" - -# Detect platform architecture -ARCH=$(uname -m) -if [ -n "$BUILD_PLATFORM" ]; then - CLI_ARGS+=("--platform" "$BUILD_PLATFORM") -elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then - CLI_ARGS+=("--platform" "linux/arm64") -elif [ "$ARCH" = "x86_64" ]; then - CLI_ARGS+=("--platform" "linux/amd64") -else - echo "Unsupported architecture: $ARCH" - exit 1 -fi - -echo "PWD: $(pwd)" -echo "Containerfile: $TEMP_DIR/Containerfile" -set -x - -$CONTAINER_BINARY build \ - "${CLI_ARGS[@]}" \ - -t "$image_tag" \ - -f "$TEMP_DIR/Containerfile" \ - "$BUILD_CONTEXT_DIR" - -# clean up tmp/configs -rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR" -set +x - -echo "Success!" diff --git a/llama_stack/core/build_venv.sh b/llama_stack/core/build_venv.sh deleted file mode 100755 index 04927d71e..000000000 --- a/llama_stack/core/build_venv.sh +++ /dev/null @@ -1,220 +0,0 @@ -#!/bin/bash - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
-
-LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
-LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
-TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
-# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
-# Reference: https://github.com/astral-sh/uv/pull/1694
-UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
-UV_SYSTEM_PYTHON=${UV_SYSTEM_PYTHON:-}
-VIRTUAL_ENV=${VIRTUAL_ENV:-}
-
-set -euo pipefail
-
-# Define color codes
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
-source "$SCRIPT_DIR/common.sh"
-
-# Usage function
-usage() {
-    echo "Usage: $0 --env-name <env_name> --normal-deps <pip_dependencies> [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
-    echo "Example: $0 --env-name mybuild --normal-deps 'numpy pandas scipy' --external-provider-deps 'foo' --optional-deps 'bar'"
-    exit 1
-}
-
-# Parse arguments
-env_name=""
-normal_deps=""
-external_provider_deps=""
-optional_deps=""
-
-while [[ $# -gt 0 ]]; do
-    key="$1"
-    case "$key" in
-    --env-name)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --env-name requires a string value" >&2
-            usage
-        fi
-        env_name="$2"
-        shift 2
-        ;;
-    --normal-deps)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --normal-deps requires a string value" >&2
-            usage
-        fi
-        normal_deps="$2"
-        shift 2
-        ;;
-    --external-provider-deps)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --external-provider-deps requires a string value" >&2
-            usage
-        fi
-        external_provider_deps="$2"
-        shift 2
-        ;;
-    --optional-deps)
-        if [[ -z "$2" || "$2" == --* ]]; then
-            echo "Error: --optional-deps requires a string value" >&2
-            usage
-        fi
-        optional_deps="$2"
-        shift 2
-        ;;
-    *)
-        echo "Unknown option: $1" >&2
-        usage
-        ;;
-    esac
-done
-
-# Check required arguments
-if [[ -z "$env_name" || -z "$normal_deps" ]]; then
-    echo "Error: --env-name and --normal-deps are required." >&2
-    usage
-fi
-
-if [ -n "$LLAMA_STACK_DIR" ]; then
-    echo "Using llama-stack-dir=$LLAMA_STACK_DIR"
-fi
-if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
-    echo "Using llama-stack-client-dir=$LLAMA_STACK_CLIENT_DIR"
-fi
-
-ENVNAME=""
-
-# pre-run checks to make sure we can proceed with the installation
-pre_run_checks() {
-    local env_name="$1"
-
-    if ! is_command_available uv; then
-        echo "uv is not installed, trying to install it."
-        if ! is_command_available pip; then
-            echo "pip is not installed, cannot automatically install 'uv'."
-            echo "Follow this link to install it:"
-            echo "https://docs.astral.sh/uv/getting-started/installation/"
-            exit 1
-        else
-            pip install uv
-        fi
-    fi
-
-    # checking if an environment with the same name already exists
-    if [ -d "$env_name" ]; then
-        echo "Environment '$env_name' already exists, re-using it."
- fi -} - -run() { - # Use only global variables set by flag parser - if [ -n "$UV_SYSTEM_PYTHON" ] || [ "$env_name" == "__system__" ]; then - echo "Installing dependencies in system Python environment" - export UV_SYSTEM_PYTHON=1 - elif [ "$VIRTUAL_ENV" == "$env_name" ]; then - echo "Virtual environment $env_name is already active" - else - echo "Using virtual environment $env_name" - uv venv "$env_name" - source "$env_name/bin/activate" - fi - - if [ -n "$TEST_PYPI_VERSION" ]; then - uv pip install fastapi libcst - uv pip install --extra-index-url https://test.pypi.org/simple/ \ - --index-strategy unsafe-best-match \ - llama-stack=="$TEST_PYPI_VERSION" \ - $normal_deps - if [ -n "$optional_deps" ]; then - IFS='#' read -ra parts <<<"$optional_deps" - for part in "${parts[@]}"; do - echo "$part" - uv pip install $part - done - fi - if [ -n "$external_provider_deps" ]; then - IFS='#' read -ra parts <<<"$external_provider_deps" - for part in "${parts[@]}"; do - echo "$part" - uv pip install "$part" - done - fi - else - if [ -n "$LLAMA_STACK_DIR" ]; then - # only warn if DIR does not start with "git+" - if [ ! -d "$LLAMA_STACK_DIR" ] && [[ "$LLAMA_STACK_DIR" != git+* ]]; then - printf "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_DIR" >&2 - exit 1 - fi - printf "Installing from LLAMA_STACK_DIR: %s\n" "$LLAMA_STACK_DIR" - # editable only if LLAMA_STACK_DIR does not start with "git+" - if [[ "$LLAMA_STACK_DIR" != git+* ]]; then - EDITABLE="-e" - else - EDITABLE="" - fi - uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_DIR" - else - uv pip install --no-cache-dir llama-stack - fi - - if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - # only warn if DIR does not start with "git+" - if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ] && [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then - printf "${RED}Warning: LLAMA_STACK_CLIENT_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_CLIENT_DIR" >&2 - exit 1 - fi - printf "Installing from LLAMA_STACK_CLIENT_DIR: %s\n" "$LLAMA_STACK_CLIENT_DIR" - # editable only if LLAMA_STACK_CLIENT_DIR does not start with "git+" - if [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then - EDITABLE="-e" - else - EDITABLE="" - fi - uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_CLIENT_DIR" - fi - - printf "Installing pip dependencies\n" - uv pip install $normal_deps - if [ -n "$optional_deps" ]; then - IFS='#' read -ra parts <<<"$optional_deps" - for part in "${parts[@]}"; do - echo "Installing special provider module: $part" - uv pip install $part - done - fi - if [ -n "$external_provider_deps" ]; then - IFS='#' read -ra parts <<<"$external_provider_deps" - for part in "${parts[@]}"; do - echo "Installing external provider module: $part" - uv pip install "$part" - echo "Getting provider spec for module: $part and installing dependencies" - package_name=$(echo "$part" | sed 's/[<>=!].*//') - python3 -c " -import importlib -import sys -try: - module = importlib.import_module(f'$package_name.provider') - spec = module.get_provider_spec() - if hasattr(spec, 'pip_packages') and spec.pip_packages: - print('\\n'.join(spec.pip_packages)) -except Exception as e: - print(f'Error getting provider spec for $package_name: {e}', file=sys.stderr) -" | uv pip install -r - - done - fi - fi -} - -pre_run_checks "$env_name" -run diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index e8cb36a02..5f4775d87 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -23,8 +23,8 @@ from 
llama_stack.apis.scoring import Scoring from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput from llama_stack.apis.shields import Shield, ShieldInput from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime -from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_io import VectorIO +from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.storage.datatypes import ( KVStoreReference, @@ -71,7 +71,7 @@ class ShieldWithOwner(Shield, ResourceWithOwner): pass -class VectorDBWithOwner(VectorDB, ResourceWithOwner): +class VectorStoreWithOwner(VectorStore, ResourceWithOwner): pass @@ -91,12 +91,12 @@ class ToolGroupWithOwner(ToolGroup, ResourceWithOwner): pass -RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | ToolGroup +RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup RoutableObjectWithProvider = Annotated[ ModelWithOwner | ShieldWithOwner - | VectorDBWithOwner + | VectorStoreWithOwner | DatasetWithOwner | ScoringFnWithOwner | BenchmarkWithOwner @@ -427,7 +427,7 @@ class RegisteredResources(BaseModel): models: list[ModelInput] = Field(default_factory=list) shields: list[ShieldInput] = Field(default_factory=list) - vector_dbs: list[VectorDBInput] = Field(default_factory=list) + vector_stores: list[VectorStoreInput] = Field(default_factory=list) datasets: list[DatasetInput] = Field(default_factory=list) scoring_fns: list[ScoringFnInput] = Field(default_factory=list) benchmarks: list[BenchmarkInput] = Field(default_factory=list) diff --git a/llama_stack/core/distribution.py b/llama_stack/core/distribution.py index 59461f5d6..82cbcf984 100644 --- a/llama_stack/core/distribution.py +++ b/llama_stack/core/distribution.py @@ -64,7 +64,7 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]: router_api=Api.tool_runtime, ), AutoRoutedApiInfo( - routing_table_api=Api.vector_dbs, + routing_table_api=Api.vector_stores, router_api=Api.vector_io, ), ] diff --git a/llama_stack/core/library_client.py b/llama_stack/core/library_client.py index 1179075cd..328ca9c6e 100644 --- a/llama_stack/core/library_client.py +++ b/llama_stack/core/library_client.py @@ -278,7 +278,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): else: prefix = "!" 
if in_notebook() else "" cprint( - f"Please run:\n\n{prefix}llama stack build --distro {self.config_path_or_distro_name} --image-type venv\n\n", + f"Please run:\n\n{prefix}llama stack list-deps {self.config_path_or_distro_name} | xargs -L1 uv pip install\n\n", "yellow", file=sys.stderr, ) diff --git a/llama_stack/core/resolver.py b/llama_stack/core/resolver.py index 6e1843870..0b63815ea 100644 --- a/llama_stack/core/resolver.py +++ b/llama_stack/core/resolver.py @@ -29,8 +29,8 @@ from llama_stack.apis.scoring_functions import ScoringFunctions from llama_stack.apis.shields import Shields from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_dbs import VectorDBs from llama_stack.apis.vector_io import VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA from llama_stack.core.client import get_client_impl from llama_stack.core.datatypes import ( @@ -82,7 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) -> Api.inspect: Inspect, Api.batches: Batches, Api.vector_io: VectorIO, - Api.vector_dbs: VectorDBs, + Api.vector_stores: VectorStore, Api.models: Models, Api.safety: Safety, Api.shields: Shields, diff --git a/llama_stack/core/routers/__init__.py b/llama_stack/core/routers/__init__.py index df4df0463..20c17e59d 100644 --- a/llama_stack/core/routers/__init__.py +++ b/llama_stack/core/routers/__init__.py @@ -29,7 +29,7 @@ async def get_routing_table_impl( from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable from ..routing_tables.shields import ShieldsRoutingTable from ..routing_tables.toolgroups import ToolGroupsRoutingTable - from ..routing_tables.vector_dbs import VectorDBsRoutingTable + from ..routing_tables.vector_stores import VectorStoresRoutingTable api_to_tables = { "models": ModelsRoutingTable, @@ -38,7 +38,7 @@ async def get_routing_table_impl( "scoring_functions": ScoringFunctionsRoutingTable, "benchmarks": BenchmarksRoutingTable, "tool_groups": ToolGroupsRoutingTable, - "vector_dbs": VectorDBsRoutingTable, + "vector_stores": VectorStoresRoutingTable, } if api.value not in api_to_tables: diff --git a/llama_stack/core/routers/tool_runtime.py b/llama_stack/core/routers/tool_runtime.py index ad82293e5..7c5bb25c6 100644 --- a/llama_stack/core/routers/tool_runtime.py +++ b/llama_stack/core/routers/tool_runtime.py @@ -8,16 +8,8 @@ from typing import Any from llama_stack.apis.common.content_types import ( URL, - InterleavedContent, -) -from llama_stack.apis.tools import ( - ListToolDefsResponse, - RAGDocument, - RAGQueryConfig, - RAGQueryResult, - RAGToolRuntime, - ToolRuntime, ) +from llama_stack.apis.tools import ListToolDefsResponse, ToolRuntime from llama_stack.log import get_logger from ..routing_tables.toolgroups import ToolGroupsRoutingTable @@ -26,36 +18,6 @@ logger = get_logger(name=__name__, category="core::routers") class ToolRuntimeRouter(ToolRuntime): - class RagToolImpl(RAGToolRuntime): - def __init__( - self, - routing_table: ToolGroupsRoutingTable, - ) -> None: - logger.debug("Initializing ToolRuntimeRouter.RagToolImpl") - self.routing_table = routing_table - - async def query( - self, - content: InterleavedContent, - vector_db_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_db_ids}") - provider = await self.routing_table.get_provider_impl("knowledge_search") - 
return await provider.query(content, vector_db_ids, query_config) - - async def insert( - self, - documents: list[RAGDocument], - vector_db_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - logger.debug( - f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}" - ) - provider = await self.routing_table.get_provider_impl("insert_into_memory") - return await provider.insert(documents, vector_db_id, chunk_size_in_tokens) - def __init__( self, routing_table: ToolGroupsRoutingTable, @@ -63,11 +25,6 @@ class ToolRuntimeRouter(ToolRuntime): logger.debug("Initializing ToolRuntimeRouter") self.routing_table = routing_table - # HACK ALERT this should be in sync with "get_all_api_endpoints()" - self.rag_tool = self.RagToolImpl(routing_table) - for method in ("query", "insert"): - setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method)) - async def initialize(self) -> None: logger.debug("ToolRuntimeRouter.initialize") pass diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index bfc5f7164..2b1701dc2 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -71,25 +71,6 @@ class VectorIORouter(VectorIO): raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model") - async def register_vector_db( - self, - vector_db_id: str, - embedding_model: str, - embedding_dimension: int | None = 384, - provider_id: str | None = None, - vector_db_name: str | None = None, - provider_vector_db_id: str | None = None, - ) -> None: - logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}") - await self.routing_table.register_vector_db( - vector_db_id, - embedding_model, - embedding_dimension, - provider_id, - vector_db_name, - provider_vector_db_id, - ) - async def insert_chunks( self, vector_db_id: str, @@ -165,22 +146,22 @@ class VectorIORouter(VectorIO): else: provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] - vector_db_id = f"vs_{uuid.uuid4()}" - registered_vector_db = await self.routing_table.register_vector_db( - vector_db_id=vector_db_id, + vector_store_id = f"vs_{uuid.uuid4()}" + registered_vector_store = await self.routing_table.register_vector_store( + vector_store_id=vector_store_id, embedding_model=embedding_model, embedding_dimension=embedding_dimension, provider_id=provider_id, - provider_vector_db_id=vector_db_id, - vector_db_name=params.name, + provider_vector_store_id=vector_store_id, + vector_store_name=params.name, ) - provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier) + provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier) - # Update model_extra with registered values so provider uses the already-registered vector_db + # Update model_extra with registered values so provider uses the already-registered vector_store if params.model_extra is None: params.model_extra = {} - params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id - params.model_extra["provider_id"] = registered_vector_db.provider_id + params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id + params.model_extra["provider_id"] = registered_vector_store.provider_id if embedding_model is not None: params.model_extra["embedding_model"] = embedding_model if embedding_dimension is not None: @@ -198,15 +179,15 @@ class VectorIORouter(VectorIO): 
logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}") # Route to default provider for now - could aggregate from all providers in the future # call retrieve on each vector dbs to get list of vector stores - vector_dbs = await self.routing_table.get_all_with_type("vector_db") + vector_stores = await self.routing_table.get_all_with_type("vector_store") all_stores = [] - for vector_db in vector_dbs: + for vector_store in vector_stores: try: - provider = await self.routing_table.get_provider_impl(vector_db.identifier) - vector_store = await provider.openai_retrieve_vector_store(vector_db.identifier) + provider = await self.routing_table.get_provider_impl(vector_store.identifier) + vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier) all_stores.append(vector_store) except Exception as e: - logger.error(f"Error retrieving vector store {vector_db.identifier}: {e}") + logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}") continue # Sort by created_at diff --git a/llama_stack/core/routing_tables/common.py b/llama_stack/core/routing_tables/common.py index 087483bb6..d6faf93c5 100644 --- a/llama_stack/core/routing_tables/common.py +++ b/llama_stack/core/routing_tables/common.py @@ -41,7 +41,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable elif api == Api.safety: return await p.register_shield(obj) elif api == Api.vector_io: - return await p.register_vector_db(obj) + return await p.register_vector_store(obj) elif api == Api.datasetio: return await p.register_dataset(obj) elif api == Api.scoring: @@ -57,7 +57,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None: api = get_impl_api(p) if api == Api.vector_io: - return await p.unregister_vector_db(obj.identifier) + return await p.unregister_vector_store(obj.identifier) elif api == Api.inference: return await p.unregister_model(obj.identifier) elif api == Api.safety: @@ -108,7 +108,7 @@ class CommonRoutingTableImpl(RoutingTable): elif api == Api.safety: p.shield_store = self elif api == Api.vector_io: - p.vector_db_store = self + p.vector_store_store = self elif api == Api.datasetio: p.dataset_store = self elif api == Api.scoring: @@ -134,15 +134,15 @@ class CommonRoutingTableImpl(RoutingTable): from .scoring_functions import ScoringFunctionsRoutingTable from .shields import ShieldsRoutingTable from .toolgroups import ToolGroupsRoutingTable - from .vector_dbs import VectorDBsRoutingTable + from .vector_stores import VectorStoresRoutingTable def apiname_object(): if isinstance(self, ModelsRoutingTable): return ("Inference", "model") elif isinstance(self, ShieldsRoutingTable): return ("Safety", "shield") - elif isinstance(self, VectorDBsRoutingTable): - return ("VectorIO", "vector_db") + elif isinstance(self, VectorStoresRoutingTable): + return ("VectorIO", "vector_store") elif isinstance(self, DatasetsRoutingTable): return ("DatasetIO", "dataset") elif isinstance(self, ScoringFunctionsRoutingTable): diff --git a/llama_stack/core/routing_tables/vector_dbs.py b/llama_stack/core/routing_tables/vector_stores.py similarity index 70% rename from llama_stack/core/routing_tables/vector_dbs.py rename to llama_stack/core/routing_tables/vector_stores.py index e87fb61c6..c6c80a01e 100644 --- a/llama_stack/core/routing_tables/vector_dbs.py +++ b/llama_stack/core/routing_tables/vector_stores.py @@ -6,15 +6,12 @@ from typing import Any -from pydantic 
import TypeAdapter - from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError from llama_stack.apis.models import ModelType from llama_stack.apis.resource import ResourceType -# Removed VectorDBs import to avoid exposing public API +# Removed VectorStores import to avoid exposing public API from llama_stack.apis.vector_io.vector_io import ( - OpenAICreateVectorStoreRequestWithExtraBody, SearchRankingOptions, VectorStoreChunkingStrategy, VectorStoreDeleteResponse, @@ -26,7 +23,7 @@ from llama_stack.apis.vector_io.vector_io import ( VectorStoreSearchResponsePage, ) from llama_stack.core.datatypes import ( - VectorDBWithOwner, + VectorStoreWithOwner, ) from llama_stack.log import get_logger @@ -35,23 +32,23 @@ from .common import CommonRoutingTableImpl, lookup_model logger = get_logger(name=__name__, category="core::routing_tables") -class VectorDBsRoutingTable(CommonRoutingTableImpl): - """Internal routing table for vector_db operations. +class VectorStoresRoutingTable(CommonRoutingTableImpl): + """Internal routing table for vector_store operations. - Does not inherit from VectorDBs to avoid exposing public API endpoints. + Does not inherit from VectorStores to avoid exposing public API endpoints. Only provides internal routing functionality for VectorIORouter. """ # Internal methods only - no public API exposure - async def register_vector_db( + async def register_vector_store( self, - vector_db_id: str, + vector_store_id: str, embedding_model: str, embedding_dimension: int | None = 384, provider_id: str | None = None, - provider_vector_db_id: str | None = None, - vector_db_name: str | None = None, + provider_vector_store_id: str | None = None, + vector_store_name: str | None = None, ) -> Any: if provider_id is None: if len(self.impls_by_provider_id) > 0: @@ -67,52 +64,24 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): raise ModelNotFoundError(embedding_model) if model.model_type != ModelType.embedding: raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding) - if "embedding_dimension" not in model.metadata: - raise ValueError(f"Model {embedding_model} does not have an embedding dimension") - try: - provider = self.impls_by_provider_id[provider_id] - except KeyError: - available_providers = list(self.impls_by_provider_id.keys()) - raise ValueError( - f"Provider '{provider_id}' not found in routing table. Available providers: {available_providers}" - ) from None - logger.warning( - "VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly." - ) - request = OpenAICreateVectorStoreRequestWithExtraBody( - name=vector_db_name or vector_db_id, - embedding_model=embedding_model, - embedding_dimension=model.metadata["embedding_dimension"], + vector_store = VectorStoreWithOwner( + identifier=vector_store_id, + type=ResourceType.vector_store.value, provider_id=provider_id, - provider_vector_db_id=provider_vector_db_id, + provider_resource_id=provider_vector_store_id, + embedding_model=embedding_model, + embedding_dimension=embedding_dimension, + vector_store_name=vector_store_name, ) - vector_store = await provider.openai_create_vector_store(request) - - vector_store_id = vector_store.id - actual_provider_vector_db_id = provider_vector_db_id or vector_store_id - logger.warning( - f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. 
Setting VectorDB {vector_db_id} to VectorDB.vector_db_name" - ) - - vector_db_data = { - "identifier": vector_store_id, - "type": ResourceType.vector_db.value, - "provider_id": provider_id, - "provider_resource_id": actual_provider_vector_db_id, - "embedding_model": embedding_model, - "embedding_dimension": model.metadata["embedding_dimension"], - "vector_db_name": vector_store.name, - } - vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data) - await self.register_object(vector_db) - return vector_db + await self.register_object(vector_store) + return vector_store async def openai_retrieve_vector_store( self, vector_store_id: str, ) -> VectorStoreObject: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store(vector_store_id) @@ -123,7 +92,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): expires_after: dict[str, Any] | None = None, metadata: dict[str, Any] | None = None, ) -> VectorStoreObject: - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_update_vector_store( vector_store_id=vector_store_id, @@ -136,18 +105,18 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): self, vector_store_id: str, ) -> VectorStoreDeleteResponse: - await self.assert_action_allowed("delete", "vector_db", vector_store_id) + await self.assert_action_allowed("delete", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) result = await provider.openai_delete_vector_store(vector_store_id) - await self.unregister_vector_db(vector_store_id) + await self.unregister_vector_store(vector_store_id) return result - async def unregister_vector_db(self, vector_store_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: """Remove the vector store from the routing table registry.""" try: - vector_db_obj = await self.get_object_by_identifier("vector_db", vector_store_id) - if vector_db_obj: - await self.unregister_object(vector_db_obj) + vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id) + if vector_store_obj: + await self.unregister_object(vector_store_obj) except Exception as e: # Log the error but don't fail the operation logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}") @@ -162,7 +131,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): rewrite_query: bool | None = False, search_mode: str | None = "vector", ) -> VectorStoreSearchResponsePage: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_search_vector_store( vector_store_id=vector_store_id, @@ -181,7 +150,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): attributes: dict[str, Any] | None = None, chunking_strategy: VectorStoreChunkingStrategy | None = None, ) -> VectorStoreFileObject: - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await 
provider.openai_attach_file_to_vector_store( vector_store_id=vector_store_id, @@ -199,7 +168,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): before: str | None = None, filter: VectorStoreFileStatus | None = None, ) -> list[VectorStoreFileObject]: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_list_files_in_vector_store( vector_store_id=vector_store_id, @@ -215,7 +184,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): vector_store_id: str, file_id: str, ) -> VectorStoreFileObject: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file( vector_store_id=vector_store_id, @@ -227,7 +196,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): vector_store_id: str, file_id: str, ) -> VectorStoreFileContentsResponse: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_contents( vector_store_id=vector_store_id, @@ -240,7 +209,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): file_id: str, attributes: dict[str, Any], ) -> VectorStoreFileObject: - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_update_vector_store_file( vector_store_id=vector_store_id, @@ -253,7 +222,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): vector_store_id: str, file_id: str, ) -> VectorStoreFileDeleteResponse: - await self.assert_action_allowed("delete", "vector_db", vector_store_id) + await self.assert_action_allowed("delete", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_delete_vector_store_file( vector_store_id=vector_store_id, @@ -267,7 +236,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): attributes: dict[str, Any] | None = None, chunking_strategy: Any | None = None, ): - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_create_vector_store_file_batch( vector_store_id=vector_store_id, @@ -281,7 +250,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): batch_id: str, vector_store_id: str, ): - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_batch( batch_id=batch_id, @@ -298,7 +267,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): limit: int | None = 20, order: str | None = "desc", ): - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await 
provider.openai_list_files_in_vector_store_file_batch( batch_id=batch_id, @@ -315,7 +284,7 @@ batch_id: str, vector_store_id: str, ): - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_cancel_vector_store_file_batch( batch_id=batch_id, diff --git a/llama_stack/core/server/routes.py b/llama_stack/core/server/routes.py index 4970d0bf8..ed76ea86f 100644 --- a/llama_stack/core/server/routes.py +++ b/llama_stack/core/server/routes.py @@ -13,7 +13,6 @@ from aiohttp import hdrs from starlette.routing import Route from llama_stack.apis.datatypes import Api, ExternalApiSpec -from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup from llama_stack.core.resolver import api_protocol_map from llama_stack.schema_utils import WebMethod @@ -25,33 +24,16 @@ RouteImpls = dict[str, PathImpl] RouteMatch = tuple[EndpointFunc, PathParams, str, WebMethod] -def toolgroup_protocol_map(): - return { - SpecialToolGroup.rag_tool: RAGToolRuntime, - } - - def get_all_api_routes( external_apis: dict[Api, ExternalApiSpec] | None = None, ) -> dict[Api, list[tuple[Route, WebMethod]]]: apis = {} protocols = api_protocol_map(external_apis) - toolgroup_protocols = toolgroup_protocol_map() for api, protocol in protocols.items(): routes = [] protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction) - # HACK ALERT - if api == Api.tool_runtime: - for tool_group in SpecialToolGroup: - sub_protocol = toolgroup_protocols[tool_group] - sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction) - for name, method in sub_protocol_methods: - if not hasattr(method, "__webmethod__"): - continue - protocol_methods.append((f"{tool_group.value}.{name}", method)) - for name, method in protocol_methods: # Get all webmethods for this method (supports multiple decorators) webmethods = getattr(method, "__webmethods__", []) diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index 4cf1d072d..49100b4bc 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -32,7 +32,7 @@ from llama_stack.apis.scoring_functions import ScoringFunctions from llama_stack.apis.shields import Shields from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration from llama_stack.apis.telemetry import Telemetry -from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime +from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig @@ -80,7 +80,6 @@ class LlamaStack( Inspect, ToolGroups, ToolRuntime, - RAGToolRuntime, Files, Prompts, Conversations, diff --git a/llama_stack/core/ui/README.md b/llama_stack/core/ui/README.md index f1d85454b..37f1501c9 100644 --- a/llama_stack/core/ui/README.md +++ b/llama_stack/core/ui/README.md @@ -9,7 +9,7 @@ 1. Start up Llama Stack API server. More details [here](https://llamastack.github.io/latest/getting_started/index.html).
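
The README hunk just below, like the library_client.py message earlier, replaces `llama stack build` with a `list-deps` pipeline. A hypothetical scripted equivalent of `llama stack list-deps <distro> | xargs -L1 uv pip install`, for readers who prefer to see the pipeline spelled out:

```python
# Hypothetical helper equivalent to:
#   llama stack list-deps <distro> | xargs -L1 uv pip install
import subprocess

def install_distro_deps(distro: str) -> None:
    result = subprocess.run(
        ["llama", "stack", "list-deps", distro],
        capture_output=True, text=True, check=True,
    )
    for line in result.stdout.splitlines():
        if line.strip():
            # xargs -L1 runs one install per non-empty output line
            subprocess.run(["uv", "pip", "install", *line.split()], check=True)
```

The README hunk showing the same change follows.
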
``` -llama stack build --distro together --image-type venv +llama stack list-deps together | xargs -L1 uv pip install llama stack run together ``` diff --git a/llama_stack/core/ui/page/playground/tools.py b/llama_stack/core/ui/page/playground/tools.py index 4ee9d2204..16fd464ee 100644 --- a/llama_stack/core/ui/page/playground/tools.py +++ b/llama_stack/core/ui/page/playground/tools.py @@ -32,7 +32,7 @@ def tool_chat_page(): tool_groups_list = [tool_group.identifier for tool_group in tool_groups] mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")] builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")] - selected_vector_dbs = [] + selected_vector_stores = [] def reset_agent(): st.session_state.clear() @@ -55,13 +55,13 @@ def tool_chat_page(): ) if "builtin::rag" in toolgroup_selection: - vector_dbs = llama_stack_api.client.vector_dbs.list() or [] - if not vector_dbs: + vector_stores = llama_stack_api.client.vector_stores.list() or [] + if not vector_stores: st.info("No vector databases available for selection.") - vector_dbs = [vector_db.identifier for vector_db in vector_dbs] - selected_vector_dbs = st.multiselect( + vector_stores = [vector_store.identifier for vector_store in vector_stores] + selected_vector_stores = st.multiselect( label="Select Document Collections to use in RAG queries", - options=vector_dbs, + options=vector_stores, on_change=reset_agent, ) @@ -119,7 +119,7 @@ def tool_chat_page(): tool_dict = dict( name="builtin::rag", args={ - "vector_db_ids": list(selected_vector_dbs), + "vector_store_ids": list(selected_vector_stores), }, ) toolgroup_selection[i] = tool_dict diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml index c01e415a9..3cf43de15 100644 --- a/llama_stack/distributions/ci-tests/build.yaml +++ b/llama_stack/distributions/ci-tests/build.yaml @@ -48,7 +48,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol batches: - provider_type: inline::reference diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index ecf9eed3b..f403527fc 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -216,8 +216,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol batches: @@ -263,8 +261,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/dell/build.yaml b/llama_stack/distributions/dell/build.yaml index 7bc26ca9e..0275a47a1 100644 --- a/llama_stack/distributions/dell/build.yaml +++ b/llama_stack/distributions/dell/build.yaml @@ -26,7 +26,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime image_type: venv additional_pip_packages: - aiosqlite diff --git a/llama_stack/distributions/dell/dell.py b/llama_stack/distributions/dell/dell.py index 88e72688f..708ba0b10 100644 --- a/llama_stack/distributions/dell/dell.py +++ b/llama_stack/distributions/dell/dell.py @@ -45,7 +45,6 
@@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), ], } name = "dell" @@ -98,10 +97,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="brave-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] return DistributionTemplate( diff --git a/llama_stack/distributions/dell/doc_template.md b/llama_stack/distributions/dell/doc_template.md index 852e78d0e..4e28673e8 100644 --- a/llama_stack/distributions/dell/doc_template.md +++ b/llama_stack/distributions/dell/doc_template.md @@ -157,7 +157,7 @@ docker run \ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. ```bash -llama stack build --distro {{ name }} --image-type conda +llama stack list-deps {{ name }} | xargs -L1 pip install INFERENCE_MODEL=$INFERENCE_MODEL \ DEH_URL=$DEH_URL \ CHROMA_URL=$CHROMA_URL \ diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml index 2563f2f4b..062c50e2b 100644 --- a/llama_stack/distributions/dell/run-with-safety.yaml +++ b/llama_stack/distributions/dell/run-with-safety.yaml @@ -87,8 +87,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime storage: backends: kv_default: @@ -133,8 +131,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: brave-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml index 7bada394f..42e0658bd 100644 --- a/llama_stack/distributions/dell/run.yaml +++ b/llama_stack/distributions/dell/run.yaml @@ -83,8 +83,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime storage: backends: kv_default: @@ -124,8 +122,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: brave-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/meta-reference-gpu/build.yaml b/llama_stack/distributions/meta-reference-gpu/build.yaml index 1513742a7..74da29bb8 100644 --- a/llama_stack/distributions/meta-reference-gpu/build.yaml +++ b/llama_stack/distributions/meta-reference-gpu/build.yaml @@ -24,7 +24,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol image_type: venv additional_pip_packages: diff --git a/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/llama_stack/distributions/meta-reference-gpu/meta_reference.py index 4e4ddef33..aa66d43a0 100644 --- a/llama_stack/distributions/meta-reference-gpu/meta_reference.py +++ b/llama_stack/distributions/meta-reference-gpu/meta_reference.py @@ -47,7 +47,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], } @@ -92,10 
+91,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] return DistributionTemplate( diff --git a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml index 01b5db4f9..6e74201db 100644 --- a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml @@ -98,8 +98,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol storage: @@ -146,8 +144,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/meta-reference-gpu/run.yaml b/llama_stack/distributions/meta-reference-gpu/run.yaml index 87c33dde0..92934ca74 100644 --- a/llama_stack/distributions/meta-reference-gpu/run.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run.yaml @@ -88,8 +88,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol storage: @@ -131,8 +129,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/nvidia/build.yaml b/llama_stack/distributions/nvidia/build.yaml index 8ddd12439..3412ea15b 100644 --- a/llama_stack/distributions/nvidia/build.yaml +++ b/llama_stack/distributions/nvidia/build.yaml @@ -19,8 +19,7 @@ distribution_spec: - provider_type: remote::nvidia scoring: - provider_type: inline::basic - tool_runtime: - - provider_type: inline::rag-runtime + tool_runtime: [] files: - provider_type: inline::localfs image_type: venv diff --git a/llama_stack/distributions/nvidia/nvidia.py b/llama_stack/distributions/nvidia/nvidia.py index a92a2e6f8..889f83aa5 100644 --- a/llama_stack/distributions/nvidia/nvidia.py +++ b/llama_stack/distributions/nvidia/nvidia.py @@ -28,7 +28,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate: BuildProvider(provider_type="remote::nvidia"), ], "scoring": [BuildProvider(provider_type="inline::basic")], - "tool_runtime": [BuildProvider(provider_type="inline::rag-runtime")], + "tool_runtime": [], "files": [BuildProvider(provider_type="inline::localfs")], } @@ -66,12 +66,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate: provider_id="nvidia", ) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] + default_tool_groups: list[ToolGroupInput] = [] return DistributionTemplate( name=name, diff --git a/llama_stack/distributions/nvidia/run-with-safety.yaml b/llama_stack/distributions/nvidia/run-with-safety.yaml index c23d0f9cb..dca29ed2a 100644 --- a/llama_stack/distributions/nvidia/run-with-safety.yaml +++ b/llama_stack/distributions/nvidia/run-with-safety.yaml @@ -80,9 +80,7 @@ providers: scoring: - provider_id: basic provider_type: inline::basic - tool_runtime: - - provider_id: 
rag-runtime - provider_type: inline::rag-runtime + tool_runtime: [] files: - provider_id: meta-reference-files provider_type: inline::localfs @@ -128,9 +126,7 @@ registered_resources: datasets: [] scoring_fns: [] benchmarks: [] - tool_groups: - - toolgroup_id: builtin::rag - provider_id: rag-runtime + tool_groups: [] server: port: 8321 telemetry: diff --git a/llama_stack/distributions/nvidia/run.yaml b/llama_stack/distributions/nvidia/run.yaml index 81e744d53..e35d9c44c 100644 --- a/llama_stack/distributions/nvidia/run.yaml +++ b/llama_stack/distributions/nvidia/run.yaml @@ -69,9 +69,7 @@ providers: scoring: - provider_id: basic provider_type: inline::basic - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime + tool_runtime: [] files: - provider_id: meta-reference-files provider_type: inline::localfs @@ -107,9 +105,7 @@ registered_resources: datasets: [] scoring_fns: [] benchmarks: [] - tool_groups: - - toolgroup_id: builtin::rag - provider_id: rag-runtime + tool_groups: [] server: port: 8321 telemetry: diff --git a/llama_stack/distributions/open-benchmark/build.yaml b/llama_stack/distributions/open-benchmark/build.yaml index 05acd98e3..9fc0e9eb0 100644 --- a/llama_stack/distributions/open-benchmark/build.yaml +++ b/llama_stack/distributions/open-benchmark/build.yaml @@ -28,7 +28,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol image_type: venv additional_pip_packages: diff --git a/llama_stack/distributions/open-benchmark/open_benchmark.py b/llama_stack/distributions/open-benchmark/open_benchmark.py index 2b7760894..cceec74fd 100644 --- a/llama_stack/distributions/open-benchmark/open_benchmark.py +++ b/llama_stack/distributions/open-benchmark/open_benchmark.py @@ -118,7 +118,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], } @@ -154,10 +153,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] models, _ = get_model_registry(available_models) diff --git a/llama_stack/distributions/open-benchmark/run.yaml b/llama_stack/distributions/open-benchmark/run.yaml index 4fd0e199b..8f63e4417 100644 --- a/llama_stack/distributions/open-benchmark/run.yaml +++ b/llama_stack/distributions/open-benchmark/run.yaml @@ -118,8 +118,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol storage: @@ -244,8 +242,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/postgres-demo/build.yaml b/llama_stack/distributions/postgres-demo/build.yaml index 063dc3999..99b4edeb3 100644 --- a/llama_stack/distributions/postgres-demo/build.yaml +++ b/llama_stack/distributions/postgres-demo/build.yaml @@ -14,7 +14,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - 
provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol image_type: venv additional_pip_packages: diff --git a/llama_stack/distributions/postgres-demo/postgres_demo.py b/llama_stack/distributions/postgres-demo/postgres_demo.py index 876370ef3..9f8d35cb1 100644 --- a/llama_stack/distributions/postgres-demo/postgres_demo.py +++ b/llama_stack/distributions/postgres-demo/postgres_demo.py @@ -45,7 +45,6 @@ def get_distribution_template() -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], } @@ -66,10 +65,6 @@ def get_distribution_template() -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] default_models = [ diff --git a/llama_stack/distributions/postgres-demo/run.yaml b/llama_stack/distributions/postgres-demo/run.yaml index 0d7ecff48..67222969c 100644 --- a/llama_stack/distributions/postgres-demo/run.yaml +++ b/llama_stack/distributions/postgres-demo/run.yaml @@ -54,8 +54,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol storage: @@ -107,8 +105,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/starter-gpu/build.yaml b/llama_stack/distributions/starter-gpu/build.yaml index b2e2a0c85..678d7995d 100644 --- a/llama_stack/distributions/starter-gpu/build.yaml +++ b/llama_stack/distributions/starter-gpu/build.yaml @@ -49,7 +49,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol batches: - provider_type: inline::reference diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml index 92483c78e..4764dc02c 100644 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ b/llama_stack/distributions/starter-gpu/run.yaml @@ -219,8 +219,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol batches: @@ -266,8 +264,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml index baa80ef3e..e6cd3c688 100644 --- a/llama_stack/distributions/starter/build.yaml +++ b/llama_stack/distributions/starter/build.yaml @@ -49,7 +49,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol batches: - provider_type: inline::reference diff --git a/llama_stack/distributions/starter/run.yaml 
b/llama_stack/distributions/starter/run.yaml index 3b9d8f890..88358501e 100644 --- a/llama_stack/distributions/starter/run.yaml +++ b/llama_stack/distributions/starter/run.yaml @@ -216,8 +216,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol batches: @@ -263,8 +261,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py index c8c7101a6..bad6279bd 100644 --- a/llama_stack/distributions/starter/starter.py +++ b/llama_stack/distributions/starter/starter.py @@ -140,7 +140,6 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], "batches": [ @@ -162,10 +161,6 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), ] default_shields = [ # if the diff --git a/llama_stack/distributions/watsonx/build.yaml b/llama_stack/distributions/watsonx/build.yaml index dba1a94e2..d2c396085 100644 --- a/llama_stack/distributions/watsonx/build.yaml +++ b/llama_stack/distributions/watsonx/build.yaml @@ -23,7 +23,6 @@ distribution_spec: tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol files: - provider_type: inline::localfs diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml index ca3c8402d..ddc7e095f 100644 --- a/llama_stack/distributions/watsonx/run.yaml +++ b/llama_stack/distributions/watsonx/run.yaml @@ -83,8 +83,6 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol files: @@ -125,8 +123,6 @@ registered_resources: tool_groups: - toolgroup_id: builtin::websearch provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime server: port: 8321 telemetry: diff --git a/llama_stack/distributions/watsonx/watsonx.py b/llama_stack/distributions/watsonx/watsonx.py index d79aea872..b16f76fcb 100644 --- a/llama_stack/distributions/watsonx/watsonx.py +++ b/llama_stack/distributions/watsonx/watsonx.py @@ -33,7 +33,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: "tool_runtime": [ BuildProvider(provider_type="remote::brave-search"), BuildProvider(provider_type="remote::tavily-search"), - BuildProvider(provider_type="inline::rag-runtime"), BuildProvider(provider_type="remote::model-context-protocol"), ], "files": [BuildProvider(provider_type="inline::localfs")], @@ -50,10 +49,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: toolgroup_id="builtin::websearch", provider_id="tavily-search", ), - ToolGroupInput( - toolgroup_id="builtin::rag", - 
provider_id="rag-runtime", - ), ] files_provider = Provider( diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index c8ff9cecb..9be3edb8e 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -17,7 +17,7 @@ from llama_stack.apis.models import Model from llama_stack.apis.scoring_functions import ScoringFn from llama_stack.apis.shields import Shield from llama_stack.apis.tools import ToolGroup -from llama_stack.apis.vector_dbs import VectorDB +from llama_stack.apis.vector_stores import VectorStore from llama_stack.schema_utils import json_schema_type @@ -68,10 +68,10 @@ class ShieldsProtocolPrivate(Protocol): async def unregister_shield(self, identifier: str) -> None: ... -class VectorDBsProtocolPrivate(Protocol): - async def register_vector_db(self, vector_db: VectorDB) -> None: ... +class VectorStoresProtocolPrivate(Protocol): + async def register_vector_store(self, vector_store: VectorStore) -> None: ... - async def unregister_vector_db(self, vector_db_id: str) -> None: ... + async def unregister_vector_store(self, vector_store_id: str) -> None: ... class DatasetsProtocolPrivate(Protocol): diff --git a/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py b/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py deleted file mode 100644 index 78e49af94..000000000 --- a/llama_stack/providers/inline/telemetry/meta_reference/console_span_processor.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import json -from datetime import UTC, datetime - -from opentelemetry.sdk.trace import ReadableSpan -from opentelemetry.sdk.trace.export import SpanProcessor -from opentelemetry.trace.status import StatusCode - -from llama_stack.log import get_logger - -logger = get_logger(name="console_span_processor", category="telemetry") - - -class ConsoleSpanProcessor(SpanProcessor): - def __init__(self, print_attributes: bool = False): - self.print_attributes = print_attributes - - def on_start(self, span: ReadableSpan, parent_context=None) -> None: - if span.attributes and span.attributes.get("__autotraced__"): - return - - timestamp = datetime.fromtimestamp(span.start_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3] - logger.info(f"[dim]{timestamp}[/dim] [bold magenta][START][/bold magenta] [dim]{span.name}[/dim]") - - def on_end(self, span: ReadableSpan) -> None: - timestamp = datetime.fromtimestamp(span.end_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3] - span_context = f"[dim]{timestamp}[/dim] [bold magenta][END][/bold magenta] [dim]{span.name}[/dim]" - if span.status.status_code == StatusCode.ERROR: - span_context += " [bold red][ERROR][/bold red]" - elif span.status.status_code != StatusCode.UNSET: - span_context += f" [{span.status.status_code}]" - duration_ms = (span.end_time - span.start_time) / 1e6 - span_context += f" ({duration_ms:.2f}ms)" - logger.info(span_context) - - if self.print_attributes and span.attributes: - for key, value in span.attributes.items(): - if key.startswith("__"): - continue - str_value = str(value) - if len(str_value) > 1000: - str_value = str_value[:997] + "..." 
- logger.info(f" [dim]{key}[/dim]: {str_value}") - - for event in span.events: - event_time = datetime.fromtimestamp(event.timestamp / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3] - severity = event.attributes.get("severity", "info") - message = event.attributes.get("message", event.name) - if isinstance(message, dict) or isinstance(message, list): - message = json.dumps(message, indent=2) - severity_color = { - "error": "red", - "warn": "yellow", - "info": "white", - "debug": "dim", - }.get(severity, "white") - logger.info(f" {event_time} [bold {severity_color}][{severity.upper()}][/bold {severity_color}] {message}") - if event.attributes: - for key, value in event.attributes.items(): - if key.startswith("__") or key in ["message", "severity"]: - continue - logger.info(f"[dim]{key}[/dim]: {value}") - - def shutdown(self) -> None: - """Shutdown the processor.""" - pass - - def force_flush(self, timeout_millis: float | None = None) -> bool: - """Force flush any pending spans.""" - return True diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 2a225476b..b15b1e490 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -79,8 +79,10 @@ class TelemetryAdapter(Telemetry): metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter()) metric_provider = MeterProvider(metric_readers=[metric_reader]) metrics.set_meter_provider(metric_provider) + self.is_otel_endpoint_set = True else: logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry") + self.is_otel_endpoint_set = False self.meter = metrics.get_meter(__name__) self._lock = _global_lock @@ -89,7 +91,8 @@ class TelemetryAdapter(Telemetry): pass async def shutdown(self) -> None: - trace.get_tracer_provider().force_flush() + if self.is_otel_endpoint_set: + trace.get_tracer_provider().force_flush() async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None: if isinstance(event, UnstructuredLogEvent): diff --git a/llama_stack/providers/inline/tool_runtime/__init__.py b/llama_stack/providers/inline/tool_runtime/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/providers/inline/tool_runtime/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/llama_stack/providers/inline/tool_runtime/rag/__init__.py deleted file mode 100644 index f9a7e7b89..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Any - -from llama_stack.providers.datatypes import Api - -from .config import RagToolRuntimeConfig - - -async def get_provider_impl(config: RagToolRuntimeConfig, deps: dict[Api, Any]): - from .memory import MemoryToolRuntimeImpl - - impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference], deps[Api.files]) - await impl.initialize() - return impl diff --git a/llama_stack/providers/inline/tool_runtime/rag/config.py b/llama_stack/providers/inline/tool_runtime/rag/config.py deleted file mode 100644 index 43ba78e65..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/config.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel - - -class RagToolRuntimeConfig(BaseModel): - @classmethod - def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: - return {} diff --git a/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py deleted file mode 100644 index 14cbec49d..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from jinja2 import Template - -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam -from llama_stack.apis.tools.rag_tool import ( - DefaultRAGQueryGeneratorConfig, - LLMRAGQueryGeneratorConfig, - RAGQueryGenerator, - RAGQueryGeneratorConfig, -) -from llama_stack.providers.utils.inference.prompt_adapter import ( - interleaved_content_as_str, -) - - -async def generate_rag_query( - config: RAGQueryGeneratorConfig, - content: InterleavedContent, - **kwargs, -): - """ - Generates a query that will be used for - retrieving relevant information from the memory bank. 
- """ - if config.type == RAGQueryGenerator.default.value: - query = await default_rag_query_generator(config, content, **kwargs) - elif config.type == RAGQueryGenerator.llm.value: - query = await llm_rag_query_generator(config, content, **kwargs) - else: - raise NotImplementedError(f"Unsupported memory query generator {config.type}") - return query - - -async def default_rag_query_generator( - config: DefaultRAGQueryGeneratorConfig, - content: InterleavedContent, - **kwargs, -): - return interleaved_content_as_str(content, sep=config.separator) - - -async def llm_rag_query_generator( - config: LLMRAGQueryGeneratorConfig, - content: InterleavedContent, - **kwargs, -): - assert "inference_api" in kwargs, "LLMRAGQueryGenerator needs inference_api" - inference_api = kwargs["inference_api"] - - messages = [] - if isinstance(content, list): - messages = [interleaved_content_as_str(m) for m in content] - else: - messages = [interleaved_content_as_str(content)] - - template = Template(config.template) - rendered_content: str = template.render({"messages": messages}) - - model = config.model - message = OpenAIUserMessageParam(content=rendered_content) - params = OpenAIChatCompletionRequestWithExtraBody( - model=model, - messages=[message], - stream=False, - ) - response = await inference_api.openai_chat_completion(params) - - query = response.choices[0].message.content - - return query diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py deleted file mode 100644 index dc3dfbbca..000000000 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ /dev/null @@ -1,332 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import asyncio -import base64 -import io -import mimetypes -from typing import Any - -import httpx -from fastapi import UploadFile -from pydantic import TypeAdapter - -from llama_stack.apis.common.content_types import ( - URL, - InterleavedContent, - InterleavedContentItem, - TextContentItem, -) -from llama_stack.apis.files import Files, OpenAIFilePurpose -from llama_stack.apis.inference import Inference -from llama_stack.apis.tools import ( - ListToolDefsResponse, - RAGDocument, - RAGQueryConfig, - RAGQueryResult, - RAGToolRuntime, - ToolDef, - ToolGroup, - ToolInvocationResult, - ToolRuntime, -) -from llama_stack.apis.vector_io import ( - QueryChunksResponse, - VectorIO, - VectorStoreChunkingStrategyStatic, - VectorStoreChunkingStrategyStaticConfig, -) -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate -from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str -from llama_stack.providers.utils.memory.vector_store import parse_data_url - -from .config import RagToolRuntimeConfig -from .context_retriever import generate_rag_query - -log = get_logger(name=__name__, category="tool_runtime") - - -async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]: - """Get raw binary data and mime type from a RAGDocument for file upload.""" - if isinstance(doc.content, URL): - if doc.content.uri.startswith("data:"): - parts = parse_data_url(doc.content.uri) - mime_type = parts["mimetype"] - data = parts["data"] - - if parts["is_base64"]: - file_data = base64.b64decode(data) - else: - file_data = data.encode("utf-8") - - return file_data, mime_type - else: - async with httpx.AsyncClient() as client: - r = await client.get(doc.content.uri) - r.raise_for_status() - mime_type = r.headers.get("content-type", "application/octet-stream") - return r.content, mime_type - else: - if isinstance(doc.content, str): - content_str = doc.content - else: - content_str = interleaved_content_as_str(doc.content) - - if content_str.startswith("data:"): - parts = parse_data_url(content_str) - mime_type = parts["mimetype"] - data = parts["data"] - - if parts["is_base64"]: - file_data = base64.b64decode(data) - else: - file_data = data.encode("utf-8") - - return file_data, mime_type - else: - return content_str.encode("utf-8"), "text/plain" - - -class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime): - def __init__( - self, - config: RagToolRuntimeConfig, - vector_io_api: VectorIO, - inference_api: Inference, - files_api: Files, - ): - self.config = config - self.vector_io_api = vector_io_api - self.inference_api = inference_api - self.files_api = files_api - - async def initialize(self): - pass - - async def shutdown(self): - pass - - async def register_toolgroup(self, toolgroup: ToolGroup) -> None: - pass - - async def unregister_toolgroup(self, toolgroup_id: str) -> None: - return - - async def insert( - self, - documents: list[RAGDocument], - vector_db_id: str, - chunk_size_in_tokens: int = 512, - ) -> None: - if not documents: - return - - for doc in documents: - try: - try: - file_data, mime_type = await raw_data_from_doc(doc) - except Exception as e: - log.error(f"Failed to extract content from document {doc.document_id}: {e}") - continue - - file_extension = mimetypes.guess_extension(mime_type) or ".txt" - filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}") - - file_obj = io.BytesIO(file_data) - file_obj.name = filename - - upload_file = 
UploadFile(file=file_obj, filename=filename) - - try: - created_file = await self.files_api.openai_upload_file( - file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS - ) - except Exception as e: - log.error(f"Failed to upload file for document {doc.document_id}: {e}") - continue - - chunking_strategy = VectorStoreChunkingStrategyStatic( - static=VectorStoreChunkingStrategyStaticConfig( - max_chunk_size_tokens=chunk_size_in_tokens, - chunk_overlap_tokens=chunk_size_in_tokens // 4, - ) - ) - - try: - await self.vector_io_api.openai_attach_file_to_vector_store( - vector_store_id=vector_db_id, - file_id=created_file.id, - attributes=doc.metadata, - chunking_strategy=chunking_strategy, - ) - except Exception as e: - log.error( - f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}" - ) - continue - - except Exception as e: - log.error(f"Unexpected error processing document {doc.document_id}: {e}") - continue - - async def query( - self, - content: InterleavedContent, - vector_db_ids: list[str], - query_config: RAGQueryConfig | None = None, - ) -> RAGQueryResult: - if not vector_db_ids: - raise ValueError( - "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID." - ) - - query_config = query_config or RAGQueryConfig() - query = await generate_rag_query( - query_config.query_generator_config, - content, - inference_api=self.inference_api, - ) - tasks = [ - self.vector_io_api.query_chunks( - vector_db_id=vector_db_id, - query=query, - params={ - "mode": query_config.mode, - "max_chunks": query_config.max_chunks, - "score_threshold": 0.0, - "ranker": query_config.ranker, - }, - ) - for vector_db_id in vector_db_ids - ] - results: list[QueryChunksResponse] = await asyncio.gather(*tasks) - - chunks = [] - scores = [] - - for vector_db_id, result in zip(vector_db_ids, results, strict=False): - for chunk, score in zip(result.chunks, result.scores, strict=False): - if not hasattr(chunk, "metadata") or chunk.metadata is None: - chunk.metadata = {} - chunk.metadata["vector_db_id"] = vector_db_id - - chunks.append(chunk) - scores.append(score) - - if not chunks: - return RAGQueryResult(content=None) - - # sort by score - chunks, scores = zip(*sorted(zip(chunks, scores, strict=False), key=lambda x: x[1], reverse=True), strict=False) # type: ignore - chunks = chunks[: query_config.max_chunks] - - tokens = 0 - picked: list[InterleavedContentItem] = [ - TextContentItem( - text=f"knowledge_search tool found {len(chunks)} chunks:\nBEGIN of knowledge_search tool results.\n" - ) - ] - for i, chunk in enumerate(chunks): - metadata = chunk.metadata - tokens += metadata.get("token_count", 0) - tokens += metadata.get("metadata_token_count", 0) - - if tokens > query_config.max_tokens_in_context: - log.error( - f"Using {len(picked)} chunks; reached max tokens in context: {tokens}", - ) - break - - # Add useful keys from chunk_metadata to metadata and remove some from metadata - chunk_metadata_keys_to_include_from_context = [ - "chunk_id", - "document_id", - "source", - ] - metadata_keys_to_exclude_from_context = [ - "token_count", - "metadata_token_count", - "vector_db_id", - ] - metadata_for_context = {} - for k in chunk_metadata_keys_to_include_from_context: - metadata_for_context[k] = getattr(chunk.chunk_metadata, k) - for k in metadata: - if k not in metadata_keys_to_exclude_from_context: - metadata_for_context[k] = metadata[k] - - text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, 
metadata=metadata_for_context) - picked.append(TextContentItem(text=text_content)) - - picked.append(TextContentItem(text="END of knowledge_search tool results.\n")) - picked.append( - TextContentItem( - text=f'The above results were retrieved to help answer the user\'s query: "{interleaved_content_as_str(content)}". Use them as supporting information only in answering this query.\n', - ) - ) - - return RAGQueryResult( - content=picked, - metadata={ - "document_ids": [c.document_id for c in chunks[: len(picked)]], - "chunks": [c.content for c in chunks[: len(picked)]], - "scores": scores[: len(picked)], - "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]], - }, - ) - - async def list_runtime_tools( - self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None - ) -> ListToolDefsResponse: - # Parameters are not listed since these methods are not yet invoked automatically - # by the LLM. The method is only implemented so things like /tools can list without - # encountering fatals. - return ListToolDefsResponse( - data=[ - ToolDef( - name="insert_into_memory", - description="Insert documents into memory", - ), - ToolDef( - name="knowledge_search", - description="Search for information in a database.", - input_schema={ - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "The query to search for. Can be a natural language sentence or keywords.", - } - }, - "required": ["query"], - }, - ), - ] - ) - - async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: - vector_db_ids = kwargs.get("vector_db_ids", []) - query_config = kwargs.get("query_config") - if query_config: - query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config) - else: - query_config = RAGQueryConfig() - - query = kwargs["query"] - result = await self.query( - content=query, - vector_db_ids=vector_db_ids, - query_config=query_config, - ) - - return ToolInvocationResult( - content=result.content or [], - metadata={ - **(result.metadata or {}), - "citation_files": getattr(result, "citation_files", None), - }, - ) diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index f13eb3e96..5e33d4ca3 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -17,21 +17,21 @@ from numpy.typing import NDArray from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import FaissVectorIOConfig 
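With `MemoryToolRuntimeImpl` deleted, the ingestion path it wrapped (upload through the Files API, then attach to a vector store with a static chunking strategy) is exercised directly against the OpenAI-compatible endpoints. A rough client-side sketch, assuming a Llama Stack server on localhost:8321 and a recent `openai` client; the URL, filename, and token sizes are illustrative:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Upload the document, then attach it to a vector store; this mirrors the
# files_api.openai_upload_file and openai_attach_file_to_vector_store calls
# in the deleted insert() above, including the overlap of one quarter of the
# chunk size (512 -> 128).
vs = client.vector_stores.create(name="docs")
uploaded = client.files.create(file=open("notes.txt", "rb"), purpose="assistants")
client.vector_stores.files.create(
    vector_store_id=vs.id,
    file_id=uploaded.id,
    chunking_strategy={
        "type": "static",
        "static": {"max_chunk_size_tokens": 512, "chunk_overlap_tokens": 128},
    },
)
```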
logger = get_logger(name=__name__, category="vector_io") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::" FAISS_INDEX_PREFIX = f"faiss_index:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::" @@ -176,28 +176,28 @@ class FaissIndex(EmbeddingIndex): ) -class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.cache: dict[str, VectorDBWithIndex] = {} + self.cache: dict[str, VectorStoreWithIndex] = {} async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) # Load existing banks from kvstore start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) - for vector_db_data in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex( - vector_db, - await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier), + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store, + await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index # Load existing OpenAI vector stores into the in-memory cache await self.initialize_openai_vector_stores() @@ -222,32 +222,31 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr except Exception as e: return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: assert self.kvstore is not None - key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" - await self.kvstore.set(key=key, value=vector_db.model_dump_json()) + key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" + await self.kvstore.set(key=key, value=vector_store.model_dump_json()) # Store in cache - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db=vector_db, - index=await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier), + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store=vector_store, + index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), inference_api=self.inference_api, ) - async def list_vector_dbs(self) -> list[VectorDB]: - return [i.vector_db for i in self.cache.values()] + async def list_vector_stores(self) -> list[VectorStore]: + return [i.vector_store for i in self.cache.values()] - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: assert self.kvstore is not None - if vector_db_id not in self.cache: - logger.warning(f"Vector DB {vector_db_id} not found") + if 
vector_store_id not in self.cache: return - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] - await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}") + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] + await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}") async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = self.cache.get(vector_db_id) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index cfe23bde5..37294f173 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -17,10 +17,10 @@ from numpy.typing import NDArray from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin @@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, ChunkForDeletion, EmbeddingIndex, - VectorDBWithIndex, + VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator @@ -41,7 +41,7 @@ HYBRID_SEARCH = "hybrid" SEARCH_MODES = {VECTOR_SEARCH, KEYWORD_SEARCH, HYBRID_SEARCH} VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:sqlite_vec:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:sqlite_vec:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:sqlite_vec:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:sqlite_vec:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:sqlite_vec:{VERSION}::" @@ -374,32 +374,32 @@ class SQLiteVecIndex(EmbeddingIndex): await asyncio.to_thread(_delete_chunks) -class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): """ A VectorIO implementation using SQLite + sqlite_vec. - This class handles vector database registration (with metadata stored in a table named `vector_dbs`) - and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex). + This class handles vector database registration (with metadata stored in a table named `vector_stores`) + and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex). 
""" def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.cache: dict[str, VectorDBWithIndex] = {} - self.vector_db_store = None + self.cache: dict[str, VectorStoreWithIndex] = {} + self.vector_store_table = None async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) - for db_json in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(db_json) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) + for db_json in stored_vector_stores: + vector_store = VectorStore.model_validate_json(db_json) index = await SQLiteVecIndex.create( - vector_db.embedding_dimension, self.config.db_path, vector_db.identifier + vector_store.embedding_dimension, self.config.db_path, vector_store.identifier ) - self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) + self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api) # Load existing OpenAI vector stores into the in-memory cache await self.initialize_openai_vector_stores() @@ -408,63 +408,64 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc # Clean up mixin resources (file batch tasks) await super().shutdown() - async def list_vector_dbs(self) -> list[VectorDB]: - return [v.vector_db for v in self.cache.values()] + async def list_vector_stores(self) -> list[VectorStore]: + return [v.vector_store for v in self.cache.values()] - async def register_vector_db(self, vector_db: VectorDB) -> None: - index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier) - self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) + async def register_vector_store(self, vector_store: VectorStore) -> None: + index = await SQLiteVecIndex.create( + vector_store.embedding_dimension, self.config.db_path, vector_store.identifier + ) + self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = VectorDBWithIndex( - vector_db=vector_db, + index = VectorStoreWithIndex( + vector_store=vector_store, index=SQLiteVecIndex( - dimension=vector_db.embedding_dimension, + dimension=vector_store.embedding_dimension, db_path=self.config.db_path, - bank_id=vector_db.identifier, + bank_id=vector_store.identifier, kvstore=self.kvstore, ), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + 
self.cache[vector_store_id] = index return index - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id not in self.cache: - logger.warning(f"Vector DB {vector_db_id} not found") + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id not in self.cache: return - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) - # The VectorDBWithIndex helper is expected to compute embeddings via the inference_api + # The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api # and then call our index's add_chunks. await index.insert_chunks(chunks) async def query_chunks( self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete chunks from a sqlite_vec index.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise VectorStoreNotFoundError(store_id) diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 35afb296d..2e52e2d12 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -42,6 +42,7 @@ def available_providers() -> list[ProviderSpec]: # CrossEncoder depends on torchao.quantization pip_packages=[ "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu", + "numpy tqdm transformers", "sentence-transformers --no-deps", # required by some SentenceTransformers architectures for tensor rearrange/merge ops "einops", diff --git a/llama_stack/providers/registry/tool_runtime.py b/llama_stack/providers/registry/tool_runtime.py index 39dc7fccd..514d9d0a0 100644 --- a/llama_stack/providers/registry/tool_runtime.py +++ b/llama_stack/providers/registry/tool_runtime.py @@ -7,33 +7,13 @@ from llama_stack.providers.datatypes import ( Api, - InlineProviderSpec, ProviderSpec, RemoteProviderSpec, ) -from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS def available_providers() -> list[ProviderSpec]: return [ - InlineProviderSpec( - api=Api.tool_runtime, - provider_type="inline::rag-runtime", - pip_packages=DEFAULT_VECTOR_IO_DEPS - + [ - "tqdm", - "numpy", - "scikit-learn", - "scipy", - "nltk", - "sentencepiece", - "transformers", - ], - module="llama_stack.providers.inline.tool_runtime.rag", - config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig", - api_dependencies=[Api.vector_io, Api.inference, Api.files], - description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.", - ), RemoteProviderSpec( api=Api.tool_runtime, adapter_type="brave-search", diff --git 
a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index ff3b8486f..db81ea35d 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -119,7 +119,7 @@ Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, i #### Empirical Example Consider the histogram below in which 10,000 randomly generated strings were inserted -in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`. +in batches of 100 into both Faiss and sqlite-vec. ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png :alt: Comparison of SQLite-Vec and Faiss write times diff --git a/llama_stack/providers/remote/datasetio/nvidia/README.md b/llama_stack/providers/remote/datasetio/nvidia/README.md index 74e0895f4..da57d5550 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/README.md +++ b/llama_stack/providers/remote/datasetio/nvidia/README.md @@ -20,7 +20,7 @@ This provider enables dataset management using NVIDIA's NeMo Customizer service. Build the NVIDIA environment: ```bash -llama stack build --distro nvidia --image-type venv +uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` ### Basic Usage using the LlamaStack Python Client diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index 692b9125b..f1a828413 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -18,7 +18,7 @@ This provider enables running inference using NVIDIA NIM. Build the NVIDIA environment: ```bash -llama stack build --distro nvidia --image-type venv +uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` ### Basic Usage using the LlamaStack Python Client diff --git a/llama_stack/providers/remote/inference/nvidia/__init__.py b/llama_stack/providers/remote/inference/nvidia/__init__.py index 1869cb748..b4926f33e 100644 --- a/llama_stack/providers/remote/inference/nvidia/__init__.py +++ b/llama_stack/providers/remote/inference/nvidia/__init__.py @@ -10,7 +10,7 @@ from .config import NVIDIAConfig async def get_adapter_impl(config: NVIDIAConfig, _deps) -> Inference: - # import dynamically so `llama stack build` does not fail due to missing dependencies + # import dynamically so `llama stack list-deps` does not fail due to missing dependencies from .nvidia import NVIDIAInferenceAdapter if not isinstance(config, NVIDIAConfig): diff --git a/llama_stack/providers/remote/post_training/nvidia/README.md b/llama_stack/providers/remote/post_training/nvidia/README.md index 9b088a615..789514b1e 100644 --- a/llama_stack/providers/remote/post_training/nvidia/README.md +++ b/llama_stack/providers/remote/post_training/nvidia/README.md @@ -22,7 +22,7 @@ This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service Build the NVIDIA environment: ```bash -llama stack build --distro nvidia --image-type venv +uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` ### Basic Usage using the LlamaStack Python Client diff --git a/llama_stack/providers/remote/safety/nvidia/README.md b/llama_stack/providers/remote/safety/nvidia/README.md index 784ab464f..e589afe84 100644 --- a/llama_stack/providers/remote/safety/nvidia/README.md +++ b/llama_stack/providers/remote/safety/nvidia/README.md @@ -19,7 +19,7 @@ This provider enables safety checks and guardrails for LLM interactions using 
NV Build the NVIDIA environment: ```bash -llama stack build --distro nvidia --image-type venv +uv run llama stack list-deps nvidia | xargs -L1 uv pip install ``` ### Basic Usage using the LlamaStack Python Client diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 0aa728c32..2663ad43e 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -13,15 +13,15 @@ from numpy.typing import NDArray from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig @@ -30,7 +30,7 @@ log = get_logger(name=__name__, category="vector_io::chroma") ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:chroma:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:chroma:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:chroma:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:chroma:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:chroma:{VERSION}::" @@ -114,7 +114,7 @@ class ChromaIndex(EmbeddingIndex): raise NotImplementedError("Hybrid search is not supported in Chroma") -class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig, @@ -127,11 +127,11 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.inference_api = inference_api self.client = None self.cache = {} - self.vector_db_store = None + self.vector_store_table = None async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) - self.vector_db_store = self.kvstore + self.vector_store_table = self.kvstore if isinstance(self.config, RemoteChromaVectorIOConfig): log.info(f"Connecting to Chroma server at: {self.config.url}") @@ -151,26 +151,26 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: collection = await maybe_await( self.client.get_or_create_collection( - name=vector_db.identifier, metadata={"vector_db": vector_db.model_dump_json()} + 
name=vector_store.identifier, metadata={"vector_store": vector_store.model_dump_json()} ) ) - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db, ChromaIndex(self.client, collection), self.inference_api + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store, ChromaIndex(self.client, collection), self.inference_api ) - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id not in self.cache: - log.warning(f"Vector DB {vector_db_id} not found") + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id not in self.cache: + log.warning(f"Vector DB {vector_store_id} not found") return - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") @@ -179,30 +179,30 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") return await index.query_chunks(query, params) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack") - collection = await maybe_await(self.client.get_collection(vector_db_id)) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack") + collection = await maybe_await(self.client.get_collection(vector_store_id)) if not collection: - raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") - index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api) - self.cache[vector_db_id] = index + raise ValueError(f"Vector DB {vector_store_id} not found in Chroma") + index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api) + self.cache[vector_store_id] = index return index async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete chunks from a Chroma vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise ValueError(f"Vector DB {store_id} not found") diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index d7c34163d..cccf13816 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ 
b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -14,10 +14,10 @@ from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusC from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore @@ -26,7 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_WEIGHTED, ChunkForDeletion, EmbeddingIndex, - VectorDBWithIndex, + VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name @@ -35,7 +35,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig logger = get_logger(name=__name__, category="vector_io::milvus") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:milvus:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:milvus:{VERSION}::" @@ -261,7 +261,7 @@ class MilvusIndex(EmbeddingIndex): raise -class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig, @@ -273,28 +273,28 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.cache = {} self.client = None self.inference_api = inference_api - self.vector_db_store = None + self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) - for vector_db_data in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex( - vector_db, + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store, index=MilvusIndex( client=self.client, - collection_name=vector_db.identifier, + collection_name=vector_store.identifier, consistency_level=self.config.consistency_level, kvstore=self.kvstore, ), inference_api=self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index if isinstance(self.config, RemoteMilvusVectorIOConfig): logger.info(f"Connecting to Milvus server at {self.config.uri}") self.client = MilvusClient(**self.config.model_dump(exclude_none=True)) @@ -311,45 +311,45 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, 
VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: if isinstance(self.config, RemoteMilvusVectorIOConfig): consistency_level = self.config.consistency_level else: consistency_level = "Strong" - index = VectorDBWithIndex( - vector_db=vector_db, - index=MilvusIndex(self.client, vector_db.identifier, consistency_level=consistency_level), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level), inference_api=self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = VectorDBWithIndex( - vector_db=vector_db, - index=MilvusIndex(client=self.client, collection_name=vector_db.identifier, kvstore=self.kvstore), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + self.cache[vector_store_id] = index return index - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id in self.cache: - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id in self.cache: + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -358,14 +358,14 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete a chunk from a milvus vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise VectorStoreNotFoundError(store_id) diff --git 
a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 703a47843..f28bd3cd9 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -16,15 +16,15 @@ from pydantic import BaseModel, TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name from .config import PGVectorVectorIOConfig @@ -32,7 +32,7 @@ from .config import PGVectorVectorIOConfig log = get_logger(name=__name__, category="vector_io::pgvector") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:pgvector:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:pgvector:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:pgvector:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:pgvector:{VERSION}::" @@ -79,13 +79,13 @@ class PGVectorIndex(EmbeddingIndex): def __init__( self, - vector_db: VectorDB, + vector_store: VectorStore, dimension: int, conn: psycopg2.extensions.connection, kvstore: KVStore | None = None, distance_metric: str = "COSINE", ): - self.vector_db = vector_db + self.vector_store = vector_store self.dimension = dimension self.conn = conn self.kvstore = kvstore @@ -97,9 +97,9 @@ class PGVectorIndex(EmbeddingIndex): try: with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: # Sanitize the table name by replacing hyphens with underscores - # SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens + # SQL doesn't allow hyphens in table names, and vector_store.identifier may contain hyphens # when created with patterns like "test-vector-db-{uuid4()}" - sanitized_identifier = sanitize_collection_name(self.vector_db.identifier) + sanitized_identifier = sanitize_collection_name(self.vector_store.identifier) self.table_name = f"vs_{sanitized_identifier}" cur.execute( @@ -122,8 +122,8 @@ class PGVectorIndex(EmbeddingIndex): """ ) except Exception as e: - log.exception(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") - raise RuntimeError(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") from e + log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") + raise RuntimeError(f"Error creating PGVectorIndex for 
vector_store: {self.vector_store.identifier}") from e async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray): assert len(chunks) == len(embeddings), ( @@ -323,7 +323,7 @@ class PGVectorIndex(EmbeddingIndex): ) -class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None ) -> None: @@ -332,7 +332,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco self.inference_api = inference_api self.conn = None self.cache = {} - self.vector_db_store = None + self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" async def initialize(self) -> None: @@ -375,59 +375,59 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: # Persist vector DB metadata in the KV store assert self.kvstore is not None # Upsert model metadata in Postgres - upsert_models(self.conn, [(vector_db.identifier, vector_db)]) + upsert_models(self.conn, [(vector_store.identifier, vector_store)]) # Create and cache the PGVector index table for the vector DB pgvector_index = PGVectorIndex( - vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore + vector_store=vector_store, dimension=vector_store.embedding_dimension, conn=self.conn, kvstore=self.kvstore ) await pgvector_index.initialize() - index = VectorDBWithIndex(vector_db, index=pgvector_index, inference_api=self.inference_api) - self.cache[vector_db.identifier] = index + index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) + self.cache[vector_store.identifier] = index - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: # Remove provider index and cache - if vector_db_id in self.cache: - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + if vector_store_id in self.cache: + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] # Delete vector DB metadata from KV store assert self.kvstore is not None - await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_db_id}") + await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}") async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) await index.insert_chunks(chunks) async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) return await index.query_chunks(query, params) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex: + if vector_store_id 
in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn) + index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn) await index.initialize() - self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api) - return self.cache[vector_db_id] + self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api) + return self.cache[vector_store_id] async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete a chunk from a PostgreSQL vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise VectorStoreNotFoundError(store_id) diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 6838d69e9..93d0894a6 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -16,7 +16,6 @@ from qdrant_client.models import PointStruct from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, QueryChunksResponse, @@ -24,12 +23,13 @@ from llama_stack.apis.vector_io import ( VectorStoreChunkingStrategy, VectorStoreFileObject, ) +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig @@ -38,7 +38,7 @@ CHUNK_ID_KEY = "_chunk_id" # KV store prefixes for vector databases VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:qdrant:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:qdrant:{VERSION}::" def convert_id(_id: str) -> str: @@ -145,7 +145,7 @@ class QdrantIndex(EmbeddingIndex): await self.client.delete_collection(collection_name=self.collection_name) -class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig, @@ -157,7 +157,7 @@ class 
QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api - self.vector_db_store = None + self.vector_store_table = None self._qdrant_lock = asyncio.Lock() async def initialize(self) -> None: @@ -167,12 +167,14 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) - for vector_db_data in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex(vector_db, QdrantIndex(self.client, vector_db.identifier), self.inference_api) - self.cache[vector_db.identifier] = index + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api + ) + self.cache[vector_store.identifier] = index self.openai_vector_stores = await self._load_openai_vector_stores() async def shutdown(self) -> None: @@ -180,46 +182,48 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: assert self.kvstore is not None - key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" - await self.kvstore.set(key=key, value=vector_db.model_dump_json()) + key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" + await self.kvstore.set(key=key, value=vector_store.model_dump_json()) - index = VectorDBWithIndex( - vector_db=vector_db, index=QdrantIndex(self.client, vector_db.identifier), inference_api=self.inference_api - ) - - self.cache[vector_db.identifier] = index - - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id in self.cache: - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] - - assert self.kvstore is not None - await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}") - - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] - - if self.vector_db_store is None: - raise ValueError(f"Vector DB not found {vector_db_id}") - - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) - - index = VectorDBWithIndex( - vector_db=vector_db, - index=QdrantIndex(client=self.client, collection_name=vector_db.identifier), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=QdrantIndex(self.client, vector_store.identifier), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + + self.cache[vector_store.identifier] = index + + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id in self.cache: + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] + + assert self.kvstore is not None + await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}") + + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + 
return self.cache[vector_store_id] + + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) + + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) + + index = VectorStoreWithIndex( + vector_store=vector_store, + index=QdrantIndex(client=self.client, collection_name=vector_store.identifier), + inference_api=self.inference_api, + ) + self.cache[vector_store_id] = index return index async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -228,7 +232,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -249,7 +253,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete chunks from a Qdrant vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise ValueError(f"Vector DB {store_id} not found") diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 8e7eb7267..66922aa3f 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -16,11 +16,11 @@ from llama_stack.apis.common.content_types import InterleavedContent from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin @@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, ChunkForDeletion, EmbeddingIndex, - VectorDBWithIndex, + VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name @@ -37,7 +37,7 @@ from .config import WeaviateVectorIOConfig log = get_logger(name=__name__, category="vector_io::weaviate") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:weaviate:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:weaviate:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX =
f"openai_vector_stores:weaviate:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:weaviate:{VERSION}::" @@ -257,14 +257,14 @@ class WeaviateIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) -class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorDBsProtocolPrivate): +class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate): def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api self.client_cache = {} self.cache = {} - self.vector_db_store = None + self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" def _get_client(self) -> weaviate.WeaviateClient: @@ -300,11 +300,11 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv end_key = f"{VECTOR_DBS_PREFIX}\xff" stored = await self.kvstore.values_in_range(start_key, end_key) for raw in stored: - vector_db = VectorDB.model_validate_json(raw) + vector_store = VectorStore.model_validate_json(raw) client = self._get_client() - idx = WeaviateIndex(client=client, collection_name=vector_db.identifier, kvstore=self.kvstore) - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db=vector_db, index=idx, inference_api=self.inference_api + idx = WeaviateIndex(client=client, collection_name=vector_store.identifier, kvstore=self.kvstore) + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store=vector_store, index=idx, inference_api=self.inference_api ) # Load OpenAI vector stores metadata into cache @@ -316,9 +316,9 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: client = self._get_client() - sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True) + sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True) # Create collection if it doesn't exist if not client.collections.exists(sanitized_collection_name): client.collections.create( @@ -329,45 +329,45 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv ], ) - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api ) - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: client = self._get_client() - sanitized_collection_name = sanitize_collection_name(vector_db_id, weaviate_format=True) - if vector_db_id not in self.cache or client.collections.exists(sanitized_collection_name) is False: + sanitized_collection_name = sanitize_collection_name(vector_store_id, weaviate_format=True) + if vector_store_id not in self.cache or client.collections.exists(sanitized_collection_name) is False: return client.collections.delete(sanitized_collection_name) - await 
self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) client = self._get_client() - sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True) + sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True) if not client.collections.exists(sanitized_collection_name): raise ValueError(f"Collection with name `{sanitized_collection_name}` not found") - index = VectorDBWithIndex( - vector_db=vector_db, - index=WeaviateIndex(client=client, collection_name=vector_db.identifier), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=WeaviateIndex(client=client, collection_name=vector_store.identifier), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + self.cache[vector_store_id] = index return index async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -376,14 +376,14 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise ValueError(f"Vector DB {store_id} not found") diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 7806d98c1..6629fb965 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -17,7 +17,6 @@ from pydantic import TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files, OpenAIFileObject -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, @@ -43,6 +42,7 @@ from llama_stack.apis.vector_io import ( VectorStoreSearchResponse, 
VectorStoreSearchResponsePage, ) +from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore.api import KVStore @@ -63,7 +63,7 @@ MAX_CONCURRENT_FILES_PER_BATCH = 3 # Maximum concurrent file processing within FILE_BATCH_CHUNK_SIZE = 10 # Process files in chunks of this size VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::" OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:{VERSION}::" @@ -321,12 +321,12 @@ class OpenAIVectorStoreMixin(ABC): pass @abstractmethod - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: """Register a vector database (provider-specific implementation).""" pass @abstractmethod - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: """Unregister a vector database (provider-specific implementation).""" pass @@ -358,7 +358,7 @@ class OpenAIVectorStoreMixin(ABC): extra_body = params.model_extra or {} metadata = params.metadata or {} - provider_vector_db_id = extra_body.get("provider_vector_db_id") + provider_vector_store_id = extra_body.get("provider_vector_store_id") # Use embedding info from metadata if available, otherwise from extra_body if metadata.get("embedding_model"): @@ -389,8 +389,8 @@ class OpenAIVectorStoreMixin(ABC): # use provider_id set by router; fallback to provider's own ID when used directly via --stack-config provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None) - # Derive the canonical vector_db_id (allow override, else generate) - vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") + # Derive the canonical vector_store_id (allow override, else generate) + vector_store_id = provider_vector_store_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") if embedding_model is None: raise ValueError("embedding_model is required") @@ -398,19 +398,20 @@ class OpenAIVectorStoreMixin(ABC): if embedding_dimension is None: raise ValueError("Embedding dimension is required") - # Register the VectorDB backing this vector store + # Register the VectorStore backing this vector store if provider_id is None: raise ValueError("Provider ID is required but was not provided") - vector_db = VectorDB( - identifier=vector_db_id, + # call to the provider to create any index, etc. 
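+ # (the constructor below only builds the VectorStore resource; the provider-specific index creation happens in register_vector_store() further down)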
+ vector_store = VectorStore( + identifier=vector_store_id, embedding_dimension=embedding_dimension, embedding_model=embedding_model, provider_id=provider_id, - provider_resource_id=vector_db_id, - vector_db_name=params.name, + provider_resource_id=vector_store_id, + vector_store_name=params.name, ) - await self.register_vector_db(vector_db) + await self.register_vector_store(vector_store) # Create OpenAI vector store metadata status = "completed" @@ -424,7 +425,7 @@ class OpenAIVectorStoreMixin(ABC): total=0, ) store_info: dict[str, Any] = { - "id": vector_db_id, + "id": vector_store_id, "object": "vector_store", "created_at": created_at, "name": params.name, @@ -441,23 +442,23 @@ class OpenAIVectorStoreMixin(ABC): # Add provider information to metadata if provided if provider_id: metadata["provider_id"] = provider_id - if provider_vector_db_id: - metadata["provider_vector_db_id"] = provider_vector_db_id + if provider_vector_store_id: + metadata["provider_vector_store_id"] = provider_vector_store_id store_info["metadata"] = metadata # Save to persistent storage (provider-specific) - await self._save_openai_vector_store(vector_db_id, store_info) + await self._save_openai_vector_store(vector_store_id, store_info) # Store in memory cache - self.openai_vector_stores[vector_db_id] = store_info + self.openai_vector_stores[vector_store_id] = store_info # Now that our vector store is created, attach any files that were provided file_ids = params.file_ids or [] - tasks = [self.openai_attach_file_to_vector_store(vector_db_id, file_id) for file_id in file_ids] + tasks = [self.openai_attach_file_to_vector_store(vector_store_id, file_id) for file_id in file_ids] await asyncio.gather(*tasks) # Get the updated store info and return it - store_info = self.openai_vector_stores[vector_db_id] + store_info = self.openai_vector_stores[vector_store_id] return VectorStoreObject.model_validate(store_info) async def openai_list_vector_stores( @@ -567,7 +568,7 @@ class OpenAIVectorStoreMixin(ABC): # Also delete the underlying vector DB try: - await self.unregister_vector_db(vector_store_id) + await self.unregister_vector_store(vector_store_id) except Exception as e: logger.warning(f"Failed to delete underlying vector DB {vector_store_id}: {e}") diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 0375ecaaa..9e9c9a08a 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -12,19 +12,16 @@ from dataclasses import dataclass from typing import Any from urllib.parse import unquote -import httpx import numpy as np from numpy.typing import NDArray from pydantic import BaseModel from llama_stack.apis.common.content_types import ( - URL, InterleavedContent, ) from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody -from llama_stack.apis.tools import RAGDocument -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.datatypes import Api @@ -129,31 +126,6 @@ def content_from_data_and_mime_type(data: bytes | str, mime_type: str | None, en return "" -async def content_from_doc(doc: RAGDocument) -> str: - if isinstance(doc.content, URL): - if doc.content.uri.startswith("data:"): - return 
content_from_data(doc.content.uri) - async with httpx.AsyncClient() as client: - r = await client.get(doc.content.uri) - if doc.mime_type == "application/pdf": - return parse_pdf(r.content) - return r.text - elif isinstance(doc.content, str): - pattern = re.compile("^(https?://|file://|data:)") - if pattern.match(doc.content): - if doc.content.startswith("data:"): - return content_from_data(doc.content) - async with httpx.AsyncClient() as client: - r = await client.get(doc.content) - if doc.mime_type == "application/pdf": - return parse_pdf(r.content) - return r.text - return doc.content - else: - # will raise ValueError if the content is not List[InterleavedContent] or InterleavedContent - return interleaved_content_as_str(doc.content) - - def make_overlapped_chunks( document_id: str, text: str, window_len: int, overlap_len: int, metadata: dict[str, Any] ) -> list[Chunk]: @@ -187,7 +159,7 @@ def make_overlapped_chunks( updated_timestamp=int(time.time()), chunk_window=chunk_window, chunk_tokenizer=default_tokenizer, - chunk_embedding_model=None, # This will be set in `VectorDBWithIndex.insert_chunks` + chunk_embedding_model=None, # This will be set in `VectorStoreWithIndex.insert_chunks` content_token_count=len(toks), metadata_token_count=len(metadata_tokens), ) @@ -255,8 +227,8 @@ class EmbeddingIndex(ABC): @dataclass -class VectorDBWithIndex: - vector_db: VectorDB +class VectorStoreWithIndex: + vector_store: VectorStore index: EmbeddingIndex inference_api: Api.inference @@ -269,14 +241,14 @@ class VectorDBWithIndex: if c.embedding is None: chunks_to_embed.append(c) if c.chunk_metadata: - c.chunk_metadata.chunk_embedding_model = self.vector_db.embedding_model - c.chunk_metadata.chunk_embedding_dimension = self.vector_db.embedding_dimension + c.chunk_metadata.chunk_embedding_model = self.vector_store.embedding_model + c.chunk_metadata.chunk_embedding_dimension = self.vector_store.embedding_dimension else: - _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension) + _validate_embedding(c.embedding, i, self.vector_store.embedding_dimension) if chunks_to_embed: params = OpenAIEmbeddingsRequestWithExtraBody( - model=self.vector_db.embedding_model, + model=self.vector_store.embedding_model, input=[c.content for c in chunks_to_embed], ) resp = await self.inference_api.openai_embeddings(params) @@ -319,7 +291,7 @@ class VectorDBWithIndex: return await self.index.query_keyword(query_string, k, score_threshold) params = OpenAIEmbeddingsRequestWithExtraBody( - model=self.vector_db.embedding_model, + model=self.vector_store.embedding_model, input=[query_string], ) embeddings_response = await self.inference_api.openai_embeddings(params) diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json index c138de535..9b22dd8d5 100644 --- a/llama_stack/ui/package-lock.json +++ b/llama_stack/ui/package-lock.json @@ -2072,9 +2072,9 @@ } }, "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", - "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", "dev": true, "license": "MIT" }, @@ -3199,61 +3199,54 @@ } }, "node_modules/@tailwindcss/node": { - "version": "4.1.6", - 
"resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.6.tgz", - "integrity": "sha512-ed6zQbgmKsjsVvodAS1q1Ld2BolEuxJOSyyNc+vhkjdmfNUDCmQnlXBfQkHrlzNmslxHsQU/bFmzcEbv4xXsLg==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.14.tgz", + "integrity": "sha512-hpz+8vFk3Ic2xssIA3e01R6jkmsAhvkQdXlEbRTk6S10xDAtiQiM3FyvZVGsucefq764euO/b8WUW9ysLdThHw==", "dev": true, "license": "MIT", "dependencies": { - "@ampproject/remapping": "^2.3.0", - "enhanced-resolve": "^5.18.1", - "jiti": "^2.4.2", - "lightningcss": "1.29.2", - "magic-string": "^0.30.17", + "@jridgewell/remapping": "^2.3.4", + "enhanced-resolve": "^5.18.3", + "jiti": "^2.6.0", + "lightningcss": "1.30.1", + "magic-string": "^0.30.19", "source-map-js": "^1.2.1", - "tailwindcss": "4.1.6" + "tailwindcss": "4.1.14" } }, - "node_modules/@tailwindcss/node/node_modules/tailwindcss": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz", - "integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==", - "dev": true, - "license": "MIT" - }, "node_modules/@tailwindcss/oxide": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.6.tgz", - "integrity": "sha512-0bpEBQiGx+227fW4G0fLQ8vuvyy5rsB1YIYNapTq3aRsJ9taF3f5cCaovDjN5pUGKKzcpMrZst/mhNaKAPOHOA==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.14.tgz", + "integrity": "sha512-23yx+VUbBwCg2x5XWdB8+1lkPajzLmALEfMb51zZUBYaYVPDQvBSD/WYDqiVyBIo2BZFa3yw1Rpy3G2Jp+K0dw==", "dev": true, "hasInstallScript": true, "license": "MIT", "dependencies": { "detect-libc": "^2.0.4", - "tar": "^7.4.3" + "tar": "^7.5.1" }, "engines": { "node": ">= 10" }, "optionalDependencies": { - "@tailwindcss/oxide-android-arm64": "4.1.6", - "@tailwindcss/oxide-darwin-arm64": "4.1.6", - "@tailwindcss/oxide-darwin-x64": "4.1.6", - "@tailwindcss/oxide-freebsd-x64": "4.1.6", - "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.6", - "@tailwindcss/oxide-linux-arm64-gnu": "4.1.6", - "@tailwindcss/oxide-linux-arm64-musl": "4.1.6", - "@tailwindcss/oxide-linux-x64-gnu": "4.1.6", - "@tailwindcss/oxide-linux-x64-musl": "4.1.6", - "@tailwindcss/oxide-wasm32-wasi": "4.1.6", - "@tailwindcss/oxide-win32-arm64-msvc": "4.1.6", - "@tailwindcss/oxide-win32-x64-msvc": "4.1.6" + "@tailwindcss/oxide-android-arm64": "4.1.14", + "@tailwindcss/oxide-darwin-arm64": "4.1.14", + "@tailwindcss/oxide-darwin-x64": "4.1.14", + "@tailwindcss/oxide-freebsd-x64": "4.1.14", + "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.14", + "@tailwindcss/oxide-linux-arm64-gnu": "4.1.14", + "@tailwindcss/oxide-linux-arm64-musl": "4.1.14", + "@tailwindcss/oxide-linux-x64-gnu": "4.1.14", + "@tailwindcss/oxide-linux-x64-musl": "4.1.14", + "@tailwindcss/oxide-wasm32-wasi": "4.1.14", + "@tailwindcss/oxide-win32-arm64-msvc": "4.1.14", + "@tailwindcss/oxide-win32-x64-msvc": "4.1.14" } }, "node_modules/@tailwindcss/oxide-android-arm64": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.6.tgz", - "integrity": "sha512-VHwwPiwXtdIvOvqT/0/FLH/pizTVu78FOnI9jQo64kSAikFSZT7K4pjyzoDpSMaveJTGyAKvDjuhxJxKfmvjiQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.14.tgz", + "integrity": "sha512-a94ifZrGwMvbdeAxWoSuGcIl6/DOP5cdxagid7xJv6bwFp3oebp7y2ImYsnZBMTwjn5Ev5xESvS3FFYUGgPODQ==", "cpu": [ "arm64" ], @@ 
-3268,9 +3261,9 @@ } }, "node_modules/@tailwindcss/oxide-darwin-arm64": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.6.tgz", - "integrity": "sha512-weINOCcqv1HVBIGptNrk7c6lWgSFFiQMcCpKM4tnVi5x8OY2v1FrV76jwLukfT6pL1hyajc06tyVmZFYXoxvhQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.14.tgz", + "integrity": "sha512-HkFP/CqfSh09xCnrPJA7jud7hij5ahKyWomrC3oiO2U9i0UjP17o9pJbxUN0IJ471GTQQmzwhp0DEcpbp4MZTA==", "cpu": [ "arm64" ], @@ -3285,9 +3278,9 @@ } }, "node_modules/@tailwindcss/oxide-darwin-x64": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.6.tgz", - "integrity": "sha512-3FzekhHG0ww1zQjQ1lPoq0wPrAIVXAbUkWdWM8u5BnYFZgb9ja5ejBqyTgjpo5mfy0hFOoMnMuVDI+7CXhXZaQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.14.tgz", + "integrity": "sha512-eVNaWmCgdLf5iv6Qd3s7JI5SEFBFRtfm6W0mphJYXgvnDEAZ5sZzqmI06bK6xo0IErDHdTA5/t7d4eTfWbWOFw==", "cpu": [ "x64" ], @@ -3302,9 +3295,9 @@ } }, "node_modules/@tailwindcss/oxide-freebsd-x64": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.6.tgz", - "integrity": "sha512-4m5F5lpkBZhVQJq53oe5XgJ+aFYWdrgkMwViHjRsES3KEu2m1udR21B1I77RUqie0ZYNscFzY1v9aDssMBZ/1w==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.14.tgz", + "integrity": "sha512-QWLoRXNikEuqtNb0dhQN6wsSVVjX6dmUFzuuiL09ZeXju25dsei2uIPl71y2Ic6QbNBsB4scwBoFnlBfabHkEw==", "cpu": [ "x64" ], @@ -3319,9 +3312,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm-gnueabihf": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.6.tgz", - "integrity": "sha512-qU0rHnA9P/ZoaDKouU1oGPxPWzDKtIfX7eOGi5jOWJKdxieUJdVV+CxWZOpDWlYTd4N3sFQvcnVLJWJ1cLP5TA==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.14.tgz", + "integrity": "sha512-VB4gjQni9+F0VCASU+L8zSIyjrLLsy03sjcR3bM0V2g4SNamo0FakZFKyUQ96ZVwGK4CaJsc9zd/obQy74o0Fw==", "cpu": [ "arm" ], @@ -3336,9 +3329,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm64-gnu": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.6.tgz", - "integrity": "sha512-jXy3TSTrbfgyd3UxPQeXC3wm8DAgmigzar99Km9Sf6L2OFfn/k+u3VqmpgHQw5QNfCpPe43em6Q7V76Wx7ogIQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.14.tgz", + "integrity": "sha512-qaEy0dIZ6d9vyLnmeg24yzA8XuEAD9WjpM5nIM1sUgQ/Zv7cVkharPDQcmm/t/TvXoKo/0knI3me3AGfdx6w1w==", "cpu": [ "arm64" ], @@ -3353,9 +3346,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm64-musl": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.6.tgz", - "integrity": "sha512-8kjivE5xW0qAQ9HX9reVFmZj3t+VmljDLVRJpVBEoTR+3bKMnvC7iLcoSGNIUJGOZy1mLVq7x/gerVg0T+IsYw==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.14.tgz", + "integrity": "sha512-ISZjT44s59O8xKsPEIesiIydMG/sCXoMBCqsphDm/WcbnuWLxxb+GcvSIIA5NjUw6F8Tex7s5/LM2yDy8RqYBQ==", "cpu": [ "arm64" 
], @@ -3370,9 +3363,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-x64-gnu": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.6.tgz", - "integrity": "sha512-A4spQhwnWVpjWDLXnOW9PSinO2PTKJQNRmL/aIl2U/O+RARls8doDfs6R41+DAXK0ccacvRyDpR46aVQJJCoCg==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.14.tgz", + "integrity": "sha512-02c6JhLPJj10L2caH4U0zF8Hji4dOeahmuMl23stk0MU1wfd1OraE7rOloidSF8W5JTHkFdVo/O7uRUJJnUAJg==", "cpu": [ "x64" ], @@ -3387,9 +3380,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-x64-musl": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.6.tgz", - "integrity": "sha512-YRee+6ZqdzgiQAHVSLfl3RYmqeeaWVCk796MhXhLQu2kJu2COHBkqlqsqKYx3p8Hmk5pGCQd2jTAoMWWFeyG2A==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.14.tgz", + "integrity": "sha512-TNGeLiN1XS66kQhxHG/7wMeQDOoL0S33x9BgmydbrWAb9Qw0KYdd8o1ifx4HOGDWhVmJ+Ul+JQ7lyknQFilO3Q==", "cpu": [ "x64" ], @@ -3404,9 +3397,9 @@ } }, "node_modules/@tailwindcss/oxide-wasm32-wasi": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.1.6.tgz", - "integrity": "sha512-qAp4ooTYrBQ5pk5jgg54/U1rCJ/9FLYOkkQ/nTE+bVMseMfB6O7J8zb19YTpWuu4UdfRf5zzOrNKfl6T64MNrQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.1.14.tgz", + "integrity": "sha512-uZYAsaW/jS/IYkd6EWPJKW/NlPNSkWkBlaeVBi/WsFQNP05/bzkebUL8FH1pdsqx4f2fH/bWFcUABOM9nfiJkQ==", "bundleDependencies": [ "@napi-rs/wasm-runtime", "@emnapi/core", @@ -3422,21 +3415,81 @@ "license": "MIT", "optional": true, "dependencies": { - "@emnapi/core": "^1.4.3", - "@emnapi/runtime": "^1.4.3", - "@emnapi/wasi-threads": "^1.0.2", - "@napi-rs/wasm-runtime": "^0.2.9", - "@tybys/wasm-util": "^0.9.0", - "tslib": "^2.8.0" + "@emnapi/core": "^1.5.0", + "@emnapi/runtime": "^1.5.0", + "@emnapi/wasi-threads": "^1.1.0", + "@napi-rs/wasm-runtime": "^1.0.5", + "@tybys/wasm-util": "^0.10.1", + "tslib": "^2.4.0" }, "engines": { "node": ">=14.0.0" } }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": { + "version": "1.5.0", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/wasi-threads": "1.1.0", + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": { + "version": "1.5.0", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": { + "version": "1.1.0", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": { + "version": "1.0.5", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/core": "^1.5.0", + "@emnapi/runtime": "^1.5.0", + "@tybys/wasm-util": "^0.10.1" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": { + "version": "0.10.1", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + 
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": { + "version": "2.8.1", + "dev": true, + "inBundle": true, + "license": "0BSD", + "optional": true + }, "node_modules/@tailwindcss/oxide-win32-arm64-msvc": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.6.tgz", - "integrity": "sha512-nqpDWk0Xr8ELO/nfRUDjk1pc9wDJ3ObeDdNMHLaymc4PJBWj11gdPCWZFKSK2AVKjJQC7J2EfmSmf47GN7OuLg==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.14.tgz", + "integrity": "sha512-Az0RnnkcvRqsuoLH2Z4n3JfAef0wElgzHD5Aky/e+0tBUxUhIeIqFBTMNQvmMRSP15fWwmvjBxZ3Q8RhsDnxAA==", "cpu": [ "arm64" ], @@ -3451,9 +3504,9 @@ } }, "node_modules/@tailwindcss/oxide-win32-x64-msvc": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.6.tgz", - "integrity": "sha512-5k9xF33xkfKpo9wCvYcegQ21VwIBU1/qEbYlVukfEIyQbEA47uK8AAwS7NVjNE3vHzcmxMYwd0l6L4pPjjm1rQ==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.14.tgz", + "integrity": "sha512-ttblVGHgf68kEE4om1n/n44I0yGPkCPbLsqzjvybhpwa6mKKtgFfAzy6btc3HRmuW7nHe0OOrSeNP9sQmmH9XA==", "cpu": [ "x64" ], @@ -3468,26 +3521,19 @@ } }, "node_modules/@tailwindcss/postcss": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.6.tgz", - "integrity": "sha512-ELq+gDMBuRXPJlpE3PEen+1MhnHAQQrh2zF0dI1NXOlEWfr2qWf2CQdr5jl9yANv8RErQaQ2l6nIFO9OSCVq/g==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.14.tgz", + "integrity": "sha512-BdMjIxy7HUNThK87C7BC8I1rE8BVUsfNQSI5siQ4JK3iIa3w0XyVvVL9SXLWO//CtYTcp1v7zci0fYwJOjB+Zg==", "dev": true, "license": "MIT", "dependencies": { "@alloc/quick-lru": "^5.2.0", - "@tailwindcss/node": "4.1.6", - "@tailwindcss/oxide": "4.1.6", + "@tailwindcss/node": "4.1.14", + "@tailwindcss/oxide": "4.1.14", "postcss": "^8.4.41", - "tailwindcss": "4.1.6" + "tailwindcss": "4.1.14" } }, - "node_modules/@tailwindcss/postcss/node_modules/tailwindcss": { - "version": "4.1.6", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz", - "integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==", - "dev": true, - "license": "MIT" - }, "node_modules/@testing-library/dom": { "version": "10.4.1", "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz", @@ -3812,12 +3858,12 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "24.3.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.0.tgz", - "integrity": "sha512-aPTXCrfwnDLj4VvXrm+UUCQjNEvJgNA8s5F1cvwQU+3KNltTOkBm1j30uNLyqqPNe7gE3KFzImYoZEfLhp4Yow==", + "version": "24.8.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.8.1.tgz", + "integrity": "sha512-alv65KGRadQVfVcG69MuB4IzdYVpRwMG/mq8KWOaoOdyY617P5ivaDiMCGOFDWD2sAn5Q0mR3mRtUOgm99hL9Q==", "license": "MIT", "dependencies": { - "undici-types": "~7.10.0" + "undici-types": "~7.14.0" } }, "node_modules/@types/node-fetch": { @@ -5850,9 +5896,9 @@ "license": "MIT" }, "node_modules/enhanced-resolve": { - "version": "5.18.1", - "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.1.tgz", - "integrity": "sha512-ZSW3ma5GkcQBIpwZTSRAI8N71Uuwgs93IezB7mf7R60tC8ZbJideoDNKjHn2O9KIlx6rkGTTEk1xUCK2E1Y2Yg==", + "version": 
"5.18.3", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.3.tgz", + "integrity": "sha512-d4lC8xfavMeBjzGr2vECC3fsGXziXZQyJxD868h2M/mBI3PwAuODxAkLkq5HYuvrPYcUtiLzsTo8U3PgX3Ocww==", "dev": true, "license": "MIT", "dependencies": { @@ -9128,9 +9174,9 @@ } }, "node_modules/jiti": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.4.2.tgz", - "integrity": "sha512-rg9zJN+G4n2nfJl5MW3BMygZX56zKPNVEYYqq7adpmMh4Jn2QNEwhvQlFy6jPVdcod7txZtKHWnyZiA3a0zP7A==", + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz", + "integrity": "sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==", "dev": true, "license": "MIT", "bin": { @@ -9368,9 +9414,9 @@ } }, "node_modules/lightningcss": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.29.2.tgz", - "integrity": "sha512-6b6gd/RUXKaw5keVdSEtqFVdzWnU5jMxTUjA2bVcMNPLwSQ08Sv/UodBVtETLCn7k4S1Ibxwh7k68IwLZPgKaA==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.30.1.tgz", + "integrity": "sha512-xi6IyHML+c9+Q3W0S4fCQJOym42pyurFiJUHEcEyHS0CeKzia4yZDEsLlqOFykxOdHpNy0NmvVO31vcSqAxJCg==", "dev": true, "license": "MPL-2.0", "dependencies": { @@ -9384,22 +9430,22 @@ "url": "https://opencollective.com/parcel" }, "optionalDependencies": { - "lightningcss-darwin-arm64": "1.29.2", - "lightningcss-darwin-x64": "1.29.2", - "lightningcss-freebsd-x64": "1.29.2", - "lightningcss-linux-arm-gnueabihf": "1.29.2", - "lightningcss-linux-arm64-gnu": "1.29.2", - "lightningcss-linux-arm64-musl": "1.29.2", - "lightningcss-linux-x64-gnu": "1.29.2", - "lightningcss-linux-x64-musl": "1.29.2", - "lightningcss-win32-arm64-msvc": "1.29.2", - "lightningcss-win32-x64-msvc": "1.29.2" + "lightningcss-darwin-arm64": "1.30.1", + "lightningcss-darwin-x64": "1.30.1", + "lightningcss-freebsd-x64": "1.30.1", + "lightningcss-linux-arm-gnueabihf": "1.30.1", + "lightningcss-linux-arm64-gnu": "1.30.1", + "lightningcss-linux-arm64-musl": "1.30.1", + "lightningcss-linux-x64-gnu": "1.30.1", + "lightningcss-linux-x64-musl": "1.30.1", + "lightningcss-win32-arm64-msvc": "1.30.1", + "lightningcss-win32-x64-msvc": "1.30.1" } }, "node_modules/lightningcss-darwin-arm64": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.29.2.tgz", - "integrity": "sha512-cK/eMabSViKn/PG8U/a7aCorpeKLMlK0bQeNHmdb7qUnBkNPnL+oV5DjJUo0kqWsJUapZsM4jCfYItbqBDvlcA==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.30.1.tgz", + "integrity": "sha512-c8JK7hyE65X1MHMN+Viq9n11RRC7hgin3HhYKhrMyaXflk5GVplZ60IxyoVtzILeKr+xAJwg6zK6sjTBJ0FKYQ==", "cpu": [ "arm64" ], @@ -9418,9 +9464,9 @@ } }, "node_modules/lightningcss-darwin-x64": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.29.2.tgz", - "integrity": "sha512-j5qYxamyQw4kDXX5hnnCKMf3mLlHvG44f24Qyi2965/Ycz829MYqjrVg2H8BidybHBp9kom4D7DR5VqCKDXS0w==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.30.1.tgz", + "integrity": "sha512-k1EvjakfumAQoTfcXUcHQZhSpLlkAuEkdMBsI/ivWw9hL+7FtilQc0Cy3hrx0AAQrVtQAbMI7YjCgYgvn37PzA==", "cpu": [ "x64" ], @@ -9439,9 +9485,9 @@ } }, "node_modules/lightningcss-freebsd-x64": { - "version": "1.29.2", - "resolved": 
"https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.29.2.tgz", - "integrity": "sha512-wDk7M2tM78Ii8ek9YjnY8MjV5f5JN2qNVO+/0BAGZRvXKtQrBC4/cn4ssQIpKIPP44YXw6gFdpUF+Ps+RGsCwg==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.30.1.tgz", + "integrity": "sha512-kmW6UGCGg2PcyUE59K5r0kWfKPAVy4SltVeut+umLCFoJ53RdCUWxcRDzO1eTaxf/7Q2H7LTquFHPL5R+Gjyig==", "cpu": [ "x64" ], @@ -9460,9 +9506,9 @@ } }, "node_modules/lightningcss-linux-arm-gnueabihf": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.29.2.tgz", - "integrity": "sha512-IRUrOrAF2Z+KExdExe3Rz7NSTuuJ2HvCGlMKoquK5pjvo2JY4Rybr+NrKnq0U0hZnx5AnGsuFHjGnNT14w26sg==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.30.1.tgz", + "integrity": "sha512-MjxUShl1v8pit+6D/zSPq9S9dQ2NPFSQwGvxBCYaBYLPlCWuPh9/t1MRS8iUaR8i+a6w7aps+B4N0S1TYP/R+Q==", "cpu": [ "arm" ], @@ -9481,9 +9527,9 @@ } }, "node_modules/lightningcss-linux-arm64-gnu": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.29.2.tgz", - "integrity": "sha512-KKCpOlmhdjvUTX/mBuaKemp0oeDIBBLFiU5Fnqxh1/DZ4JPZi4evEH7TKoSBFOSOV3J7iEmmBaw/8dpiUvRKlQ==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.30.1.tgz", + "integrity": "sha512-gB72maP8rmrKsnKYy8XUuXi/4OctJiuQjcuqWNlJQ6jZiWqtPvqFziskH3hnajfvKB27ynbVCucKSm2rkQp4Bw==", "cpu": [ "arm64" ], @@ -9502,9 +9548,9 @@ } }, "node_modules/lightningcss-linux-arm64-musl": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.29.2.tgz", - "integrity": "sha512-Q64eM1bPlOOUgxFmoPUefqzY1yV3ctFPE6d/Vt7WzLW4rKTv7MyYNky+FWxRpLkNASTnKQUaiMJ87zNODIrrKQ==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.30.1.tgz", + "integrity": "sha512-jmUQVx4331m6LIX+0wUhBbmMX7TCfjF5FoOH6SD1CttzuYlGNVpA7QnrmLxrsub43ClTINfGSYyHe2HWeLl5CQ==", "cpu": [ "arm64" ], @@ -9523,9 +9569,9 @@ } }, "node_modules/lightningcss-linux-x64-gnu": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.29.2.tgz", - "integrity": "sha512-0v6idDCPG6epLXtBH/RPkHvYx74CVziHo6TMYga8O2EiQApnUPZsbR9nFNrg2cgBzk1AYqEd95TlrsL7nYABQg==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.30.1.tgz", + "integrity": "sha512-piWx3z4wN8J8z3+O5kO74+yr6ze/dKmPnI7vLqfSqI8bccaTGY5xiSGVIJBDd5K5BHlvVLpUB3S2YCfelyJ1bw==", "cpu": [ "x64" ], @@ -9544,9 +9590,9 @@ } }, "node_modules/lightningcss-linux-x64-musl": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.29.2.tgz", - "integrity": "sha512-rMpz2yawkgGT8RULc5S4WiZopVMOFWjiItBT7aSfDX4NQav6M44rhn5hjtkKzB+wMTRlLLqxkeYEtQ3dd9696w==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.30.1.tgz", + "integrity": "sha512-rRomAK7eIkL+tHY0YPxbc5Dra2gXlI63HL+v1Pdi1a3sC+tJTcFrHX+E86sulgAXeI7rSzDYhPSeHHjqFhqfeQ==", "cpu": [ "x64" ], @@ -9565,9 +9611,9 @@ } }, "node_modules/lightningcss-win32-arm64-msvc": { - "version": 
"1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.29.2.tgz", - "integrity": "sha512-nL7zRW6evGQqYVu/bKGK+zShyz8OVzsCotFgc7judbt6wnB2KbiKKJwBE4SGoDBQ1O94RjW4asrCjQL4i8Fhbw==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.30.1.tgz", + "integrity": "sha512-mSL4rqPi4iXq5YVqzSsJgMVFENoa4nGTT/GjO2c0Yl9OuQfPsIfncvLrEW6RbbB24WtZ3xP/2CCmI3tNkNV4oA==", "cpu": [ "arm64" ], @@ -9586,9 +9632,9 @@ } }, "node_modules/lightningcss-win32-x64-msvc": { - "version": "1.29.2", - "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.29.2.tgz", - "integrity": "sha512-EdIUW3B2vLuHmv7urfzMI/h2fmlnOQBk1xlsDxkN1tCWKjNFjfLhGxYk8C8mzpSfr+A6jFFIi8fU6LbQGsRWjA==", + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.30.1.tgz", + "integrity": "sha512-PVqXh48wh4T53F/1CCu8PIPCxLzWyCnn/9T5W1Jpmdy5h9Cwd+0YQS6/LwhHXSafuc61/xg9Lv5OrCby6a++jg==", "cpu": [ "x64" ], @@ -9726,13 +9772,13 @@ } }, "node_modules/magic-string": { - "version": "0.30.17", - "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.17.tgz", - "integrity": "sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA==", + "version": "0.30.19", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.19.tgz", + "integrity": "sha512-2N21sPY9Ws53PZvsEpVtNuSW+ScYbQdp4b9qUaL+9QkHUrGFKo56Lg9Emg5s9V/qrtNBmiR01sYhUOwu3H+VOw==", "dev": true, "license": "MIT", "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0" + "@jridgewell/sourcemap-codec": "^1.5.5" } }, "node_modules/make-dir": { @@ -10717,9 +10763,9 @@ } }, "node_modules/minizlib": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.2.tgz", - "integrity": "sha512-oG62iEk+CYt5Xj2YqI5Xi9xWUeZhDI8jjQmC5oThVH5JGCTgIjr7ciJDzC7MBzYd//WvR1OTmP5Q38Q8ShQtVA==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz", + "integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==", "dev": true, "license": "MIT", "dependencies": { @@ -10729,22 +10775,6 @@ "node": ">= 18" } }, - "node_modules/mkdirp": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz", - "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==", - "dev": true, - "license": "MIT", - "bin": { - "mkdirp": "dist/cjs/src/bin.js" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, "node_modules/motion-dom": { "version": "12.23.23", "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.23.23.tgz", @@ -12989,34 +13019,37 @@ } }, "node_modules/tailwindcss": { - "version": "4.1.13", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.13.tgz", - "integrity": "sha512-i+zidfmTqtwquj4hMEwdjshYYgMbOrPzb9a0M3ZgNa0JMoZeFC6bxZvO8yr8ozS6ix2SDz0+mvryPeBs2TFE+w==", + "version": "4.1.14", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.14.tgz", + "integrity": "sha512-b7pCxjGO98LnxVkKjaZSDeNuljC4ueKUddjENJOADtubtdo8llTaJy7HwBMeLNSSo2N5QIAgklslK1+Ir8r6CA==", "dev": true, "license": "MIT" }, "node_modules/tapable": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz", - 
"integrity": "sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.3.0.tgz", + "integrity": "sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==", "dev": true, "license": "MIT", "engines": { "node": ">=6" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" } }, "node_modules/tar": { - "version": "7.4.3", - "resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz", - "integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==", + "version": "7.5.1", + "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.1.tgz", + "integrity": "sha512-nlGpxf+hv0v7GkWBK2V9spgactGOp0qvfWRxUMjqHyzrt3SgwE48DIv/FhqPHJYLHpgW1opq3nERbz5Anq7n1g==", "dev": true, "license": "ISC", "dependencies": { "@isaacs/fs-minipass": "^4.0.0", "chownr": "^3.0.0", "minipass": "^7.1.2", - "minizlib": "^3.0.1", - "mkdirp": "^3.0.1", + "minizlib": "^3.1.0", "yallist": "^5.0.0" }, "engines": { @@ -13418,9 +13451,9 @@ } }, "node_modules/undici-types": { - "version": "7.10.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz", - "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==", + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.14.0.tgz", + "integrity": "sha512-QQiYxHuyZ9gQUIrmPo3IA+hUl4KYk8uSA7cHrcKd/l3p1OTpZcM0Tbp9x7FAtXdAYhlasd60ncPpgu6ihG6TOA==", "license": "MIT" }, "node_modules/unified": { diff --git a/scripts/docker.sh b/scripts/docker.sh index 7a5c3e6e0..a0690c8a9 100755 --- a/scripts/docker.sh +++ b/scripts/docker.sh @@ -156,6 +156,16 @@ DISTRO=$(echo "$DISTRO" | sed 's/^docker://') CONTAINER_NAME="llama-stack-test-$DISTRO" +should_copy_source() { + if [[ "$USE_COPY_NOT_MOUNT" == "true" ]]; then + return 0 + fi + if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then + return 0 + fi + return 1 +} + # Function to check if container is running is_container_running() { docker ps --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$" @@ -183,20 +193,29 @@ stop_container() { build_image() { echo "=== Building Docker Image for distribution: $DISTRO ===" # Get the repo root (parent of scripts directory) - SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) - REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd) + local script_dir + script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + local repo_root + repo_root=$(cd "$script_dir/.." && pwd) - # Determine whether to copy or mount source - # Copy in CI or if explicitly requested, otherwise mount for live development - BUILD_ENV="LLAMA_STACK_DIR=$REPO_ROOT" - if [[ "$USE_COPY_NOT_MOUNT" == "true" ]] || [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then - echo "Copying source into image (USE_COPY_NOT_MOUNT=true, CI=${CI:-false}, GITHUB_ACTIONS=${GITHUB_ACTIONS:-false})" - BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV" - else - echo "Will mount source for live development" + local containerfile="$repo_root/containers/Containerfile" + if [[ ! -f "$containerfile" ]]; then + echo "❌ Containerfile not found at $containerfile" + exit 1 fi - if ! 
eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"; then + local build_cmd=( + docker + build + "$repo_root" + -f "$containerfile" + --tag "localhost/distribution-$DISTRO:dev" + --build-arg "DISTRO_NAME=$DISTRO" + --build-arg "INSTALL_MODE=editable" + --build-arg "LLAMA_STACK_DIR=/workspace" + ) + + if ! "${build_cmd[@]}"; then echo "❌ Failed to build Docker image" exit 1 fi @@ -224,7 +243,7 @@ start_container() { # Check if image exists (with or without localhost/ prefix) if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "distribution-$DISTRO:dev$"; then echo "❌ Error: Image distribution-$DISTRO:dev does not exist" - echo "Either build it first without --no-rebuild, or run: llama stack build --distro $DISTRO --image-type container" + echo "Either build it first without --no-rebuild, or run: docker build . -f containers/Containerfile --build-arg DISTRO_NAME=$DISTRO --tag localhost/distribution-$DISTRO:dev" exit 1 fi echo "✅ Found existing image for distribution-$DISTRO:dev" @@ -236,8 +255,10 @@ start_container() { echo "=== Starting Docker Container ===" # Get the repo root for volume mount - SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd) - REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd) + local script_dir + script_dir=$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd) + local repo_root + repo_root=$(cd "$script_dir/.." && pwd) # Determine the actual image name (may have localhost/ prefix) IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1) @@ -279,10 +300,18 @@ start_container() { NETWORK_MODE="--network host" fi + local source_mount="" + if should_copy_source; then + echo "Source baked into image (no volume mount)" + else + source_mount="-v \"$repo_root\":/workspace" + echo "Mounting $repo_root into /workspace" + fi + docker run -d $NETWORK_MODE --name "$CONTAINER_NAME" \ -p $PORT:$PORT \ $DOCKER_ENV_VARS \ - -v "$REPO_ROOT":/app/llama-stack-source \ + $source_mount \ "$IMAGE_NAME" \ --port $PORT diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index e19a5cc55..93739052b 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -238,6 +238,8 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then echo "Stopping Docker container..." container_name="llama-stack-test-$DISTRO" if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then + echo "Dumping container logs before stopping..." + docker logs "$container_name" > "docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true echo "Stopping and removing container: $container_name" docker stop "$container_name" 2>/dev/null || true docker rm "$container_name" 2>/dev/null || true @@ -252,19 +254,24 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then export LLAMA_STACK_PORT=8321 echo "=== Building Docker Image for distribution: $DISTRO ===" - # Set LLAMA_STACK_DIR to repo root - # USE_COPY_NOT_MOUNT copies files into image (for CI), otherwise mounts for live development - BUILD_ENV="LLAMA_STACK_DIR=$ROOT_DIR" - if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then - echo "CI detected (CI=$CI, GITHUB_ACTIONS=$GITHUB_ACTIONS): copying source into image" - BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV" - else - echo "Local mode: will mount source for live development" + containerfile="$ROOT_DIR/containers/Containerfile" + if [[ ! 
-f "$containerfile" ]]; then + echo "❌ Containerfile not found at $containerfile" + exit 1 fi - eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container" + build_cmd=( + docker + build + "$ROOT_DIR" + -f "$containerfile" + --tag "localhost/distribution-$DISTRO:dev" + --build-arg "DISTRO_NAME=$DISTRO" + --build-arg "INSTALL_MODE=editable" + --build-arg "LLAMA_STACK_DIR=/workspace" + ) - if [ $? -ne 0 ]; then + if ! "${build_cmd[@]}"; then echo "❌ Failed to build Docker image" exit 1 fi @@ -304,7 +311,6 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then docker run -d --network host --name "$container_name" \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ $DOCKER_ENV_VARS \ - -v $ROOT_DIR:/app/llama-stack-source \ "$IMAGE_NAME" \ --port $LLAMA_STACK_PORT @@ -404,6 +410,21 @@ elif [ $exit_code -eq 5 ]; then echo "⚠️ No tests collected (pattern matched no tests)" else echo "❌ Tests failed" + echo "" + echo "=== Dumping last 100 lines of logs for debugging ===" + + # Output server or container logs based on stack config + if [[ "$STACK_CONFIG" == *"server:"* && -f "server.log" ]]; then + echo "--- Last 100 lines of server.log ---" + tail -100 server.log + elif [[ "$STACK_CONFIG" == *"docker:"* ]]; then + docker_log_file="docker-${DISTRO}-${INFERENCE_MODE}.log" + if [[ -f "$docker_log_file" ]]; then + echo "--- Last 100 lines of $docker_log_file ---" + tail -100 "$docker_log_file" + fi + fi + exit 1 fi diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index a258eb1a0..041d10f10 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -37,6 +37,9 @@ def pytest_sessionstart(session): if "LLAMA_STACK_TEST_INFERENCE_MODE" not in os.environ: os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay" + if "LLAMA_STACK_LOGGING" not in os.environ: + os.environ["LLAMA_STACK_LOGGING"] = "all=warning" + if "SQLITE_STORE_DIR" not in os.environ: os.environ["SQLITE_STORE_DIR"] = tempfile.mkdtemp() diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index ffd49033d..6a9e1f3b2 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -40,7 +40,7 @@ def is_port_available(port: int, host: str = "localhost") -> bool: def start_llama_stack_server(config_name: str) -> subprocess.Popen: """Start a llama stack server with the given config.""" - cmd = f"uv run --with llama-stack llama stack build --distro {config_name} --image-type venv --run" + cmd = f"uv run llama stack run {config_name}" devnull = open(os.devnull, "w") process = subprocess.Popen( shlex.split(cmd), diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py index e5ca7a0db..1f67ddb24 100644 --- a/tests/integration/vector_io/test_vector_io.py +++ b/tests/integration/vector_io/test_vector_io.py @@ -49,46 +49,50 @@ def client_with_empty_registry(client_with_models): @vector_provider_wrapper -def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id): - vector_db_name = "test_vector_db" +def test_vector_store_retrieve( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): + vector_store_name = "test_vector_store" create_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = create_response.id + 
actual_vector_store_id = create_response.id # Retrieve the vector store and validate its properties - response = client_with_empty_registry.vector_stores.retrieve(vector_store_id=actual_vector_db_id) + response = client_with_empty_registry.vector_stores.retrieve(vector_store_id=actual_vector_store_id) assert response is not None - assert response.id == actual_vector_db_id - assert response.name == vector_db_name + assert response.id == actual_vector_store_id + assert response.name == vector_store_name assert response.id.startswith("vs_") @vector_provider_wrapper -def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id): - vector_db_name = "test_vector_db" +def test_vector_store_register( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): + vector_store_name = "test_vector_store" response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = response.id - assert actual_vector_db_id.startswith("vs_") - assert actual_vector_db_id != vector_db_name + actual_vector_store_id = response.id + assert actual_vector_store_id.startswith("vs_") + assert actual_vector_store_id != vector_store_name vector_stores = client_with_empty_registry.vector_stores.list() assert len(vector_stores.data) == 1 vector_store = vector_stores.data[0] - assert vector_store.id == actual_vector_db_id - assert vector_store.name == vector_db_name + assert vector_store.id == actual_vector_store_id + assert vector_store.name == vector_store_name - client_with_empty_registry.vector_stores.delete(vector_store_id=actual_vector_db_id) + client_with_empty_registry.vector_stores.delete(vector_store_id=actual_vector_store_id) vector_stores = client_with_empty_registry.vector_stores.list() assert len(vector_stores.data) == 0 @@ -108,23 +112,23 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embe def test_insert_chunks( client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case, vector_io_provider_id ): - vector_db_name = "test_vector_db" + vector_store_name = "test_vector_store" create_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = create_response.id + actual_vector_store_id = create_response.id client_with_empty_registry.vector_io.insert( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, chunks=sample_chunks, ) response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query="What is the capital of France?", ) assert response is not None @@ -133,7 +137,7 @@ def test_insert_chunks( query, expected_doc_id = test_case response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query=query, ) assert response is not None @@ -151,15 +155,15 @@ def test_insert_chunks_with_precomputed_embeddings( "inline::qdrant": {"score_threshold": -1.0}, "remote::qdrant": {"score_threshold": -1.0}, } - vector_db_name = "test_precomputed_embeddings_db" + vector_store_name = "test_precomputed_embeddings_db" register_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "provider_id": 
vector_io_provider_id, }, ) - actual_vector_db_id = register_response.id + actual_vector_store_id = register_response.id chunks_with_embeddings = [ Chunk( @@ -170,13 +174,13 @@ def test_insert_chunks_with_precomputed_embeddings( ] client_with_empty_registry.vector_io.insert( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, chunks=chunks_with_embeddings, ) provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0] response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query="precomputed embedding test", params=vector_io_provider_params_dict.get(provider, None), ) @@ -200,16 +204,16 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb( "remote::qdrant": {"score_threshold": 0.0}, "inline::qdrant": {"score_threshold": 0.0}, } - vector_db_name = "test_precomputed_embeddings_db" + vector_store_name = "test_precomputed_embeddings_db" register_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "embedding_model": embedding_model_id, "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = register_response.id + actual_vector_store_id = register_response.id chunks_with_embeddings = [ Chunk( @@ -220,13 +224,13 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb( ] client_with_empty_registry.vector_io.insert( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, chunks=chunks_with_embeddings, ) provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0] response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query="duplicate", params=vector_io_provider_params_dict.get(provider, None), ) diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index 997df0d78..dd3246cb3 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ b/tests/unit/core/routers/test_vector_io.py @@ -21,7 +21,7 @@ async def test_single_provider_auto_selection(): Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384}) ] ) - mock_routing_table.register_vector_db = AsyncMock( + mock_routing_table.register_vector_store = AsyncMock( return_value=Mock(identifier="vs_123", provider_id="inline::faiss", provider_resource_id="vs_123") ) mock_routing_table.get_provider_impl = AsyncMock( diff --git a/tests/unit/distribution/test_build_path.py b/tests/unit/distribution/test_build_path.py deleted file mode 100644 index 52a71286b..000000000 --- a/tests/unit/distribution/test_build_path.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
-
-from pathlib import Path
-
-from llama_stack.cli.stack._build import (
-    _run_stack_build_command_from_build_config,
-)
-from llama_stack.core.datatypes import BuildConfig, DistributionSpec
-from llama_stack.core.utils.image_types import LlamaStackImageType
-
-
-def test_container_build_passes_path(monkeypatch, tmp_path):
-    called_with = {}
-
-    def spy_build_image(build_config, image_name, distro_or_config, run_config=None):
-        called_with["path"] = distro_or_config
-        called_with["run_config"] = run_config
-        return 0
-
-    monkeypatch.setattr(
-        "llama_stack.cli.stack._build.build_image",
-        spy_build_image,
-        raising=True,
-    )
-
-    cfg = BuildConfig(
-        image_type=LlamaStackImageType.CONTAINER.value,
-        distribution_spec=DistributionSpec(providers={}, description=""),
-    )
-
-    _run_stack_build_command_from_build_config(cfg, image_name="dummy")
-
-    assert "path" in called_with
-    assert isinstance(called_with["path"], str)
-    assert Path(called_with["path"]).exists()
-    assert called_with["run_config"] is None
diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py
index 590bdd1d2..3a5cd5bf7 100644
--- a/tests/unit/providers/utils/memory/test_vector_store.py
+++ b/tests/unit/providers/utils/memory/test_vector_store.py
@@ -4,138 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import patch
 
 import pytest
 
-from llama_stack.apis.common.content_types import URL, TextContentItem
-from llama_stack.apis.tools import RAGDocument
-from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc
-
-
-async def test_content_from_doc_with_url():
-    """Test extracting content from RAGDocument with URL content."""
-    mock_url = URL(uri="https://example.com")
-    mock_doc = RAGDocument(document_id="foo", content=mock_url)
-
-    mock_response = MagicMock()
-    mock_response.text = "Sample content from URL"
-
-    with patch("httpx.AsyncClient") as mock_client:
-        mock_instance = AsyncMock()
-        mock_instance.get.return_value = mock_response
-        mock_client.return_value.__aenter__.return_value = mock_instance
-
-        result = await content_from_doc(mock_doc)
-
-        assert result == "Sample content from URL"
-        mock_instance.get.assert_called_once_with(mock_url.uri)
-
-
-async def test_content_from_doc_with_pdf_url():
-    """Test extracting content from RAGDocument with URL pointing to a PDF."""
-    mock_url = URL(uri="https://example.com/document.pdf")
-    mock_doc = RAGDocument(document_id="foo", content=mock_url, mime_type="application/pdf")
-
-    mock_response = MagicMock()
-    mock_response.content = b"PDF binary data"
-
-    with (
-        patch("httpx.AsyncClient") as mock_client,
-        patch("llama_stack.providers.utils.memory.vector_store.parse_pdf") as mock_parse_pdf,
-    ):
-        mock_instance = AsyncMock()
-        mock_instance.get.return_value = mock_response
-        mock_client.return_value.__aenter__.return_value = mock_instance
-        mock_parse_pdf.return_value = "Extracted PDF content"
-
-        result = await content_from_doc(mock_doc)
-
-        assert result == "Extracted PDF content"
-        mock_instance.get.assert_called_once_with(mock_url.uri)
-        mock_parse_pdf.assert_called_once_with(b"PDF binary data")
-
-
-async def test_content_from_doc_with_data_url():
-    """Test extracting content from RAGDocument with data URL content."""
-    data_url = "data:text/plain;base64,SGVsbG8gV29ybGQ="  # "Hello World" base64 encoded
-    mock_url = URL(uri=data_url)
-    mock_doc = RAGDocument(document_id="foo", content=mock_url)
-
-    with patch("llama_stack.providers.utils.memory.vector_store.content_from_data") as mock_content_from_data:
-        mock_content_from_data.return_value = "Hello World"
-
-        result = await content_from_doc(mock_doc)
-
-        assert result == "Hello World"
-        mock_content_from_data.assert_called_once_with(data_url)
-
-
-async def test_content_from_doc_with_string():
-    """Test extracting content from RAGDocument with string content."""
-    content_string = "This is plain text content"
-    mock_doc = RAGDocument(document_id="foo", content=content_string)
-
-    result = await content_from_doc(mock_doc)
-
-    assert result == content_string
-
-
-async def test_content_from_doc_with_string_url():
-    """Test extracting content from RAGDocument with string URL content."""
-    url_string = "https://example.com"
-    mock_doc = RAGDocument(document_id="foo", content=url_string)
-
-    mock_response = MagicMock()
-    mock_response.text = "Sample content from URL string"
-
-    with patch("httpx.AsyncClient") as mock_client:
-        mock_instance = AsyncMock()
-        mock_instance.get.return_value = mock_response
-        mock_client.return_value.__aenter__.return_value = mock_instance
-
-        result = await content_from_doc(mock_doc)
-
-        assert result == "Sample content from URL string"
-        mock_instance.get.assert_called_once_with(url_string)
-
-
-async def test_content_from_doc_with_string_pdf_url():
-    """Test extracting content from RAGDocument with string URL pointing to a PDF."""
-    url_string = "https://example.com/document.pdf"
-    mock_doc = RAGDocument(document_id="foo", content=url_string, mime_type="application/pdf")
-
-    mock_response = MagicMock()
-    mock_response.content = b"PDF binary data"
-
-    with (
-        patch("httpx.AsyncClient") as mock_client,
-        patch("llama_stack.providers.utils.memory.vector_store.parse_pdf") as mock_parse_pdf,
-    ):
-        mock_instance = AsyncMock()
-        mock_instance.get.return_value = mock_response
-        mock_client.return_value.__aenter__.return_value = mock_instance
-        mock_parse_pdf.return_value = "Extracted PDF content from string URL"
-
-        result = await content_from_doc(mock_doc)
-
-        assert result == "Extracted PDF content from string URL"
-        mock_instance.get.assert_called_once_with(url_string)
-        mock_parse_pdf.assert_called_once_with(b"PDF binary data")
-
-
-async def test_content_from_doc_with_interleaved_content():
-    """Test extracting content from RAGDocument with InterleavedContent (the new case added in the commit)."""
-    interleaved_content = [TextContentItem(text="First item"), TextContentItem(text="Second item")]
-    mock_doc = RAGDocument(document_id="foo", content=interleaved_content)
-
-    with patch("llama_stack.providers.utils.memory.vector_store.interleaved_content_as_str") as mock_interleaved:
-        mock_interleaved.return_value = "First item\nSecond item"
-
-        result = await content_from_doc(mock_doc)
-
-        assert result == "First item\nSecond item"
-        mock_interleaved.assert_called_once_with(interleaved_content)
+from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type
 
 
 def test_content_from_data_and_mime_type_success_utf8():
@@ -178,41 +51,3 @@ def test_content_from_data_and_mime_type_both_encodings_fail():
     # Should raise an exception instead of returning empty string
     with pytest.raises(UnicodeDecodeError):
         content_from_data_and_mime_type(data, mime_type)
-
-
-async def test_memory_tool_error_handling():
-    """Test that memory tool handles various failures gracefully without crashing."""
-    from llama_stack.providers.inline.tool_runtime.rag.config import RagToolRuntimeConfig
-    from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
-
-    config = RagToolRuntimeConfig()
-    memory_tool = MemoryToolRuntimeImpl(
-        config=config,
-        vector_io_api=AsyncMock(),
-        inference_api=AsyncMock(),
-        files_api=AsyncMock(),
-    )
-
-    docs = [
-        RAGDocument(document_id="good_doc", content="Good content", metadata={}),
-        RAGDocument(document_id="bad_url_doc", content=URL(uri="https://bad.url"), metadata={}),
-        RAGDocument(document_id="another_good_doc", content="Another good content", metadata={}),
-    ]
-
-    mock_file1 = MagicMock()
-    mock_file1.id = "file_good1"
-    mock_file2 = MagicMock()
-    mock_file2.id = "file_good2"
-    memory_tool.files_api.openai_upload_file.side_effect = [mock_file1, mock_file2]
-
-    with patch("httpx.AsyncClient") as mock_client:
-        mock_instance = AsyncMock()
-        mock_instance.get.side_effect = Exception("Bad URL")
-        mock_client.return_value.__aenter__.return_value = mock_instance
-
-        # won't raise exception despite one document failing
-        await memory_tool.insert(docs, "vector_store_123")
-
-        # processed 2 documents successfully, skipped 1
-        assert memory_tool.files_api.openai_upload_file.call_count == 2
-        assert memory_tool.vector_io_api.openai_attach_file_to_vector_store.call_count == 2
diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py
index c78596018..2951ca2e5 100644
--- a/tests/unit/providers/vector_io/conftest.py
+++ b/tests/unit/providers/vector_io/conftest.py
@@ -10,8 +10,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import numpy as np
 import pytest
 
-from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter
@@ -31,7 +31,7 @@ def vector_provider(request):
 
 
 @pytest.fixture
-def vector_db_id() -> str:
+def vector_store_id() -> str:
     return f"test-vector-db-{random.randint(1, 100)}"
@@ -149,8 +149,8 @@ async def sqlite_vec_adapter(sqlite_vec_db_path, unique_kvstore_config, mock_inf
     )
     collection_id = f"sqlite_test_collection_{np.random.randint(1e6)}"
     await adapter.initialize()
-    await adapter.register_vector_db(
-        VectorDB(
+    await adapter.register_vector_store(
+        VectorStore(
             identifier=collection_id,
             provider_id="test_provider",
             embedding_model="test_model",
@@ -186,8 +186,8 @@ async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding
         files_api=None,
     )
     await adapter.initialize()
-    await adapter.register_vector_db(
-        VectorDB(
+    await adapter.register_vector_store(
+        VectorStore(
             identifier=f"faiss_test_collection_{np.random.randint(1e6)}",
             provider_id="test_provider",
             embedding_model="test_model",
@@ -215,7 +215,7 @@ def mock_psycopg2_connection():
 async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection):
     connection, cursor = mock_psycopg2_connection
 
-    vector_db = VectorDB(
+    vector_store = VectorStore(
         identifier="test-vector-db",
         embedding_model="test-model",
         embedding_dimension=embedding_dimension,
@@ -225,7 +225,7 @@ async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection):
     with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.psycopg2"):
         with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.execute_values"):
-            index = PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="COSINE")
+            index = PGVectorIndex(vector_store, embedding_dimension, connection, distance_metric="COSINE")
             index._test_chunks = []
             original_add_chunks = index.add_chunks
@@ -281,30 +281,30 @@ async def pgvector_vec_adapter(unique_kvstore_config, mock_inference_api, embedd
     await adapter.initialize()
     adapter.conn = mock_conn
 
-    async def mock_insert_chunks(vector_db_id, chunks, ttl_seconds=None):
-        index = await adapter._get_and_cache_vector_db_index(vector_db_id)
+    async def mock_insert_chunks(vector_store_id, chunks, ttl_seconds=None):
+        index = await adapter._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise ValueError(f"Vector DB {vector_db_id} not found")
+            raise ValueError(f"Vector DB {vector_store_id} not found")
         await index.insert_chunks(chunks)
 
     adapter.insert_chunks = mock_insert_chunks
 
-    async def mock_query_chunks(vector_db_id, query, params=None):
-        index = await adapter._get_and_cache_vector_db_index(vector_db_id)
+    async def mock_query_chunks(vector_store_id, query, params=None):
+        index = await adapter._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise ValueError(f"Vector DB {vector_db_id} not found")
+            raise ValueError(f"Vector DB {vector_store_id} not found")
         return await index.query_chunks(query, params)
 
     adapter.query_chunks = mock_query_chunks
 
-    test_vector_db = VectorDB(
+    test_vector_store = VectorStore(
         identifier=f"pgvector_test_collection_{random.randint(1, 1_000_000)}",
         provider_id="test_provider",
         embedding_model="test_model",
         embedding_dimension=embedding_dimension,
     )
-    await adapter.register_vector_db(test_vector_db)
-    adapter.test_collection_id = test_vector_db.identifier
+    await adapter.register_vector_store(test_vector_store)
+    adapter.test_collection_id = test_vector_store.identifier
 
     yield adapter
     await adapter.shutdown()
diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py
index fa5c5f56b..7b870d16e 100644
--- a/tests/unit/providers/vector_io/test_faiss.py
+++ b/tests/unit/providers/vector_io/test_faiss.py
@@ -11,8 +11,8 @@ import numpy as np
 import pytest
 
 from llama_stack.apis.files import Files
-from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import (
@@ -43,8 +43,8 @@ def embedding_dimension():
 
 
 @pytest.fixture
-def vector_db_id():
-    return "test_vector_db"
+def vector_store_id():
+    return "test_vector_store"
 
 
 @pytest.fixture
@@ -61,12 +61,12 @@ def sample_embeddings(embedding_dimension):
 
 
 @pytest.fixture
-def mock_vector_db(vector_db_id, embedding_dimension) -> MagicMock:
-    mock_vector_db = MagicMock(spec=VectorDB)
-    mock_vector_db.embedding_model = "mock_embedding_model"
-    mock_vector_db.identifier = vector_db_id
-    mock_vector_db.embedding_dimension = embedding_dimension
-    return mock_vector_db
+def mock_vector_store(vector_store_id, embedding_dimension) -> MagicMock:
+    mock_vector_store = MagicMock(spec=VectorStore)
+    mock_vector_store.embedding_model = "mock_embedding_model"
+    mock_vector_store.identifier = vector_store_id
+    mock_vector_store.embedding_dimension = embedding_dimension
+    return mock_vector_store
 
 
 @pytest.fixture
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index ad55b9336..65d7b7602 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -12,7 +12,6 @@ import numpy as np
 import pytest
 
 from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
@@ -21,6 +20,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreChunkingStrategyAuto,
     VectorStoreFileObject,
 )
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX
 
 # This test is a unit test for the inline VectorIO providers. This should only contain
@@ -71,7 +71,7 @@ async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimensio
 
 async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter):
     key = f"{VECTOR_DBS_PREFIX}db1"
-    dummy = VectorDB(
+    dummy = VectorStore(
         identifier="foo_db", provider_id="test_provider", embedding_model="test_model", embedding_dimension=128
     )
     await vector_io_adapter.kvstore.set(key=key, value=json.dumps(dummy.model_dump()))
@@ -81,10 +81,10 @@ async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter):
 
 async def test_persistence_across_adapter_restarts(vector_io_adapter):
     await vector_io_adapter.initialize()
-    dummy = VectorDB(
+    dummy = VectorStore(
         identifier="foo_db", provider_id="test_provider", embedding_model="test_model", embedding_dimension=128
     )
-    await vector_io_adapter.register_vector_db(dummy)
+    await vector_io_adapter.register_vector_store(dummy)
     await vector_io_adapter.shutdown()
 
     await vector_io_adapter.initialize()
@@ -92,15 +92,15 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter):
     await vector_io_adapter.shutdown()
 
 
-async def test_register_and_unregister_vector_db(vector_io_adapter):
+async def test_register_and_unregister_vector_store(vector_io_adapter):
     unique_id = f"foo_db_{np.random.randint(1e6)}"
-    dummy = VectorDB(
+    dummy = VectorStore(
         identifier=unique_id, provider_id="test_provider", embedding_model="test_model", embedding_dimension=128
     )
-    await vector_io_adapter.register_vector_db(dummy)
+    await vector_io_adapter.register_vector_store(dummy)
     assert dummy.identifier in vector_io_adapter.cache
-    await vector_io_adapter.unregister_vector_db(dummy.identifier)
+    await vector_io_adapter.unregister_vector_store(dummy.identifier)
     assert dummy.identifier not in vector_io_adapter.cache
 
 
@@ -121,7 +121,7 @@ async def test_insert_chunks_calls_underlying_index(vector_io_adapter):
 
 
 async def test_insert_chunks_missing_db_raises(vector_io_adapter):
-    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None)
+    vector_io_adapter._get_and_cache_vector_store_index = AsyncMock(return_value=None)
 
     with pytest.raises(ValueError):
         await vector_io_adapter.insert_chunks("db_not_exist", [])
@@ -170,7 +170,7 @@ async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter
 
 
 async def test_query_chunks_missing_db_raises(vector_io_adapter):
-    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None)
+    vector_io_adapter._get_and_cache_vector_store_index = AsyncMock(return_value=None)
 
     with pytest.raises(ValueError):
         await vector_io_adapter.query_chunks("db_missing", "q", None)
@@ -182,7 +182,7 @@ async def test_save_openai_vector_store(vector_io_adapter):
         "id": store_id,
         "name": "Test Store",
         "description": "A test OpenAI vector store",
-        "vector_db_id": "test_db",
+        "vector_store_id": "test_db",
         "embedding_model": "test_model",
     }
@@ -198,7 +198,7 @@ async def test_update_openai_vector_store(vector_io_adapter):
         "id": store_id,
         "name": "Test Store",
         "description": "A test OpenAI vector store",
-        "vector_db_id": "test_db",
+        "vector_store_id": "test_db",
         "embedding_model": "test_model",
     }
@@ -214,7 +214,7 @@ async def test_delete_openai_vector_store(vector_io_adapter):
         "id": store_id,
         "name": "Test Store",
         "description": "A test OpenAI vector store",
-        "vector_db_id": "test_db",
+        "vector_store_id": "test_db",
         "embedding_model": "test_model",
     }
@@ -229,7 +229,7 @@ async def test_load_openai_vector_stores(vector_io_adapter):
         "id": store_id,
         "name": "Test Store",
         "description": "A test OpenAI vector store",
-        "vector_db_id": "test_db",
+        "vector_store_id": "test_db",
         "embedding_model": "test_model",
     }
@@ -998,8 +998,8 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
 
 async def test_embedding_config_from_metadata(vector_io_adapter):
     """Test that embedding configuration is correctly extracted from metadata."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()
 
     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1015,9 +1015,9 @@ async def test_embedding_config_from_metadata(vector_io_adapter):
 
     await vector_io_adapter.openai_create_vector_store(params)
 
-    # Verify VectorDB was registered with correct embedding config from metadata
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
+    # Verify VectorStore was registered with correct embedding config from metadata
+    vector_io_adapter.register_vector_store.assert_called_once()
+    call_args = vector_io_adapter.register_vector_store.call_args[0][0]
 
     assert call_args.embedding_model == "test-embedding-model"
     assert call_args.embedding_dimension == 512
@@ -1025,8 +1025,8 @@ async def test_embedding_config_from_metadata(vector_io_adapter):
 
 async def test_embedding_config_from_extra_body(vector_io_adapter):
     """Test that embedding configuration is correctly extracted from extra_body when metadata is empty."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()
 
     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1042,9 +1042,9 @@ async def test_embedding_config_from_extra_body(vector_io_adapter):
 
     await vector_io_adapter.openai_create_vector_store(params)
 
-    # Verify VectorDB was registered with correct embedding config from extra_body
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
+    # Verify VectorStore was registered with correct embedding config from extra_body
+    vector_io_adapter.register_vector_store.assert_called_once()
+    call_args = vector_io_adapter.register_vector_store.call_args[0][0]
 
     assert call_args.embedding_model == "extra-body-model"
     assert call_args.embedding_dimension == 1024
@@ -1052,8 +1052,8 @@ async def test_embedding_config_from_extra_body(vector_io_adapter):
 
 async def test_embedding_config_consistency_check_passes(vector_io_adapter):
     """Test that consistent embedding config in both metadata and extra_body passes validation."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()
 
     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1073,8 +1073,8 @@ async def test_embedding_config_consistency_check_passes(vector_io_adapter):
     await vector_io_adapter.openai_create_vector_store(params)
 
     # Should not raise any error and use metadata config
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
+    vector_io_adapter.register_vector_store.assert_called_once()
+    call_args = vector_io_adapter.register_vector_store.call_args[0][0]
 
     assert call_args.embedding_model == "consistent-model"
     assert call_args.embedding_dimension == 768
@@ -1082,8 +1082,8 @@ async def test_embedding_config_consistency_check_passes(vector_io_adapter):
 
 async def test_embedding_config_inconsistency_errors(vector_io_adapter):
     """Test that inconsistent embedding config between metadata and extra_body raises errors."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()
 
     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1104,7 +1104,7 @@ async def test_embedding_config_inconsistency_errors(vector_io_adapter):
         await vector_io_adapter.openai_create_vector_store(params)
 
     # Reset mock for second test
-    vector_io_adapter.register_vector_db.reset_mock()
+    vector_io_adapter.register_vector_store.reset_mock()
 
     # Test with inconsistent embedding dimension
     params = OpenAICreateVectorStoreRequestWithExtraBody(
@@ -1126,8 +1126,8 @@ async def test_embedding_config_inconsistency_errors(vector_io_adapter):
 
 async def test_embedding_config_defaults_when_missing(vector_io_adapter):
     """Test that embedding dimension defaults to 768 when not provided."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()
 
     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1143,8 +1143,8 @@ async def test_embedding_config_defaults_when_missing(vector_io_adapter):
     await vector_io_adapter.openai_create_vector_store(params)
 
     # Should default to 768 dimensions
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
+    vector_io_adapter.register_vector_store.assert_called_once()
+    call_args = vector_io_adapter.register_vector_store.call_args[0][0]
 
     assert call_args.embedding_model == "model-without-dimension"
     assert call_args.embedding_dimension == 768
@@ -1152,8 +1152,8 @@ async def test_embedding_config_defaults_when_missing(vector_io_adapter):
 
 async def test_embedding_config_required_model_missing(vector_io_adapter):
     """Test that missing embedding model raises error."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()
 
     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
     # Mock the default model lookup to return None (no default model available)
diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py
deleted file mode 100644
index a45b66f02..000000000
--- a/tests/unit/rag/test_rag_query.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from unittest.mock import AsyncMock, MagicMock
-
-import pytest
-
-from llama_stack.apis.tools.rag_tool import RAGQueryConfig
-from llama_stack.apis.vector_io import (
-    Chunk,
-    ChunkMetadata,
-    QueryChunksResponse,
-)
-from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
-
-
-class TestRagQuery:
-    async def test_query_raises_on_empty_vector_db_ids(self):
-        rag_tool = MemoryToolRuntimeImpl(
-            config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
-        )
-        with pytest.raises(ValueError):
-            await rag_tool.query(content=MagicMock(), vector_db_ids=[])
-
-    async def test_query_chunk_metadata_handling(self):
-        rag_tool = MemoryToolRuntimeImpl(
-            config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
-        )
-        content = "test query content"
-        vector_db_ids = ["db1"]
-
-        chunk_metadata = ChunkMetadata(
-            document_id="doc1",
-            chunk_id="chunk1",
-            source="test_source",
-            metadata_token_count=5,
-        )
-        interleaved_content = MagicMock()
-        chunk = Chunk(
-            content=interleaved_content,
-            metadata={
-                "key1": "value1",
-                "token_count": 10,
-                "metadata_token_count": 5,
-                # Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert()
-                "document_id": "doc1",
-            },
-            stored_chunk_id="chunk1",
-            chunk_metadata=chunk_metadata,
-        )
-
-        query_response = QueryChunksResponse(chunks=[chunk], scores=[1.0])
-
-        rag_tool.vector_io_api.query_chunks = AsyncMock(return_value=query_response)
-        result = await rag_tool.query(content=content, vector_db_ids=vector_db_ids)
-
-        assert result is not None
-        expected_metadata_string = (
-            "Metadata: {'chunk_id': 'chunk1', 'document_id': 'doc1', 'source': 'test_source', 'key1': 'value1'}"
-        )
-        assert expected_metadata_string in result.content[1].text
-        assert result.content is not None
-
-    async def test_query_raises_incorrect_mode(self):
-        with pytest.raises(ValueError):
-            RAGQueryConfig(mode="invalid_mode")
-
-    async def test_query_accepts_valid_modes(self):
-        default_config = RAGQueryConfig()  # Test default (vector)
-        assert default_config.mode == "vector"
-        vector_config = RAGQueryConfig(mode="vector")  # Test vector
-        assert vector_config.mode == "vector"
-        keyword_config = RAGQueryConfig(mode="keyword")  # Test keyword
-        assert keyword_config.mode == "keyword"
-        hybrid_config = RAGQueryConfig(mode="hybrid")  # Test hybrid
-        assert hybrid_config.mode == "hybrid"
-
-        # Test that invalid mode raises an error
-        with pytest.raises(ValueError):
-            RAGQueryConfig(mode="wrong_mode")
-
-    async def test_query_adds_vector_db_id_to_chunk_metadata(self):
-        rag_tool = MemoryToolRuntimeImpl(
-            config=MagicMock(),
-            vector_io_api=MagicMock(),
-            inference_api=MagicMock(),
-            files_api=MagicMock(),
-        )
-
-        vector_db_ids = ["db1", "db2"]
-
-        # Fake chunks from each DB
-        chunk_metadata1 = ChunkMetadata(
-            document_id="doc1",
-            chunk_id="chunk1",
-            source="test_source1",
-            metadata_token_count=5,
-        )
-        chunk1 = Chunk(
-            content="chunk from db1",
-            metadata={"vector_db_id": "db1", "document_id": "doc1"},
-            stored_chunk_id="c1",
-            chunk_metadata=chunk_metadata1,
-        )
-
-        chunk_metadata2 = ChunkMetadata(
-            document_id="doc2",
-            chunk_id="chunk2",
-            source="test_source2",
-            metadata_token_count=5,
-        )
-        chunk2 = Chunk(
-            content="chunk from db2",
-            metadata={"vector_db_id": "db2", "document_id": "doc2"},
-            stored_chunk_id="c2",
-            chunk_metadata=chunk_metadata2,
-        )
-
-        rag_tool.vector_io_api.query_chunks = AsyncMock(
-            side_effect=[
-                QueryChunksResponse(chunks=[chunk1], scores=[0.9]),
-                QueryChunksResponse(chunks=[chunk2], scores=[0.8]),
-            ]
-        )
-
-        result = await rag_tool.query(content="test", vector_db_ids=vector_db_ids)
-        returned_chunks = result.metadata["chunks"]
-        returned_scores = result.metadata["scores"]
-        returned_doc_ids = result.metadata["document_ids"]
-        returned_vector_db_ids = result.metadata["vector_db_ids"]
-
-        assert returned_chunks == ["chunk from db1", "chunk from db2"]
-        assert returned_scores == (0.9, 0.8)
-        assert returned_doc_ids == ["doc1", "doc2"]
-        assert returned_vector_db_ids == ["db1", "db2"]
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index 1e40c98e8..e185b83e7 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -4,10 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-import base64
-import mimetypes
-import os
-from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock
 
 import numpy as np
@@ -17,37 +13,13 @@ from llama_stack.apis.inference.inference import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
 )
-from llama_stack.apis.tools import RAGDocument
 from llama_stack.apis.vector_io import Chunk
 from llama_stack.providers.utils.memory.vector_store import (
-    URL,
-    VectorDBWithIndex,
+    VectorStoreWithIndex,
     _validate_embedding,
-    content_from_doc,
     make_overlapped_chunks,
 )
 
-DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf"
-# Depending on the machine, this can get parsed a couple of ways
-DUMMY_PDF_TEXT_CHOICES = ["Dummy PDF file", "Dumm y PDF file"]
-
-
-def read_file(file_path: str) -> bytes:
-    with open(file_path, "rb") as file:
-        return file.read()
-
-
-def data_url_from_file(file_path: str) -> str:
-    with open(file_path, "rb") as file:
-        file_content = file.read()
-
-    base64_content = base64.b64encode(file_content).decode("utf-8")
-    mime_type, _ = mimetypes.guess_type(file_path)
-
-    data_url = f"data:{mime_type};base64,{base64_content}"
-
-    return data_url
-
 
 class TestChunk:
     def test_chunk(self):
@@ -116,45 +88,6 @@ class TestValidateEmbedding:
 
 
 class TestVectorStore:
-    async def test_returns_content_from_pdf_data_uri(self):
-        data_uri = data_url_from_file(DUMMY_PDF_PATH)
-        doc = RAGDocument(
-            document_id="dummy",
-            content=data_uri,
-            mime_type="application/pdf",
-            metadata={},
-        )
-        content = await content_from_doc(doc)
-        assert content in DUMMY_PDF_TEXT_CHOICES
-
-    @pytest.mark.allow_network
-    async def test_downloads_pdf_and_returns_content(self):
-        # Using GitHub to host the PDF file
-        url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf"
-        doc = RAGDocument(
-            document_id="dummy",
-            content=url,
-            mime_type="application/pdf",
-            metadata={},
-        )
-        content = await content_from_doc(doc)
-        assert content in DUMMY_PDF_TEXT_CHOICES
-
-    @pytest.mark.allow_network
-    async def test_downloads_pdf_and_returns_content_with_url_object(self):
-        # Using GitHub to host the PDF file
-        url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf"
-        doc = RAGDocument(
-            document_id="dummy",
-            content=URL(
-                uri=url,
-            ),
-            mime_type="application/pdf",
-            metadata={},
-        )
-        content = await content_from_doc(doc)
-        assert content in DUMMY_PDF_TEXT_CHOICES
-
     @pytest.mark.parametrize(
         "window_len, overlap_len, expected_chunks",
         [
@@ -206,15 +139,15 @@ class TestVectorStore:
         assert str(excinfo.value.__cause__) == "Cannot convert to string"
 
 
-class TestVectorDBWithIndex:
+class TestVectorStoreWithIndex:
     async def test_insert_chunks_without_embeddings(self):
-        mock_vector_db = MagicMock()
-        mock_vector_db.embedding_model = "test-model without embeddings"
+        mock_vector_store = MagicMock()
+        mock_vector_store.embedding_model = "test-model without embeddings"
         mock_index = AsyncMock()
         mock_inference_api = AsyncMock()
 
-        vector_db_with_index = VectorDBWithIndex(
-            vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
+        vector_store_with_index = VectorStoreWithIndex(
+            vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
         )
 
         chunks = [
@@ -227,7 +160,7 @@ class TestVectorDBWithIndex:
             OpenAIEmbeddingData(embedding=[0.4, 0.5, 0.6], index=1),
         ]
 
-        await vector_db_with_index.insert_chunks(chunks)
+        await vector_store_with_index.insert_chunks(chunks)
 
         # Verify openai_embeddings was called with correct params
         mock_inference_api.openai_embeddings.assert_called_once()
@@ -243,14 +176,14 @@ class TestVectorDBWithIndex:
         assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
 
     async def test_insert_chunks_with_valid_embeddings(self):
-        mock_vector_db = MagicMock()
-        mock_vector_db.embedding_model = "test-model with embeddings"
-        mock_vector_db.embedding_dimension = 3
+        mock_vector_store = MagicMock()
+        mock_vector_store.embedding_model = "test-model with embeddings"
+        mock_vector_store.embedding_dimension = 3
         mock_index = AsyncMock()
         mock_inference_api = AsyncMock()
 
-        vector_db_with_index = VectorDBWithIndex(
-            vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
+        vector_store_with_index = VectorStoreWithIndex(
+            vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
         )
 
         chunks = [
@@ -258,7 +191,7 @@ class TestVectorDBWithIndex:
             Chunk(content="Test 2", embedding=[0.4, 0.5, 0.6], metadata={}),
         ]
 
-        await vector_db_with_index.insert_chunks(chunks)
+        await vector_store_with_index.insert_chunks(chunks)
 
         mock_inference_api.openai_embeddings.assert_not_called()
         mock_index.add_chunks.assert_called_once()
@@ -267,14 +200,14 @@ class TestVectorDBWithIndex:
         assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
 
     async def test_insert_chunks_with_invalid_embeddings(self):
-        mock_vector_db = MagicMock()
-        mock_vector_db.embedding_dimension = 3
-        mock_vector_db.embedding_model = "test-model with invalid embeddings"
+        mock_vector_store = MagicMock()
+        mock_vector_store.embedding_dimension = 3
+        mock_vector_store.embedding_model = "test-model with invalid embeddings"
         mock_index = AsyncMock()
         mock_inference_api = AsyncMock()
 
-        vector_db_with_index = VectorDBWithIndex(
-            vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
+        vector_store_with_index = VectorStoreWithIndex(
+            vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
         )
 
         # Verify Chunk raises ValueError for invalid embedding type
@@ -283,7 +216,7 @@ class TestVectorDBWithIndex:
 
         # Verify Chunk raises ValueError for invalid embedding type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
         with pytest.raises(ValueError, match="Input should be a valid list"):
-            await vector_db_with_index.insert_chunks(
+            await vector_store_with_index.insert_chunks(
                 [
                     Chunk(content="Test 1", embedding=None, metadata={}),
                     Chunk(content="Test 2", embedding="invalid_type", metadata={}),
@@ -292,7 +225,7 @@ class TestVectorDBWithIndex:
 
         # Verify Chunk raises ValueError for invalid embedding element type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
         with pytest.raises(ValueError, match=" Input should be a valid number, unable to parse string as a number "):
-            await vector_db_with_index.insert_chunks(
+            await vector_store_with_index.insert_chunks(
                 Chunk(content="Test 1", embedding=[0.1, "string", 0.3], metadata={})
             )
 
@@ -300,20 +233,20 @@ class TestVectorDBWithIndex:
             Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3, 0.4], metadata={}),
         ]
         with pytest.raises(ValueError, match="has dimension 4, expected 3"):
-            await vector_db_with_index.insert_chunks(chunks_wrong_dim)
+            await vector_store_with_index.insert_chunks(chunks_wrong_dim)
 
         mock_inference_api.openai_embeddings.assert_not_called()
        mock_index.add_chunks.assert_not_called()
 
     async def test_insert_chunks_with_partially_precomputed_embeddings(self):
-        mock_vector_db = MagicMock()
-        mock_vector_db.embedding_model = "test-model with partial embeddings"
-        mock_vector_db.embedding_dimension = 3
+        mock_vector_store = MagicMock()
+        mock_vector_store.embedding_model = "test-model with partial embeddings"
+        mock_vector_store.embedding_dimension = 3
         mock_index = AsyncMock()
         mock_inference_api = AsyncMock()
 
-        vector_db_with_index = VectorDBWithIndex(
-            vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
+        vector_store_with_index = VectorStoreWithIndex(
+            vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
         )
 
         chunks = [
@@ -327,7 +260,7 @@ class TestVectorDBWithIndex:
             OpenAIEmbeddingData(embedding=[0.3, 0.3, 0.3], index=1),
         ]
 
-        await vector_db_with_index.insert_chunks(chunks)
+        await vector_store_with_index.insert_chunks(chunks)
 
         # Verify openai_embeddings was called with correct params
         mock_inference_api.openai_embeddings.assert_called_once()
diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py
index 95022ad33..d4c9786d1 100644
--- a/tests/unit/registry/test_registry.py
+++ b/tests/unit/registry/test_registry.py
@@ -8,8 +8,8 @@
 import pytest
 
 from llama_stack.apis.inference import Model
-from llama_stack.apis.vector_dbs import VectorDB
-from llama_stack.core.datatypes import VectorDBWithOwner
+from llama_stack.apis.vector_stores import VectorStore
+from llama_stack.core.datatypes import VectorStoreWithOwner
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.core.store.registry import (
     KEY_FORMAT,
@@ -20,12 +20,12 @@ from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_b
 
 @pytest.fixture
-def sample_vector_db():
-    return VectorDB(
-        identifier="test_vector_db",
+def sample_vector_store():
+    return VectorStore(
+        identifier="test_vector_store",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
-        provider_resource_id="test_vector_db",
+        provider_resource_id="test_vector_store",
         provider_id="test-provider",
     )
 
@@ -45,17 +45,17 @@ async def test_registry_initialization(disk_dist_registry):
     assert result is None
 
 
-async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_model):
-    print(f"Registering {sample_vector_db}")
-    await disk_dist_registry.register(sample_vector_db)
+async def test_basic_registration(disk_dist_registry, sample_vector_store, sample_model):
+    print(f"Registering {sample_vector_store}")
+    await disk_dist_registry.register(sample_vector_store)
     print(f"Registering {sample_model}")
     await disk_dist_registry.register(sample_model)
-    print("Getting vector_db")
-    result_vector_db = await disk_dist_registry.get("vector_db", "test_vector_db")
-    assert result_vector_db is not None
-    assert result_vector_db.identifier == sample_vector_db.identifier
-    assert result_vector_db.embedding_model == sample_vector_db.embedding_model
-    assert result_vector_db.provider_id == sample_vector_db.provider_id
+    print("Getting vector_store")
+    result_vector_store = await disk_dist_registry.get("vector_store", "test_vector_store")
+    assert result_vector_store is not None
+    assert result_vector_store.identifier == sample_vector_store.identifier
+    assert result_vector_store.embedding_model == sample_vector_store.embedding_model
+    assert result_vector_store.provider_id == sample_vector_store.provider_id
 
     result_model = await disk_dist_registry.get("model", "test_model")
     assert result_model is not None
@@ -63,11 +63,11 @@ async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_m
     assert result_model.provider_id == sample_model.provider_id
 
 
-async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, sample_model):
+async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_store, sample_model):
     # First populate the disk registry
     disk_registry = DiskDistributionRegistry(sqlite_kvstore)
     await disk_registry.initialize()
-    await disk_registry.register(sample_vector_db)
+    await disk_registry.register(sample_vector_store)
     await disk_registry.register(sample_model)
 
     # Test cached version loads from disk
@@ -79,29 +79,29 @@ async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db,
     )
     await cached_registry.initialize()
 
-    result_vector_db = await cached_registry.get("vector_db", "test_vector_db")
-    assert result_vector_db is not None
-    assert result_vector_db.identifier == sample_vector_db.identifier
-    assert result_vector_db.embedding_model == sample_vector_db.embedding_model
-    assert result_vector_db.embedding_dimension == sample_vector_db.embedding_dimension
-    assert result_vector_db.provider_id == sample_vector_db.provider_id
+    result_vector_store = await cached_registry.get("vector_store", "test_vector_store")
+    assert result_vector_store is not None
+    assert result_vector_store.identifier == sample_vector_store.identifier
+    assert result_vector_store.embedding_model == sample_vector_store.embedding_model
+    assert result_vector_store.embedding_dimension == sample_vector_store.embedding_dimension
+    assert result_vector_store.provider_id == sample_vector_store.provider_id
 
 
 async def test_cached_registry_updates(cached_disk_dist_registry):
-    new_vector_db = VectorDB(
-        identifier="test_vector_db_2",
+    new_vector_store = VectorStore(
+        identifier="test_vector_store_2",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
-        provider_resource_id="test_vector_db_2",
+        provider_resource_id="test_vector_store_2",
         provider_id="baz",
     )
-    await cached_disk_dist_registry.register(new_vector_db)
+    await cached_disk_dist_registry.register(new_vector_store)
 
     # Verify in cache
-    result_vector_db = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2")
-    assert result_vector_db is not None
-    assert result_vector_db.identifier == new_vector_db.identifier
-    assert result_vector_db.provider_id == new_vector_db.provider_id
+    result_vector_store = await cached_disk_dist_registry.get("vector_store", "test_vector_store_2")
+    assert result_vector_store is not None
+    assert result_vector_store.identifier == new_vector_store.identifier
+    assert result_vector_store.provider_id == new_vector_store.provider_id
 
     # Verify persisted to disk
     db_path = cached_disk_dist_registry.kvstore.db_path
@@ -111,87 +111,89 @@ async def test_cached_registry_updates(cached_disk_dist_registry):
         await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry"))
     )
     await new_registry.initialize()
-    result_vector_db = await new_registry.get("vector_db", "test_vector_db_2")
-    assert result_vector_db is not None
-    assert result_vector_db.identifier == new_vector_db.identifier
-    assert result_vector_db.provider_id == new_vector_db.provider_id
+    result_vector_store = await new_registry.get("vector_store", "test_vector_store_2")
+    assert result_vector_store is not None
+    assert result_vector_store.identifier == new_vector_store.identifier
+    assert result_vector_store.provider_id == new_vector_store.provider_id
 
 
 async def test_duplicate_provider_registration(cached_disk_dist_registry):
-    original_vector_db = VectorDB(
-        identifier="test_vector_db_2",
+    original_vector_store = VectorStore(
+        identifier="test_vector_store_2",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
-        provider_resource_id="test_vector_db_2",
+        provider_resource_id="test_vector_store_2",
         provider_id="baz",
     )
-    assert await cached_disk_dist_registry.register(original_vector_db)
+    assert await cached_disk_dist_registry.register(original_vector_store)
 
-    duplicate_vector_db = VectorDB(
-        identifier="test_vector_db_2",
+    duplicate_vector_store = VectorStore(
+        identifier="test_vector_store_2",
         embedding_model="different-model",
         embedding_dimension=768,
-        provider_resource_id="test_vector_db_2",
+        provider_resource_id="test_vector_store_2",
         provider_id="baz",  # Same provider_id
     )
 
-    with pytest.raises(ValueError, match="Object of type 'vector_db' and identifier 'test_vector_db_2' already exists"):
-        await cached_disk_dist_registry.register(duplicate_vector_db)
+    with pytest.raises(
+        ValueError, match="Object of type 'vector_store' and identifier 'test_vector_store_2' already exists"
+    ):
+        await cached_disk_dist_registry.register(duplicate_vector_store)
 
-    result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2")
+    result = await cached_disk_dist_registry.get("vector_store", "test_vector_store_2")
     assert result is not None
-    assert result.embedding_model == original_vector_db.embedding_model  # Original values preserved
+    assert result.embedding_model == original_vector_store.embedding_model  # Original values preserved
 
 
 async def test_get_all_objects(cached_disk_dist_registry):
     # Create multiple test banks
     # Create multiple test banks
-    test_vector_dbs = [
-        VectorDB(
-            identifier=f"test_vector_db_{i}",
+    test_vector_stores = [
+        VectorStore(
+            identifier=f"test_vector_store_{i}",
             embedding_model="nomic-embed-text-v1.5",
             embedding_dimension=768,
-            provider_resource_id=f"test_vector_db_{i}",
+            provider_resource_id=f"test_vector_store_{i}",
             provider_id=f"provider_{i}",
         )
         for i in range(3)
     ]
 
-    # Register all vector_dbs
-    for vector_db in test_vector_dbs:
-        await cached_disk_dist_registry.register(vector_db)
+    # Register all vector_stores
+    for vector_store in test_vector_stores:
+        await cached_disk_dist_registry.register(vector_store)
 
     # Test get_all retrieval
     all_results = await cached_disk_dist_registry.get_all()
     assert len(all_results) == 3
 
-    # Verify each vector_db was stored correctly
-    for original_vector_db in test_vector_dbs:
-        matching_vector_dbs = [v for v in all_results if v.identifier == original_vector_db.identifier]
-        assert len(matching_vector_dbs) == 1
-        stored_vector_db = matching_vector_dbs[0]
-        assert stored_vector_db.embedding_model == original_vector_db.embedding_model
-        assert stored_vector_db.provider_id == original_vector_db.provider_id
-        assert stored_vector_db.embedding_dimension == original_vector_db.embedding_dimension
+    # Verify each vector_store was stored correctly
+    for original_vector_store in test_vector_stores:
+        matching_vector_stores = [v for v in all_results if v.identifier == original_vector_store.identifier]
+        assert len(matching_vector_stores) == 1
+        stored_vector_store = matching_vector_stores[0]
+        assert stored_vector_store.embedding_model == original_vector_store.embedding_model
+        assert stored_vector_store.provider_id == original_vector_store.provider_id
+        assert stored_vector_store.embedding_dimension == original_vector_store.embedding_dimension
 
 
 async def test_parse_registry_values_error_handling(sqlite_kvstore):
-    valid_db = VectorDB(
-        identifier="valid_vector_db",
+    valid_db = VectorStore(
+        identifier="valid_vector_store",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
-        provider_resource_id="valid_vector_db",
+        provider_resource_id="valid_vector_store",
         provider_id="test-provider",
     )
 
     await sqlite_kvstore.set(
-        KEY_FORMAT.format(type="vector_db", identifier="valid_vector_db"), valid_db.model_dump_json()
+        KEY_FORMAT.format(type="vector_store", identifier="valid_vector_store"), valid_db.model_dump_json()
    )
 
-    await sqlite_kvstore.set(KEY_FORMAT.format(type="vector_db", identifier="corrupted_json"), "{not valid json")
+    await sqlite_kvstore.set(KEY_FORMAT.format(type="vector_store", identifier="corrupted_json"), "{not valid json")
 
     await sqlite_kvstore.set(
-        KEY_FORMAT.format(type="vector_db", identifier="missing_fields"),
-        '{"type": "vector_db", "identifier": "missing_fields"}',
+        KEY_FORMAT.format(type="vector_store", identifier="missing_fields"),
+        '{"type": "vector_store", "identifier": "missing_fields"}',
     )
 
     test_registry = DiskDistributionRegistry(sqlite_kvstore)
@@ -202,18 +204,18 @@ async def test_parse_registry_values_error_handling(sqlite_kvstore):
 
     # Should have filtered out the invalid entries
     assert len(all_objects) == 1
-    assert all_objects[0].identifier == "valid_vector_db"
+    assert all_objects[0].identifier == "valid_vector_store"
 
     # Check that the get method also handles errors correctly
-    invalid_obj = await test_registry.get("vector_db", "corrupted_json")
+    invalid_obj = await test_registry.get("vector_store", "corrupted_json")
     assert invalid_obj is None
 
-    invalid_obj = await test_registry.get("vector_db", "missing_fields")
+    invalid_obj = await test_registry.get("vector_store", "missing_fields")
     assert invalid_obj is None
 
 
 async def test_cached_registry_error_handling(sqlite_kvstore):
-    valid_db = VectorDB(
+    valid_db = VectorStore(
         identifier="valid_cached_db",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
@@ -222,12 +224,12 @@ async def test_cached_registry_error_handling(sqlite_kvstore):
     )
 
     await sqlite_kvstore.set(
-        KEY_FORMAT.format(type="vector_db", identifier="valid_cached_db"), valid_db.model_dump_json()
+        KEY_FORMAT.format(type="vector_store", identifier="valid_cached_db"), valid_db.model_dump_json()
     )
 
     await sqlite_kvstore.set(
-        KEY_FORMAT.format(type="vector_db", identifier="invalid_cached_db"),
-        '{"type": "vector_db", "identifier": "invalid_cached_db", "embedding_model": 12345}',  # Should be string
+        KEY_FORMAT.format(type="vector_store", identifier="invalid_cached_db"),
+        '{"type": "vector_store", "identifier": "invalid_cached_db", "embedding_model": 12345}',  # Should be string
     )
 
     cached_registry = CachedDiskDistributionRegistry(sqlite_kvstore)
@@ -237,63 +239,65 @@ async def test_cached_registry_error_handling(sqlite_kvstore):
     assert len(all_objects) == 1
     assert all_objects[0].identifier == "valid_cached_db"
 
-    invalid_obj = await cached_registry.get("vector_db", "invalid_cached_db")
+    invalid_obj = await cached_registry.get("vector_store", "invalid_cached_db")
     assert invalid_obj is None
 
 
 async def test_double_registration_identical_objects(disk_dist_registry):
     """Test that registering identical objects succeeds (idempotent)."""
-    vector_db = VectorDBWithOwner(
-        identifier="test_vector_db",
+    vector_store = VectorStoreWithOwner(
+        identifier="test_vector_store",
embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) # First registration should succeed - result1 = await disk_dist_registry.register(vector_db) + result1 = await disk_dist_registry.register(vector_store) assert result1 is True # Second registration of identical object should also succeed (idempotent) - result2 = await disk_dist_registry.register(vector_db) + result2 = await disk_dist_registry.register(vector_store) assert result2 is True # Verify object exists and is unchanged - retrieved = await disk_dist_registry.get("vector_db", "test_vector_db") + retrieved = await disk_dist_registry.get("vector_store", "test_vector_store") assert retrieved is not None - assert retrieved.identifier == vector_db.identifier - assert retrieved.embedding_model == vector_db.embedding_model + assert retrieved.identifier == vector_store.identifier + assert retrieved.embedding_model == vector_store.embedding_model async def test_double_registration_different_objects(disk_dist_registry): """Test that registering different objects with same identifier fails.""" - vector_db1 = VectorDBWithOwner( - identifier="test_vector_db", + vector_store1 = VectorStoreWithOwner( + identifier="test_vector_store", embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) - vector_db2 = VectorDBWithOwner( - identifier="test_vector_db", # Same identifier + vector_store2 = VectorStoreWithOwner( + identifier="test_vector_store", # Same identifier embedding_model="different-model", # Different embedding model embedding_dimension=384, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) # First registration should succeed - result1 = await disk_dist_registry.register(vector_db1) + result1 = await disk_dist_registry.register(vector_store1) assert result1 is True # Second registration with different data should fail - with pytest.raises(ValueError, match="Object of type 'vector_db' and identifier 'test_vector_db' already exists"): - await disk_dist_registry.register(vector_db2) + with pytest.raises( + ValueError, match="Object of type 'vector_store' and identifier 'test_vector_store' already exists" + ): + await disk_dist_registry.register(vector_store2) # Verify original object is unchanged - retrieved = await disk_dist_registry.get("vector_db", "test_vector_db") + retrieved = await disk_dist_registry.get("vector_store", "test_vector_store") assert retrieved is not None assert retrieved.embedding_model == "all-MiniLM-L6-v2" # Original value diff --git a/tests/unit/server/test_server.py b/tests/unit/server/test_server.py index f21bbdd67..d6d4f4f23 100644 --- a/tests/unit/server/test_server.py +++ b/tests/unit/server/test_server.py @@ -41,7 +41,7 @@ class TestTranslateException: self.identifier = identifier self.owner = owner - resource = MockResource("vector_db", "test-db") + resource = MockResource("vector_store", "test-db") exc = AccessDeniedError("create", resource, user) result = translate_exception(exc) @@ -49,7 +49,7 @@ class TestTranslateException: assert isinstance(result, HTTPException) assert result.status_code == 403 assert "test-user" in result.detail - assert "vector_db::test-db" in result.detail + assert "vector_store::test-db" in result.detail assert "create" in result.detail assert "roles=['user']" in result.detail 
assert "teams=['dev']" in result.detail diff --git a/uv.lock b/uv.lock index 7f6e0401b..bbc917df5 100644 --- a/uv.lock +++ b/uv.lock @@ -921,16 +921,16 @@ wheels = [ [[package]] name = "fastapi" -version = "0.116.1" +version = "0.119.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/78/d7/6c8b3bfe33eeffa208183ec037fee0cce9f7f024089ab1c5d12ef04bd27c/fastapi-0.116.1.tar.gz", hash = "sha256:ed52cbf946abfd70c5a0dccb24673f0670deeb517a88b3544d03c2a6bf283143", size = 296485, upload-time = "2025-07-11T16:22:32.057Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/f9/5c5bcce82a7997cc0eb8c47b7800f862f6b56adc40486ed246e5010d443b/fastapi-0.119.0.tar.gz", hash = "sha256:451082403a2c1f0b99c6bd57c09110ed5463856804c8078d38e5a1f1035dbbb7", size = 336756, upload-time = "2025-10-11T17:13:40.53Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/47/d63c60f59a59467fda0f93f46335c9d18526d7071f025cb5b89d5353ea42/fastapi-0.116.1-py3-none-any.whl", hash = "sha256:c46ac7c312df840f0c9e220f7964bada936781bc4e2e6eb71f1c4d7553786565", size = 95631, upload-time = "2025-07-11T16:22:30.485Z" }, + { url = "https://files.pythonhosted.org/packages/ce/70/584c4d7cad80f5e833715c0a29962d7c93b4d18eed522a02981a6d1b6ee5/fastapi-0.119.0-py3-none-any.whl", hash = "sha256:90a2e49ed19515320abb864df570dd766be0662c5d577688f1600170f7f73cf2", size = 107095, upload-time = "2025-10-11T17:13:39.048Z" }, ] [[package]] @@ -5231,7 +5231,7 @@ wheels = [ [[package]] name = "weaviate-client" -version = "4.16.9" +version = "4.17.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "authlib" }, @@ -5242,9 +5242,9 @@ dependencies = [ { name = "pydantic" }, { name = "validators" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f4/e4/6a0b1501645f17a851067fc7bd0d5b53dc9777f2818be9c43debe06eda19/weaviate_client-4.16.9.tar.gz", hash = "sha256:d461071f1ff5ebddd0fc697959628a1d8caa12af1da071401ef25583c3084eba", size = 766390, upload-time = "2025-08-20T15:00:03.924Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/0e/e4582b007427187a9fde55fa575db4b766c81929d2b43a3dd8becce50567/weaviate_client-4.17.0.tar.gz", hash = "sha256:731d58d84b0989df4db399b686357ed285fb95971a492ccca8dec90bb2343c51", size = 769019, upload-time = "2025-09-26T11:20:27.381Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/1a/fc66f5f33961351c759d56453d18176849da8f64186c941183bb574b808b/weaviate_client-4.16.9-py3-none-any.whl", hash = "sha256:8b4adabaec0d513edef94c8c1de61c89a86eba3b63a4dc1acdfc9580e80199f4", size = 579098, upload-time = "2025-08-20T15:00:01.882Z" }, + { url = "https://files.pythonhosted.org/packages/5b/c5/2da3a45866da7a935dab8ad07be05dcaee48b3ad4955144583b651929be7/weaviate_client-4.17.0-py3-none-any.whl", hash = "sha256:60e4a355b90537ee1e942ab0b76a94750897a13d9cf13c5a6decbd166d0ca8b5", size = 582763, upload-time = "2025-09-26T11:20:25.864Z" }, ] [[package]]