[memory refactor][6/n] Update naming and routes (#839)

Making a few small naming changes as per feedback:

- `RAGToolRuntime` methods are renamed to `insert` and `query` to keep them more general
- The tool names are changed to the non-namespaced forms `insert_into_memory` and `query_from_memory`
- The REST endpoints are made more REST-ful: `/v1/tool-runtime/rag-tool/insert-documents` becomes `/v1/tool-runtime/rag-tool/insert`, and `/v1/tool-runtime/rag-tool/query-context` becomes `/v1/tool-runtime/rag-tool/query` (see the usage sketch below)
Ashwin Bharambe 2025-01-22 10:39:13 -08:00 committed by GitHub
parent c9e5578151
commit a63a43c646
11 changed files with 319 additions and 330 deletions
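
For orientation, here is a minimal sketch of the renamed call surface, stitched together from the test changes in this diff. It assumes an already-configured llama-stack client object named `client` and a registered vector DB with id `test_bank`; the document id and contents are illustrative, and `RAGDocument` is the in-repo type imported in the tests below (a client SDK may expose an equivalent type).

```python
from llama_stack.apis.tools import RAGDocument

# An illustrative document to index; the id, content, and mime type are made up.
documents = [
    RAGDocument(
        document_id="doc-0",
        content="Llama Stack services can index and retrieve documents for RAG.",
        mime_type="text/plain",
        metadata={},
    )
]

# Old name: rag_tool.insert_documents(...)  ->  new name: rag_tool.insert(...)
client.tool_runtime.rag_tool.insert(
    documents=documents,
    vector_db_id="test_bank",
    chunk_size_in_tokens=512,
)

# Old name: rag_tool.query_context(...)  ->  new name: rag_tool.query(...)
# query_config is now optional; the provider falls back to RAGQueryConfig() defaults.
result = client.tool_runtime.rag_tool.query(
    content="What are the main topics covered in the documentation?",
    vector_db_ids=["test_bank"],
)
print(result.content)
```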


@ -1887,6 +1887,49 @@
]
}
},
"/v1/tool-runtime/rag-tool/insert": {
"post": {
"responses": {
"200": {
"description": "OK"
}
},
"tags": [
"ToolRuntime"
],
"summary": "Index documents so they can be used by the RAG system",
"parameters": [
{
"name": "X-LlamaStack-Provider-Data",
"in": "header",
"description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "X-LlamaStack-Client-Version",
"in": "header",
"description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
"required": false,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/InsertRequest"
}
}
},
"required": true
}
}
},
"/v1/vector-io/insert": {
"post": {
"responses": {
@ -1929,49 +1972,6 @@
}
}
},
"/v1/tool-runtime/rag-tool/insert-documents": {
"post": {
"responses": {
"200": {
"description": "OK"
}
},
"tags": [
"ToolRuntime"
],
"summary": "Index documents so they can be used by the RAG system",
"parameters": [
{
"name": "X-LlamaStack-Provider-Data",
"in": "header",
"description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "X-LlamaStack-Client-Version",
"in": "header",
"description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
"required": false,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/InsertDocumentsRequest"
}
}
},
"required": true
}
}
},
"/v1/tool-runtime/invoke": {
"post": {
"responses": {
@ -3033,6 +3033,56 @@
}
}
},
"/v1/tool-runtime/rag-tool/query": {
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/RAGQueryResult"
}
}
}
}
},
"tags": [
"ToolRuntime"
],
"summary": "Query the RAG system for context; typically invoked by the agent",
"parameters": [
{
"name": "X-LlamaStack-Provider-Data",
"in": "header",
"description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "X-LlamaStack-Client-Version",
"in": "header",
"description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
"required": false,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryRequest"
}
}
},
"required": true
}
}
},
"/v1/vector-io/query": {
"post": {
"responses": {
@ -3082,56 +3132,6 @@
}
}
},
"/v1/tool-runtime/rag-tool/query-context": {
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/RAGQueryResult"
}
}
}
}
},
"tags": [
"ToolRuntime"
],
"summary": "Query the RAG system for context; typically invoked by the agent",
"parameters": [
{
"name": "X-LlamaStack-Provider-Data",
"in": "header",
"description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "X-LlamaStack-Client-Version",
"in": "header",
"description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
"required": false,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryContextRequest"
}
}
},
"required": true
}
}
},
"/v1/telemetry/spans": {
"get": {
"responses": {
@ -5256,11 +5256,8 @@
"const": "memory_retrieval",
"default": "memory_retrieval"
},
"memory_bank_ids": {
"type": "array",
"items": {
"type": "string"
}
"vector_db_ids": {
"type": "string"
},
"inserted_context": {
"$ref": "#/components/schemas/InterleavedContent"
@ -5271,7 +5268,7 @@
"turn_id",
"step_id",
"step_type",
"memory_bank_ids",
"vector_db_ids",
"inserted_context"
]
},
@ -6976,63 +6973,6 @@
"status"
]
},
"InsertChunksRequest": {
"type": "object",
"properties": {
"vector_db_id": {
"type": "string"
},
"chunks": {
"type": "array",
"items": {
"type": "object",
"properties": {
"content": {
"$ref": "#/components/schemas/InterleavedContent"
},
"metadata": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"content",
"metadata"
]
}
},
"ttl_seconds": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"vector_db_id",
"chunks"
]
},
"RAGDocument": {
"type": "object",
"properties": {
@ -7094,7 +7034,7 @@
"metadata"
]
},
"InsertDocumentsRequest": {
"InsertRequest": {
"type": "object",
"properties": {
"documents": {
@ -7117,6 +7057,63 @@
"chunk_size_in_tokens"
]
},
"InsertChunksRequest": {
"type": "object",
"properties": {
"vector_db_id": {
"type": "string"
},
"chunks": {
"type": "array",
"items": {
"type": "object",
"properties": {
"content": {
"$ref": "#/components/schemas/InterleavedContent"
},
"metadata": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"content",
"metadata"
]
}
},
"ttl_seconds": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"vector_db_id",
"chunks"
]
},
"InvokeToolRequest": {
"type": "object",
"properties": {
@ -7883,6 +7880,110 @@
"job_uuid"
]
},
"DefaultRAGQueryGeneratorConfig": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "default",
"default": "default"
},
"separator": {
"type": "string",
"default": " "
}
},
"additionalProperties": false,
"required": [
"type",
"separator"
]
},
"LLMRAGQueryGeneratorConfig": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "llm",
"default": "llm"
},
"model": {
"type": "string"
},
"template": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"type",
"model",
"template"
]
},
"RAGQueryConfig": {
"type": "object",
"properties": {
"query_generator_config": {
"$ref": "#/components/schemas/RAGQueryGeneratorConfig"
},
"max_tokens_in_context": {
"type": "integer",
"default": 4096
},
"max_chunks": {
"type": "integer",
"default": 5
}
},
"additionalProperties": false,
"required": [
"query_generator_config",
"max_tokens_in_context",
"max_chunks"
]
},
"RAGQueryGeneratorConfig": {
"oneOf": [
{
"$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig"
},
{
"$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig"
}
]
},
"QueryRequest": {
"type": "object",
"properties": {
"content": {
"$ref": "#/components/schemas/InterleavedContent"
},
"vector_db_ids": {
"type": "array",
"items": {
"type": "string"
}
},
"query_config": {
"$ref": "#/components/schemas/RAGQueryConfig"
}
},
"additionalProperties": false,
"required": [
"content",
"vector_db_ids"
]
},
"RAGQueryResult": {
"type": "object",
"properties": {
"content": {
"$ref": "#/components/schemas/InterleavedContent"
}
},
"additionalProperties": false
},
"QueryChunksRequest": {
"type": "object",
"properties": {
@ -7981,111 +8082,6 @@
"scores"
]
},
"DefaultRAGQueryGeneratorConfig": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "default",
"default": "default"
},
"separator": {
"type": "string",
"default": " "
}
},
"additionalProperties": false,
"required": [
"type",
"separator"
]
},
"LLMRAGQueryGeneratorConfig": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "llm",
"default": "llm"
},
"model": {
"type": "string"
},
"template": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"type",
"model",
"template"
]
},
"RAGQueryConfig": {
"type": "object",
"properties": {
"query_generator_config": {
"$ref": "#/components/schemas/RAGQueryGeneratorConfig"
},
"max_tokens_in_context": {
"type": "integer",
"default": 4096
},
"max_chunks": {
"type": "integer",
"default": 5
}
},
"additionalProperties": false,
"required": [
"query_generator_config",
"max_tokens_in_context",
"max_chunks"
]
},
"RAGQueryGeneratorConfig": {
"oneOf": [
{
"$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig"
},
{
"$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig"
}
]
},
"QueryContextRequest": {
"type": "object",
"properties": {
"content": {
"$ref": "#/components/schemas/InterleavedContent"
},
"query_config": {
"$ref": "#/components/schemas/RAGQueryConfig"
},
"vector_db_ids": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"content",
"query_config",
"vector_db_ids"
]
},
"RAGQueryResult": {
"type": "object",
"properties": {
"content": {
"$ref": "#/components/schemas/InterleavedContent"
}
},
"additionalProperties": false
},
"QueryCondition": {
"type": "object",
"properties": {
@ -9246,8 +9242,8 @@
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/InsertChunksRequest\" />"
},
{
"name": "InsertDocumentsRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/InsertDocumentsRequest\" />"
"name": "InsertRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/InsertRequest\" />"
},
{
"name": "Inspect"
@ -9435,8 +9431,8 @@
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryConditionOp\" />"
},
{
"name": "QueryContextRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryContextRequest\" />"
"name": "QueryRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryRequest\" />"
},
{
"name": "QuerySpanTreeResponse",
@ -9858,7 +9854,7 @@
"ImageDelta",
"InferenceStep",
"InsertChunksRequest",
"InsertDocumentsRequest",
"InsertRequest",
"InterleavedContent",
"InterleavedContentItem",
"InvokeToolRequest",
@ -9903,7 +9899,7 @@
"QueryChunksResponse",
"QueryCondition",
"QueryConditionOp",
"QueryContextRequest",
"QueryRequest",
"QuerySpanTreeResponse",
"QuerySpansResponse",
"QueryTracesResponse",


@ -1009,7 +1009,7 @@ components:
- vector_db_id
- chunks
type: object
InsertDocumentsRequest:
InsertRequest:
additionalProperties: false
properties:
chunk_size_in_tokens:
@ -1299,10 +1299,6 @@ components:
type: string
inserted_context:
$ref: '#/components/schemas/InterleavedContent'
memory_bank_ids:
items:
type: string
type: array
started_at:
format: date-time
type: string
@ -1314,11 +1310,13 @@ components:
type: string
turn_id:
type: string
vector_db_ids:
type: string
required:
- turn_id
- step_id
- step_type
- memory_bank_ids
- vector_db_ids
- inserted_context
type: object
Message:
@ -1710,7 +1708,7 @@ components:
- gt
- lt
type: string
QueryContextRequest:
QueryRequest:
additionalProperties: false
properties:
content:
@ -1723,7 +1721,6 @@ components:
type: array
required:
- content
- query_config
- vector_db_ids
type: object
QuerySpanTreeResponse:
@ -5176,7 +5173,7 @@ paths:
description: OK
tags:
- ToolRuntime
/v1/tool-runtime/rag-tool/insert-documents:
/v1/tool-runtime/rag-tool/insert:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
@ -5197,7 +5194,7 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/InsertDocumentsRequest'
$ref: '#/components/schemas/InsertRequest'
required: true
responses:
'200':
@ -5205,7 +5202,7 @@ paths:
summary: Index documents so they can be used by the RAG system
tags:
- ToolRuntime
/v1/tool-runtime/rag-tool/query-context:
/v1/tool-runtime/rag-tool/query:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
@ -5226,7 +5223,7 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryContextRequest'
$ref: '#/components/schemas/QueryRequest'
required: true
responses:
'200':
@ -5814,9 +5811,8 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/InsertChunksRequest"
/>
name: InsertChunksRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/InsertDocumentsRequest"
/>
name: InsertDocumentsRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/InsertRequest" />
name: InsertRequest
- name: Inspect
- description: <SchemaDefinition schemaRef="#/components/schemas/InterleavedContent"
/>
@ -5943,9 +5939,8 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/QueryConditionOp"
/>
name: QueryConditionOp
- description: <SchemaDefinition schemaRef="#/components/schemas/QueryContextRequest"
/>
name: QueryContextRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/QueryRequest" />
name: QueryRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/QuerySpanTreeResponse"
/>
name: QuerySpanTreeResponse
@ -6245,7 +6240,7 @@ x-tagGroups:
- ImageDelta
- InferenceStep
- InsertChunksRequest
- InsertDocumentsRequest
- InsertRequest
- InterleavedContent
- InterleavedContentItem
- InvokeToolRequest
@ -6290,7 +6285,7 @@ x-tagGroups:
- QueryChunksResponse
- QueryCondition
- QueryConditionOp
- QueryContextRequest
- QueryRequest
- QuerySpanTreeResponse
- QuerySpansResponse
- QueryTracesResponse


@ -74,8 +74,8 @@ class RAGQueryConfig(BaseModel):
@runtime_checkable
@trace_protocol
class RAGToolRuntime(Protocol):
@webmethod(route="/tool-runtime/rag-tool/insert-documents", method="POST")
async def insert_documents(
@webmethod(route="/tool-runtime/rag-tool/insert", method="POST")
async def insert(
self,
documents: List[RAGDocument],
vector_db_id: str,
@ -84,12 +84,12 @@ class RAGToolRuntime(Protocol):
"""Index documents so they can be used by the RAG system"""
...
@webmethod(route="/tool-runtime/rag-tool/query-context", method="POST")
async def query_context(
@webmethod(route="/tool-runtime/rag-tool/query", method="POST")
async def query(
self,
content: InterleavedContent,
query_config: RAGQueryConfig,
vector_db_ids: List[str],
query_config: Optional[RAGQueryConfig] = None,
) -> RAGQueryResult:
"""Query the RAG system for context; typically invoked by the agent"""
...


@ -38,7 +38,7 @@ class VectorDBStore(Protocol):
class VectorIO(Protocol):
vector_db_store: VectorDBStore
# this will just block now until documents are inserted, but it should
# this will just block now until chunks are inserted, but it should
# probably return a Job instance which can be polled for completion
@webmethod(route="/vector-io/insert", method="POST")
async def insert_chunks(


@ -414,25 +414,25 @@ class ToolRuntimeRouter(ToolRuntime):
) -> None:
self.routing_table = routing_table
async def query_context(
async def query(
self,
content: InterleavedContent,
query_config: RAGQueryConfig,
vector_db_ids: List[str],
query_config: Optional[RAGQueryConfig] = None,
) -> RAGQueryResult:
return await self.routing_table.get_provider_impl(
"rag_tool.query_context"
).query_context(content, query_config, vector_db_ids)
"query_from_memory"
).query(content, vector_db_ids, query_config)
async def insert_documents(
async def insert(
self,
documents: List[RAGDocument],
vector_db_id: str,
chunk_size_in_tokens: int = 512,
) -> None:
return await self.routing_table.get_provider_impl(
"rag_tool.insert_documents"
).insert_documents(documents, vector_db_id, chunk_size_in_tokens)
"insert_into_memory"
).insert(documents, vector_db_id, chunk_size_in_tokens)
def __init__(
self,
@ -441,10 +441,9 @@ class ToolRuntimeRouter(ToolRuntime):
self.routing_table = routing_table
# HACK ALERT this should be in sync with "get_all_api_endpoints()"
# TODO: make sure rag_tool vs builtin::memory is correct everywhere
self.rag_tool = self.RagToolImpl(routing_table)
setattr(self, "rag_tool.query_context", self.rag_tool.query_context)
setattr(self, "rag_tool.insert_documents", self.rag_tool.insert_documents)
for method in ("query", "insert"):
setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method))
async def initialize(self) -> None:
pass


@ -84,7 +84,7 @@ def make_random_string(length: int = 8):
TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})")
MEMORY_QUERY_TOOL = "rag_tool.query_context"
MEMORY_QUERY_TOOL = "query_from_memory"
WEB_SEARCH_TOOL = "web_search"
MEMORY_GROUP = "builtin::memory"
@ -432,16 +432,16 @@ class ChatAgent(ShieldRunnerMixin):
)
)
)
result = await self.tool_runtime_api.rag_tool.query_context(
result = await self.tool_runtime_api.rag_tool.query(
content=concat_interleaved_content(
[msg.content for msg in input_messages]
),
vector_db_ids=vector_db_ids,
query_config=RAGQueryConfig(
query_generator_config=DefaultRAGQueryGeneratorConfig(),
max_tokens_in_context=4096,
max_chunks=5,
),
vector_db_ids=vector_db_ids,
)
retrieved_context = result.content
@ -882,7 +882,7 @@ class ChatAgent(ShieldRunnerMixin):
)
for a in data
]
await self.tool_runtime_api.rag_tool.insert_documents(
await self.tool_runtime_api.rag_tool.insert(
documents=documents,
vector_db_id=vector_db_id,
chunk_size_in_tokens=512,


@ -61,7 +61,7 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
async def shutdown(self):
pass
async def insert_documents(
async def insert(
self,
documents: List[RAGDocument],
vector_db_id: str,
@ -87,15 +87,16 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
vector_db_id=vector_db_id,
)
async def query_context(
async def query(
self,
content: InterleavedContent,
query_config: RAGQueryConfig,
vector_db_ids: List[str],
query_config: Optional[RAGQueryConfig] = None,
) -> RAGQueryResult:
if not vector_db_ids:
return RAGQueryResult(content=None)
query_config = query_config or RAGQueryConfig()
query = await generate_rag_query(
query_config.query_generator_config,
content,
@ -159,11 +160,11 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
# encountering fatals.
return [
ToolDef(
name="rag_tool.query_context",
name="query_from_memory",
description="Retrieve context from memory",
),
ToolDef(
name="rag_tool.insert_documents",
name="insert_into_memory",
description="Insert documents into memory",
),
]


@ -96,14 +96,14 @@ class TestTools:
)
# Insert documents into memory
await tools_impl.rag_tool.insert_documents(
await tools_impl.rag_tool.insert(
documents=sample_documents,
vector_db_id="test_bank",
chunk_size_in_tokens=512,
)
# Execute the memory tool
response = await tools_impl.rag_tool.query_context(
response = await tools_impl.rag_tool.query(
content="What are the main topics covered in the documentation?",
vector_db_ids=["test_bank"],
)


@ -11,11 +11,9 @@ from pathlib import Path
import pytest
from llama_stack.providers.utils.memory.vector_store import (
content_from_doc,
MemoryBankDocument,
URL,
)
from llama_stack.apis.tools import RAGDocument
from llama_stack.providers.utils.memory.vector_store import content_from_doc, URL
DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf"
@ -41,33 +39,33 @@ class TestVectorStore:
@pytest.mark.asyncio
async def test_returns_content_from_pdf_data_uri(self):
data_uri = data_url_from_file(DUMMY_PDF_PATH)
doc = MemoryBankDocument(
doc = RAGDocument(
document_id="dummy",
content=data_uri,
mime_type="application/pdf",
metadata={},
)
content = await content_from_doc(doc)
assert content == "Dummy PDF file"
assert content == "Dumm y PDF file"
@pytest.mark.asyncio
async def test_downloads_pdf_and_returns_content(self):
# Using GitHub to host the PDF file
url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf"
doc = MemoryBankDocument(
doc = RAGDocument(
document_id="dummy",
content=url,
mime_type="application/pdf",
metadata={},
)
content = await content_from_doc(doc)
assert content == "Dummy PDF file"
assert content == "Dumm y PDF file"
@pytest.mark.asyncio
async def test_downloads_pdf_and_returns_content_with_url_object(self):
# Using GitHub to host the PDF file
url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf"
doc = MemoryBankDocument(
doc = RAGDocument(
document_id="dummy",
content=URL(
uri=url,
@ -76,4 +74,4 @@ class TestVectorStore:
metadata={},
)
content = await content_from_doc(doc)
assert content == "Dummy PDF file"
assert content == "Dumm y PDF file"


@ -292,7 +292,7 @@ def test_rag_agent(llama_stack_client, agent_config):
embedding_model="all-MiniLM-L6-v2",
embedding_dimension=384,
)
llama_stack_client.tool_runtime.rag_tool.insert_documents(
llama_stack_client.tool_runtime.rag_tool.insert(
documents=documents,
vector_db_id=vector_db_id,
chunk_size_in_tokens=512,
@ -321,4 +321,4 @@ def test_rag_agent(llama_stack_client, agent_config):
)
logs = [str(log) for log in EventLogger().log(response) if log is not None]
logs_str = "".join(logs)
assert "Tool:rag_tool.query_context" in logs_str
assert "Tool:query_from_memory" in logs_str


@ -73,7 +73,7 @@ def test_vector_db_insert_inline_and_query(
llama_stack_client, single_entry_vector_db_registry, sample_documents
):
vector_db_id = single_entry_vector_db_registry[0]
llama_stack_client.tool_runtime.rag_tool.insert_documents(
llama_stack_client.tool_runtime.rag_tool.insert(
documents=sample_documents,
chunk_size_in_tokens=512,
vector_db_id=vector_db_id,
@ -157,7 +157,7 @@ def test_vector_db_insert_from_url_and_query(
for i, url in enumerate(urls)
]
llama_stack_client.tool_runtime.rag_tool.insert_documents(
llama_stack_client.tool_runtime.rag_tool.insert(
documents=documents,
vector_db_id=vector_db_id,
chunk_size_in_tokens=512,