diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py
index 4cea9d970..abeb16936 100644
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@@ -172,10 +172,16 @@ def _get_endpoint_functions(
 def _get_defining_class(member_fn: str, derived_cls: type) -> type:
     "Find the class in which a member function is first defined in a class inheritance hierarchy."
 
+    # NOTE: imported inside the function rather than at module level (presumably to avoid an import cycle -- confirm)
+    from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime
+
     # iterate in reverse member resolution order to find most specific class first
     for cls in reversed(inspect.getmro(derived_cls)):
         for name, _ in inspect.getmembers(cls, inspect.isfunction):
             if name == member_fn:
+                # HACK: report methods defined on RAGToolRuntime as belonging to ToolRuntime
+                if cls == RAGToolRuntime:
+                    return ToolRuntime
                 return cls
 
     raise ValidationError(
diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index ffa1df8e6..f00d7b291 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -1929,6 +1929,49 @@
                 }
             }
         },
+        "/v1/tool-runtime/rag-tool/insert-documents": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    }
+                },
+                "tags": [
+                    "ToolRuntime"
+                ],
+                "summary": "Index documents so they can be used by the RAG system",
+                "parameters": [
+                    {
+                        "name": "X-LlamaStack-Provider-Data",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "X-LlamaStack-Client-Version",
+                        "in": "header",
+                        "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/InsertDocumentsRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/tool-runtime/invoke": {
             "post": {
                 "responses": {
@@ -3039,6 +3082,56 @@
                 }
             }
         },
+        "/v1/tool-runtime/rag-tool/query-context": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/RAGQueryResult"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "ToolRuntime"
+                ],
+                "summary": "Query the RAG system for context; typically invoked by the agent",
+                "parameters": [
+                    {
+                        "name": "X-LlamaStack-Provider-Data",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "X-LlamaStack-Client-Version",
+                        "in": "header",
+                        "description": "Version of the client making the request. This is used to ensure that the client and server are compatible.",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/QueryContextRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/v1/telemetry/spans": {
             "get": {
                 "responses": {
@@ -6940,6 +7033,90 @@
                     "chunks"
                 ]
             },
+            "RAGDocument": {
+                "type": "object",
+                "properties": {
+                    "document_id": {
+                        "type": "string"
+                    },
+                    "content": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "$ref": "#/components/schemas/InterleavedContentItem"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "$ref": "#/components/schemas/InterleavedContentItem"
+                                }
+                            },
+                            {
+                                "$ref": "#/components/schemas/URL"
+                            }
+                        ]
+                    },
+                    "mime_type": {
+                        "type": "string"
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "document_id",
+                    "content",
+                    "metadata"
+                ]
+            },
+            "InsertDocumentsRequest": {
+                "type": "object",
+                "properties": {
+                    "documents": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/RAGDocument"
+                        }
+                    },
+                    "vector_db_id": {
+                        "type": "string"
+                    },
+                    "chunk_size_in_tokens": {
+                        "type": "integer"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "documents",
+                    "vector_db_id",
+                    "chunk_size_in_tokens"
+                ]
+            },
             "InvokeToolRequest": {
                 "type": "object",
                 "properties": {
@@ -7804,6 +7981,111 @@
                     "scores"
                 ]
             },
+            "DefaultRAGQueryGeneratorConfig": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "default",
+                        "default": "default"
+                    },
+                    "separator": {
+                        "type": "string",
+                        "default": " "
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "separator"
+                ]
+            },
+            "LLMRAGQueryGeneratorConfig": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "llm",
+                        "default": "llm"
+                    },
+                    "model": {
+                        "type": "string"
+                    },
+                    "template": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "model",
+                    "template"
+                ]
+            },
+            "RAGQueryConfig": {
+                "type": "object",
+                "properties": {
+                    "query_generator_config": {
+                        "$ref": "#/components/schemas/RAGQueryGeneratorConfig"
+                    },
+                    "max_tokens_in_context": {
+                        "type": "integer",
+                        "default": 4096
+                    },
+                    "max_chunks": {
+                        "type": "integer",
+                        "default": 5
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "query_generator_config",
+                    "max_tokens_in_context",
+                    "max_chunks"
+                ]
+            },
+            "RAGQueryGeneratorConfig": {
+                "oneOf": [
+                    {
+                        "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig"
+                    },
+                    {
+                        "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig"
+                    }
+                ]
+            },
+            "QueryContextRequest": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "$ref": "#/components/schemas/InterleavedContent"
+                    },
+                    "query_config": {
+                        "$ref": "#/components/schemas/RAGQueryConfig"
+                    },
+                    "vector_db_ids": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "content",
+                    "query_config",
+                    "vector_db_ids"
+                ]
+            },
+            "RAGQueryResult": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "$ref": "#/components/schemas/InterleavedContent"
+                    }
+                },
+                "additionalProperties": false
+            },
             "QueryCondition": {
                 "type": "object",
                 "properties": {
@@ -8902,6 +9184,10 @@
         {
             "name": "Datasets"
         },
+        {
+            "name": "DefaultRAGQueryGeneratorConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/DefaultRAGQueryGeneratorConfig\" />"
+        },
         {
             "name": "EfficiencyConfig",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EfficiencyConfig\" />"
@@ -8959,6 +9245,10 @@
             "name": "InsertChunksRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/InsertChunksRequest\" />"
         },
+        {
+            "name": "InsertDocumentsRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/InsertDocumentsRequest\" />"
+        },
         {
             "name": "Inspect"
         },
@@ -8990,6 +9280,10 @@
             "name": "LLMAsJudgeScoringFnParams",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/LLMAsJudgeScoringFnParams\" />"
         },
+        {
+            "name": "LLMRAGQueryGeneratorConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/LLMRAGQueryGeneratorConfig\" />"
+        },
         {
             "name": "ListDatasetsResponse",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ListDatasetsResponse\" />"
@@ -9140,6 +9434,10 @@
             "name": "QueryConditionOp",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryConditionOp\" />"
         },
+        {
+            "name": "QueryContextRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryContextRequest\" />"
+        },
         {
             "name": "QuerySpanTreeResponse",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/QuerySpanTreeResponse\" />"
@@ -9152,6 +9450,22 @@
             "name": "QueryTracesResponse",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryTracesResponse\" />"
         },
+        {
+            "name": "RAGDocument",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RAGDocument\" />"
+        },
+        {
+            "name": "RAGQueryConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RAGQueryConfig\" />"
+        },
+        {
+            "name": "RAGQueryGeneratorConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RAGQueryGeneratorConfig\" />"
+        },
+        {
+            "name": "RAGQueryResult",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RAGQueryResult\" />"
+        },
         {
             "name": "RegexParserScoringFnParams",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RegexParserScoringFnParams\" />"
@@ -9531,6 +9845,7 @@
                 "DataConfig",
                 "Dataset",
                 "DatasetFormat",
+                "DefaultRAGQueryGeneratorConfig",
                 "EfficiencyConfig",
                 "EmbeddingsRequest",
                 "EmbeddingsResponse",
@@ -9543,6 +9858,7 @@
                 "ImageDelta",
                 "InferenceStep",
                 "InsertChunksRequest",
+                "InsertDocumentsRequest",
                 "InterleavedContent",
                 "InterleavedContentItem",
                 "InvokeToolRequest",
@@ -9550,6 +9866,7 @@
                 "JobStatus",
                 "JsonType",
                 "LLMAsJudgeScoringFnParams",
+                "LLMRAGQueryGeneratorConfig",
                 "ListDatasetsResponse",
                 "ListEvalTasksResponse",
                 "ListModelsResponse",
@@ -9586,9 +9903,14 @@
                 "QueryChunksResponse",
                 "QueryCondition",
                 "QueryConditionOp",
+                "QueryContextRequest",
                 "QuerySpanTreeResponse",
                 "QuerySpansResponse",
                 "QueryTracesResponse",
+                "RAGDocument",
+                "RAGQueryConfig",
+                "RAGQueryGeneratorConfig",
+                "RAGQueryResult",
                 "RegexParserScoringFnParams",
                 "RegisterDatasetRequest",
                 "RegisterEvalTaskRequest",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 1678b1bb9..e1ae07c45 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -761,6 +761,20 @@ components:
       - instruct
       - dialog
       type: string
+    DefaultRAGQueryGeneratorConfig:
+      additionalProperties: false
+      properties:
+        separator:
+          default: ' '
+          type: string
+        type:
+          const: default
+          default: default
+          type: string
+      required:
+      - type
+      - separator
+      type: object
     EfficiencyConfig:
       additionalProperties: false
       properties:
@@ -995,6 +1009,22 @@ components:
       - vector_db_id
       - chunks
       type: object
+    InsertDocumentsRequest:
+      additionalProperties: false
+      properties:
+        chunk_size_in_tokens:
+          type: integer
+        documents:
+          items:
+            $ref: '#/components/schemas/RAGDocument'
+          type: array
+        vector_db_id:
+          type: string
+      required:
+      - documents
+      - vector_db_id
+      - chunk_size_in_tokens
+      type: object
     InterleavedContent:
       oneOf:
       - type: string
@@ -1073,6 +1103,22 @@ components:
       - type
       - judge_model
       type: object
+    LLMRAGQueryGeneratorConfig:
+      additionalProperties: false
+      properties:
+        model:
+          type: string
+        template:
+          type: string
+        type:
+          const: llm
+          default: llm
+          type: string
+      required:
+      - type
+      - model
+      - template
+      type: object
     ListDatasetsResponse:
       additionalProperties: false
       properties:
@@ -1664,6 +1710,22 @@ components:
       - gt
       - lt
       type: string
+    QueryContextRequest:
+      additionalProperties: false
+      properties:
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+        query_config:
+          $ref: '#/components/schemas/RAGQueryConfig'
+        vector_db_ids:
+          items:
+            type: string
+          type: array
+      required:
+      - content
+      - query_config
+      - vector_db_ids
+      type: object
     QuerySpanTreeResponse:
       additionalProperties: false
       properties:
@@ -1694,6 +1756,62 @@ components:
       required:
       - data
       type: object
+    RAGDocument:
+      additionalProperties: false
+      properties:
+        content:
+          oneOf:
+          - type: string
+          - $ref: '#/components/schemas/InterleavedContentItem'
+          - items:
+              $ref: '#/components/schemas/InterleavedContentItem'
+            type: array
+          - $ref: '#/components/schemas/URL'
+        document_id:
+          type: string
+        metadata:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        mime_type:
+          type: string
+      required:
+      - document_id
+      - content
+      - metadata
+      type: object
+    RAGQueryConfig:
+      additionalProperties: false
+      properties:
+        max_chunks:
+          default: 5
+          type: integer
+        max_tokens_in_context:
+          default: 4096
+          type: integer
+        query_generator_config:
+          $ref: '#/components/schemas/RAGQueryGeneratorConfig'
+      required:
+      - query_generator_config
+      - max_tokens_in_context
+      - max_chunks
+      type: object
+    RAGQueryGeneratorConfig:
+      oneOf:
+      - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
+      - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
+    RAGQueryResult:
+      additionalProperties: false
+      properties:
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+      type: object
     RegexParserScoringFnParams:
       additionalProperties: false
       properties:
@@ -5058,6 +5176,68 @@ paths:
           description: OK
       tags:
       - ToolRuntime
+  /v1/tool-runtime/rag-tool/insert-documents:
+    post:
+      parameters:
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-Provider-Data
+        required: false
+        schema:
+          type: string
+      - description: Version of the client making the request. This is used to ensure
+          that the client and server are compatible.
+        in: header
+        name: X-LlamaStack-Client-Version
+        required: false
+        schema:
+          type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/InsertDocumentsRequest'
+        required: true
+      responses:
+        '200':
+          description: OK
+      summary: Index documents so they can be used by the RAG system
+      tags:
+      - ToolRuntime
+  /v1/tool-runtime/rag-tool/query-context:
+    post:
+      parameters:
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-Provider-Data
+        required: false
+        schema:
+          type: string
+      - description: Version of the client making the request. This is used to ensure
+          that the client and server are compatible.
+        in: header
+        name: X-LlamaStack-Client-Version
+        required: false
+        schema:
+          type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/QueryContextRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/RAGQueryResult'
+          description: OK
+      summary: Query the RAG system for context; typically invoked by the agent
+      tags:
+      - ToolRuntime
   /v1/toolgroups:
     get:
       parameters:
@@ -5596,6 +5776,9 @@ tags:
   name: DatasetFormat
 - name: DatasetIO
 - name: Datasets
+- description: <SchemaDefinition schemaRef="#/components/schemas/DefaultRAGQueryGeneratorConfig"
+    />
+  name: DefaultRAGQueryGeneratorConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/EfficiencyConfig"
     />
   name: EfficiencyConfig
@@ -5631,6 +5814,9 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/InsertChunksRequest"
     />
   name: InsertChunksRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/InsertDocumentsRequest"
+    />
+  name: InsertDocumentsRequest
 - name: Inspect
 - description: <SchemaDefinition schemaRef="#/components/schemas/InterleavedContent"
     />
@@ -5650,6 +5836,9 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/LLMAsJudgeScoringFnParams"
     />
   name: LLMAsJudgeScoringFnParams
+- description: <SchemaDefinition schemaRef="#/components/schemas/LLMRAGQueryGeneratorConfig"
+    />
+  name: LLMRAGQueryGeneratorConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/ListDatasetsResponse"
     />
   name: ListDatasetsResponse
@@ -5754,6 +5943,9 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/QueryConditionOp"
     />
   name: QueryConditionOp
+- description: <SchemaDefinition schemaRef="#/components/schemas/QueryContextRequest"
+    />
+  name: QueryContextRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/QuerySpanTreeResponse"
     />
   name: QuerySpanTreeResponse
@@ -5763,6 +5955,15 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/QueryTracesResponse"
     />
   name: QueryTracesResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/RAGDocument" />
+  name: RAGDocument
+- description: <SchemaDefinition schemaRef="#/components/schemas/RAGQueryConfig" />
+  name: RAGQueryConfig
+- description: <SchemaDefinition schemaRef="#/components/schemas/RAGQueryGeneratorConfig"
+    />
+  name: RAGQueryGeneratorConfig
+- description: <SchemaDefinition schemaRef="#/components/schemas/RAGQueryResult" />
+  name: RAGQueryResult
 - description: <SchemaDefinition schemaRef="#/components/schemas/RegexParserScoringFnParams"
     />
   name: RegexParserScoringFnParams
@@ -6031,6 +6232,7 @@ x-tagGroups:
   - DataConfig
   - Dataset
   - DatasetFormat
+  - DefaultRAGQueryGeneratorConfig
   - EfficiencyConfig
   - EmbeddingsRequest
   - EmbeddingsResponse
@@ -6043,6 +6245,7 @@ x-tagGroups:
   - ImageDelta
   - InferenceStep
   - InsertChunksRequest
+  - InsertDocumentsRequest
   - InterleavedContent
   - InterleavedContentItem
   - InvokeToolRequest
@@ -6050,6 +6253,7 @@ x-tagGroups:
   - JobStatus
   - JsonType
   - LLMAsJudgeScoringFnParams
+  - LLMRAGQueryGeneratorConfig
   - ListDatasetsResponse
   - ListEvalTasksResponse
   - ListModelsResponse
@@ -6086,9 +6290,14 @@ x-tagGroups:
   - QueryChunksResponse
   - QueryCondition
   - QueryConditionOp
+  - QueryContextRequest
   - QuerySpanTreeResponse
   - QuerySpansResponse
   - QueryTracesResponse
+  - RAGDocument
+  - RAGQueryConfig
+  - RAGQueryGeneratorConfig
+  - RAGQueryResult
   - RegexParserScoringFnParams
   - RegisterDatasetRequest
   - RegisterEvalTaskRequest
diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py
index 180ec0ecc..f0c34dba4 100644
--- a/llama_stack/distribution/stack.py
+++ b/llama_stack/distribution/stack.py
@@ -29,7 +29,7 @@ from llama_stack.apis.scoring_functions import ScoringFunctions
 from llama_stack.apis.shields import Shields
 from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration
 from llama_stack.apis.telemetry import Telemetry
-from llama_stack.apis.tools import ToolGroups, ToolRuntime
+from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDBs
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.distribution.datatypes import StackRunConfig
@@ -62,6 +62,7 @@ class LlamaStack(
     Inspect,
     ToolGroups,
     ToolRuntime,
+    RAGToolRuntime,
 ):
     pass