revert: "chore(cleanup)!: remove tool_runtime.rag_tool" (#3877)

Reverts llamastack/llama-stack#3871 This PR broke RAG (even from Responses -- there _is_ a dependency)
2025-10-23 00:27:26 +00:00 · 2025-10-21 11:22:06 -07:00 · 2025-10-21 11:22:06 -07:00 · bd3c473208
commit bd3c473208
parent eb3e9b85f9
55 changed files with 3114 additions and 17 deletions
--- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
@ -21,7 +21,7 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
 | inference | `inline::meta-reference` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
-| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::model-context-protocol` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` |
 | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |


--- a/docs/docs/distributions/self_hosted_distro/nvidia.md
+++ b/docs/docs/distributions/self_hosted_distro/nvidia.md
@ -16,7 +16,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov
 | post_training | `remote::nvidia` |
 | safety | `remote::nvidia` |
 | scoring | `inline::basic` |
-| tool_runtime |  |
+| tool_runtime | `inline::rag-runtime` |
 | vector_io | `inline::faiss` |


--- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
+++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
@ -28,7 +28,7 @@ description: |
  #### Empirical Example

  Consider the histogram below in which 10,000 randomly generated strings were inserted
-  in batches of 100 into both Faiss and sqlite-vec.
+  in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`.

  ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
  :alt: Comparison of SQLite-Vec and Faiss write times
@ -233,7 +233,7 @@ Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, i
 #### Empirical Example

 Consider the histogram below in which 10,000 randomly generated strings were inserted
-in batches of 100 into both Faiss and sqlite-vec.
+in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`.

 ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
 :alt: Comparison of SQLite-Vec and Faiss write times
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@ -196,10 +196,16 @@ def _get_endpoint_functions(
 def _get_defining_class(member_fn: str, derived_cls: type) -> type:
    "Find the class in which a member function is first defined in a class inheritance hierarchy."

+    # This import must be dynamic here
+    from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime
+
    # iterate in reverse member resolution order to find most specific class first
    for cls in reversed(inspect.getmro(derived_cls)):
        for name, _ in inspect.getmembers(cls, inspect.isfunction):
            if name == member_fn:
+                # HACK ALERT
+                if cls == RAGToolRuntime:
+                    return ToolRuntime
                return cls

    raise ValidationError(
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@ -2624,6 +2624,89 @@
                "deprecated": false
            }
        },
+        "/v1/tool-runtime/rag-tool/insert": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "ToolRuntime"
+                ],
+                "summary": "Index documents so they can be used by the RAG system.",
+                "description": "Index documents so they can be used by the RAG system.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/InsertRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                },
+                "deprecated": false
+            }
+        },
+        "/v1/tool-runtime/rag-tool/query": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "RAGQueryResult containing the retrieved content and metadata",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/RAGQueryResult"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "ToolRuntime"
+                ],
+                "summary": "Query the RAG system for context; typically invoked by the agent.",
+                "description": "Query the RAG system for context; typically invoked by the agent.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/QueryRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                },
+                "deprecated": false
+            }
+        },
        "/v1/toolgroups": {
            "get": {
                "responses": {
@ -11300,6 +11383,346 @@
                "title": "ListToolDefsResponse",
                "description": "Response containing a list of tool definitions."
            },
+            "RAGDocument": {
+                "type": "object",
+                "properties": {
+                    "document_id": {
+                        "type": "string",
+                        "description": "The unique identifier for the document."
+                    },
+                    "content": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "$ref": "#/components/schemas/InterleavedContentItem"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "$ref": "#/components/schemas/InterleavedContentItem"
+                                }
+                            },
+                            {
+                                "$ref": "#/components/schemas/URL"
+                            }
+                        ],
+                        "description": "The content of the document."
+                    },
+                    "mime_type": {
+                        "type": "string",
+                        "description": "The MIME type of the document."
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        },
+                        "description": "Additional metadata for the document."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "document_id",
+                    "content",
+                    "metadata"
+                ],
+                "title": "RAGDocument",
+                "description": "A document to be used for document ingestion in the RAG Tool."
+            },
+            "InsertRequest": {
+                "type": "object",
+                "properties": {
+                    "documents": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/RAGDocument"
+                        },
+                        "description": "List of documents to index in the RAG system"
+                    },
+                    "vector_db_id": {
+                        "type": "string",
+                        "description": "ID of the vector database to store the document embeddings"
+                    },
+                    "chunk_size_in_tokens": {
+                        "type": "integer",
+                        "description": "(Optional) Size in tokens for document chunking during indexing"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "documents",
+                    "vector_db_id",
+                    "chunk_size_in_tokens"
+                ],
+                "title": "InsertRequest"
+            },
+            "DefaultRAGQueryGeneratorConfig": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "default",
+                        "default": "default",
+                        "description": "Type of query generator, always 'default'"
+                    },
+                    "separator": {
+                        "type": "string",
+                        "default": " ",
+                        "description": "String separator used to join query terms"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "separator"
+                ],
+                "title": "DefaultRAGQueryGeneratorConfig",
+                "description": "Configuration for the default RAG query generator."
+            },
+            "LLMRAGQueryGeneratorConfig": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "llm",
+                        "default": "llm",
+                        "description": "Type of query generator, always 'llm'"
+                    },
+                    "model": {
+                        "type": "string",
+                        "description": "Name of the language model to use for query generation"
+                    },
+                    "template": {
+                        "type": "string",
+                        "description": "Template string for formatting the query generation prompt"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "model",
+                    "template"
+                ],
+                "title": "LLMRAGQueryGeneratorConfig",
+                "description": "Configuration for the LLM-based RAG query generator."
+            },
+            "RAGQueryConfig": {
+                "type": "object",
+                "properties": {
+                    "query_generator_config": {
+                        "oneOf": [
+                            {
+                                "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig"
+                            },
+                            {
+                                "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig"
+                            }
+                        ],
+                        "discriminator": {
+                            "propertyName": "type",
+                            "mapping": {
+                                "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig",
+                                "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig"
+                            }
+                        },
+                        "description": "Configuration for the query generator."
+                    },
+                    "max_tokens_in_context": {
+                        "type": "integer",
+                        "default": 4096,
+                        "description": "Maximum number of tokens in the context."
+                    },
+                    "max_chunks": {
+                        "type": "integer",
+                        "default": 5,
+                        "description": "Maximum number of chunks to retrieve."
+                    },
+                    "chunk_template": {
+                        "type": "string",
+                        "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
+                        "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\""
+                    },
+                    "mode": {
+                        "$ref": "#/components/schemas/RAGSearchMode",
+                        "default": "vector",
+                        "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"."
+                    },
+                    "ranker": {
+                        "$ref": "#/components/schemas/Ranker",
+                        "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "query_generator_config",
+                    "max_tokens_in_context",
+                    "max_chunks",
+                    "chunk_template"
+                ],
+                "title": "RAGQueryConfig",
+                "description": "Configuration for the RAG query generation."
+            },
+            "RAGSearchMode": {
+                "type": "string",
+                "enum": [
+                    "vector",
+                    "keyword",
+                    "hybrid"
+                ],
+                "title": "RAGSearchMode",
+                "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results"
+            },
+            "RRFRanker": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "rrf",
+                        "default": "rrf",
+                        "description": "The type of ranker, always \"rrf\""
+                    },
+                    "impact_factor": {
+                        "type": "number",
+                        "default": 60.0,
+                        "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "impact_factor"
+                ],
+                "title": "RRFRanker",
+                "description": "Reciprocal Rank Fusion (RRF) ranker configuration."
+            },
+            "Ranker": {
+                "oneOf": [
+                    {
+                        "$ref": "#/components/schemas/RRFRanker"
+                    },
+                    {
+                        "$ref": "#/components/schemas/WeightedRanker"
+                    }
+                ],
+                "discriminator": {
+                    "propertyName": "type",
+                    "mapping": {
+                        "rrf": "#/components/schemas/RRFRanker",
+                        "weighted": "#/components/schemas/WeightedRanker"
+                    }
+                }
+            },
+            "WeightedRanker": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "weighted",
+                        "default": "weighted",
+                        "description": "The type of ranker, always \"weighted\""
+                    },
+                    "alpha": {
+                        "type": "number",
+                        "default": 0.5,
+                        "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "alpha"
+                ],
+                "title": "WeightedRanker",
+                "description": "Weighted ranker configuration that combines vector and keyword scores."
+            },
+            "QueryRequest": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "$ref": "#/components/schemas/InterleavedContent",
+                        "description": "The query content to search for in the indexed documents"
+                    },
+                    "vector_db_ids": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "description": "List of vector database IDs to search within"
+                    },
+                    "query_config": {
+                        "$ref": "#/components/schemas/RAGQueryConfig",
+                        "description": "(Optional) Configuration parameters for the query operation"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "content",
+                    "vector_db_ids"
+                ],
+                "title": "QueryRequest"
+            },
+            "RAGQueryResult": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "$ref": "#/components/schemas/InterleavedContent",
+                        "description": "(Optional) The retrieved content from the query"
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        },
+                        "description": "Additional metadata about the query result"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "metadata"
+                ],
+                "title": "RAGQueryResult",
+                "description": "Result of a RAG query containing retrieved content and metadata."
+            },
            "ToolGroup": {
                "type": "object",
                "properties": {
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -2036,6 +2036,69 @@ paths:
          schema:
            $ref: '#/components/schemas/URL'
      deprecated: false
+  /v1/tool-runtime/rag-tool/insert:
+    post:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - ToolRuntime
+      summary: >-
+        Index documents so they can be used by the RAG system.
+      description: >-
+        Index documents so they can be used by the RAG system.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/InsertRequest'
+        required: true
+      deprecated: false
+  /v1/tool-runtime/rag-tool/query:
+    post:
+      responses:
+        '200':
+          description: >-
+            RAGQueryResult containing the retrieved content and metadata
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/RAGQueryResult'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - ToolRuntime
+      summary: >-
+        Query the RAG system for context; typically invoked by the agent.
+      description: >-
+        Query the RAG system for context; typically invoked by the agent.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/QueryRequest'
+        required: true
+      deprecated: false
  /v1/toolgroups:
    get:
      responses:
@ -8645,6 +8708,274 @@ components:
      title: ListToolDefsResponse
      description: >-
        Response containing a list of tool definitions.
+    RAGDocument:
+      type: object
+      properties:
+        document_id:
+          type: string
+          description: The unique identifier for the document.
+        content:
+          oneOf:
+            - type: string
+            - $ref: '#/components/schemas/InterleavedContentItem'
+            - type: array
+              items:
+                $ref: '#/components/schemas/InterleavedContentItem'
+            - $ref: '#/components/schemas/URL'
+          description: The content of the document.
+        mime_type:
+          type: string
+          description: The MIME type of the document.
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: Additional metadata for the document.
+      additionalProperties: false
+      required:
+        - document_id
+        - content
+        - metadata
+      title: RAGDocument
+      description: >-
+        A document to be used for document ingestion in the RAG Tool.
+    InsertRequest:
+      type: object
+      properties:
+        documents:
+          type: array
+          items:
+            $ref: '#/components/schemas/RAGDocument'
+          description: >-
+            List of documents to index in the RAG system
+        vector_db_id:
+          type: string
+          description: >-
+            ID of the vector database to store the document embeddings
+        chunk_size_in_tokens:
+          type: integer
+          description: >-
+            (Optional) Size in tokens for document chunking during indexing
+      additionalProperties: false
+      required:
+        - documents
+        - vector_db_id
+        - chunk_size_in_tokens
+      title: InsertRequest
+    DefaultRAGQueryGeneratorConfig:
+      type: object
+      properties:
+        type:
+          type: string
+          const: default
+          default: default
+          description: >-
+            Type of query generator, always 'default'
+        separator:
+          type: string
+          default: ' '
+          description: >-
+            String separator used to join query terms
+      additionalProperties: false
+      required:
+        - type
+        - separator
+      title: DefaultRAGQueryGeneratorConfig
+      description: >-
+        Configuration for the default RAG query generator.
+    LLMRAGQueryGeneratorConfig:
+      type: object
+      properties:
+        type:
+          type: string
+          const: llm
+          default: llm
+          description: Type of query generator, always 'llm'
+        model:
+          type: string
+          description: >-
+            Name of the language model to use for query generation
+        template:
+          type: string
+          description: >-
+            Template string for formatting the query generation prompt
+      additionalProperties: false
+      required:
+        - type
+        - model
+        - template
+      title: LLMRAGQueryGeneratorConfig
+      description: >-
+        Configuration for the LLM-based RAG query generator.
+    RAGQueryConfig:
+      type: object
+      properties:
+        query_generator_config:
+          oneOf:
+            - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
+            - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
+          discriminator:
+            propertyName: type
+            mapping:
+              default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
+              llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
+          description: Configuration for the query generator.
+        max_tokens_in_context:
+          type: integer
+          default: 4096
+          description: Maximum number of tokens in the context.
+        max_chunks:
+          type: integer
+          default: 5
+          description: Maximum number of chunks to retrieve.
+        chunk_template:
+          type: string
+          default: >
+            Result {index}
+
+            Content: {chunk.content}
+
+            Metadata: {metadata}
+          description: >-
+            Template for formatting each retrieved chunk in the context. Available
+            placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
+            content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
+            {chunk.content}\nMetadata: {metadata}\n"
+        mode:
+          $ref: '#/components/schemas/RAGSearchMode'
+          default: vector
+          description: >-
+            Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
+            "vector".
+        ranker:
+          $ref: '#/components/schemas/Ranker'
+          description: >-
+            Configuration for the ranker to use in hybrid search. Defaults to RRF
+            ranker.
+      additionalProperties: false
+      required:
+        - query_generator_config
+        - max_tokens_in_context
+        - max_chunks
+        - chunk_template
+      title: RAGQueryConfig
+      description: >-
+        Configuration for the RAG query generation.
+    RAGSearchMode:
+      type: string
+      enum:
+        - vector
+        - keyword
+        - hybrid
+      title: RAGSearchMode
+      description: >-
+        Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search
+        for semantic matching - KEYWORD: Uses keyword-based search for exact matching
+        - HYBRID: Combines both vector and keyword search for better results
+    RRFRanker:
+      type: object
+      properties:
+        type:
+          type: string
+          const: rrf
+          default: rrf
+          description: The type of ranker, always "rrf"
+        impact_factor:
+          type: number
+          default: 60.0
+          description: >-
+            The impact factor for RRF scoring. Higher values give more weight to higher-ranked
+            results. Must be greater than 0
+      additionalProperties: false
+      required:
+        - type
+        - impact_factor
+      title: RRFRanker
+      description: >-
+        Reciprocal Rank Fusion (RRF) ranker configuration.
+    Ranker:
+      oneOf:
+        - $ref: '#/components/schemas/RRFRanker'
+        - $ref: '#/components/schemas/WeightedRanker'
+      discriminator:
+        propertyName: type
+        mapping:
+          rrf: '#/components/schemas/RRFRanker'
+          weighted: '#/components/schemas/WeightedRanker'
+    WeightedRanker:
+      type: object
+      properties:
+        type:
+          type: string
+          const: weighted
+          default: weighted
+          description: The type of ranker, always "weighted"
+        alpha:
+          type: number
+          default: 0.5
+          description: >-
+            Weight factor between 0 and 1. 0 means only use keyword scores, 1 means
+            only use vector scores, values in between blend both scores.
+      additionalProperties: false
+      required:
+        - type
+        - alpha
+      title: WeightedRanker
+      description: >-
+        Weighted ranker configuration that combines vector and keyword scores.
+    QueryRequest:
+      type: object
+      properties:
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+          description: >-
+            The query content to search for in the indexed documents
+        vector_db_ids:
+          type: array
+          items:
+            type: string
+          description: >-
+            List of vector database IDs to search within
+        query_config:
+          $ref: '#/components/schemas/RAGQueryConfig'
+          description: >-
+            (Optional) Configuration parameters for the query operation
+      additionalProperties: false
+      required:
+        - content
+        - vector_db_ids
+      title: QueryRequest
+    RAGQueryResult:
+      type: object
+      properties:
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+          description: >-
+            (Optional) The retrieved content from the query
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            Additional metadata about the query result
+      additionalProperties: false
+      required:
+        - metadata
+      title: RAGQueryResult
+      description: >-
+        Result of a RAG query containing retrieved content and metadata.
    ToolGroup:
      type: object
      properties:
--- a/docs/static/stainless-llama-stack-spec.html
+++ b/docs/static/stainless-llama-stack-spec.html
@ -2624,6 +2624,89 @@
                "deprecated": false
            }
        },
+        "/v1/tool-runtime/rag-tool/insert": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "ToolRuntime"
+                ],
+                "summary": "Index documents so they can be used by the RAG system.",
+                "description": "Index documents so they can be used by the RAG system.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/InsertRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                },
+                "deprecated": false
+            }
+        },
+        "/v1/tool-runtime/rag-tool/query": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "RAGQueryResult containing the retrieved content and metadata",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/RAGQueryResult"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "ToolRuntime"
+                ],
+                "summary": "Query the RAG system for context; typically invoked by the agent.",
+                "description": "Query the RAG system for context; typically invoked by the agent.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/QueryRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                },
+                "deprecated": false
+            }
+        },
        "/v1/toolgroups": {
            "get": {
                "responses": {
@ -12972,6 +13055,346 @@
                "title": "ListToolDefsResponse",
                "description": "Response containing a list of tool definitions."
            },
+            "RAGDocument": {
+                "type": "object",
+                "properties": {
+                    "document_id": {
+                        "type": "string",
+                        "description": "The unique identifier for the document."
+                    },
+                    "content": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "$ref": "#/components/schemas/InterleavedContentItem"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "$ref": "#/components/schemas/InterleavedContentItem"
+                                }
+                            },
+                            {
+                                "$ref": "#/components/schemas/URL"
+                            }
+                        ],
+                        "description": "The content of the document."
+                    },
+                    "mime_type": {
+                        "type": "string",
+                        "description": "The MIME type of the document."
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        },
+                        "description": "Additional metadata for the document."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "document_id",
+                    "content",
+                    "metadata"
+                ],
+                "title": "RAGDocument",
+                "description": "A document to be used for document ingestion in the RAG Tool."
+            },
+            "InsertRequest": {
+                "type": "object",
+                "properties": {
+                    "documents": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/RAGDocument"
+                        },
+                        "description": "List of documents to index in the RAG system"
+                    },
+                    "vector_db_id": {
+                        "type": "string",
+                        "description": "ID of the vector database to store the document embeddings"
+                    },
+                    "chunk_size_in_tokens": {
+                        "type": "integer",
+                        "description": "(Optional) Size in tokens for document chunking during indexing"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "documents",
+                    "vector_db_id",
+                    "chunk_size_in_tokens"
+                ],
+                "title": "InsertRequest"
+            },
+            "DefaultRAGQueryGeneratorConfig": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "default",
+                        "default": "default",
+                        "description": "Type of query generator, always 'default'"
+                    },
+                    "separator": {
+                        "type": "string",
+                        "default": " ",
+                        "description": "String separator used to join query terms"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "separator"
+                ],
+                "title": "DefaultRAGQueryGeneratorConfig",
+                "description": "Configuration for the default RAG query generator."
+            },
+            "LLMRAGQueryGeneratorConfig": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "llm",
+                        "default": "llm",
+                        "description": "Type of query generator, always 'llm'"
+                    },
+                    "model": {
+                        "type": "string",
+                        "description": "Name of the language model to use for query generation"
+                    },
+                    "template": {
+                        "type": "string",
+                        "description": "Template string for formatting the query generation prompt"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "model",
+                    "template"
+                ],
+                "title": "LLMRAGQueryGeneratorConfig",
+                "description": "Configuration for the LLM-based RAG query generator."
+            },
+            "RAGQueryConfig": {
+                "type": "object",
+                "properties": {
+                    "query_generator_config": {
+                        "oneOf": [
+                            {
+                                "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig"
+                            },
+                            {
+                                "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig"
+                            }
+                        ],
+                        "discriminator": {
+                            "propertyName": "type",
+                            "mapping": {
+                                "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig",
+                                "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig"
+                            }
+                        },
+                        "description": "Configuration for the query generator."
+                    },
+                    "max_tokens_in_context": {
+                        "type": "integer",
+                        "default": 4096,
+                        "description": "Maximum number of tokens in the context."
+                    },
+                    "max_chunks": {
+                        "type": "integer",
+                        "default": 5,
+                        "description": "Maximum number of chunks to retrieve."
+                    },
+                    "chunk_template": {
+                        "type": "string",
+                        "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
+                        "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\""
+                    },
+                    "mode": {
+                        "$ref": "#/components/schemas/RAGSearchMode",
+                        "default": "vector",
+                        "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"."
+                    },
+                    "ranker": {
+                        "$ref": "#/components/schemas/Ranker",
+                        "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "query_generator_config",
+                    "max_tokens_in_context",
+                    "max_chunks",
+                    "chunk_template"
+                ],
+                "title": "RAGQueryConfig",
+                "description": "Configuration for the RAG query generation."
+            },
+            "RAGSearchMode": {
+                "type": "string",
+                "enum": [
+                    "vector",
+                    "keyword",
+                    "hybrid"
+                ],
+                "title": "RAGSearchMode",
+                "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results"
+            },
+            "RRFRanker": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "rrf",
+                        "default": "rrf",
+                        "description": "The type of ranker, always \"rrf\""
+                    },
+                    "impact_factor": {
+                        "type": "number",
+                        "default": 60.0,
+                        "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "impact_factor"
+                ],
+                "title": "RRFRanker",
+                "description": "Reciprocal Rank Fusion (RRF) ranker configuration."
+            },
+            "Ranker": {
+                "oneOf": [
+                    {
+                        "$ref": "#/components/schemas/RRFRanker"
+                    },
+                    {
+                        "$ref": "#/components/schemas/WeightedRanker"
+                    }
+                ],
+                "discriminator": {
+                    "propertyName": "type",
+                    "mapping": {
+                        "rrf": "#/components/schemas/RRFRanker",
+                        "weighted": "#/components/schemas/WeightedRanker"
+                    }
+                }
+            },
+            "WeightedRanker": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "weighted",
+                        "default": "weighted",
+                        "description": "The type of ranker, always \"weighted\""
+                    },
+                    "alpha": {
+                        "type": "number",
+                        "default": 0.5,
+                        "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "alpha"
+                ],
+                "title": "WeightedRanker",
+                "description": "Weighted ranker configuration that combines vector and keyword scores."
+            },
+            "QueryRequest": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "$ref": "#/components/schemas/InterleavedContent",
+                        "description": "The query content to search for in the indexed documents"
+                    },
+                    "vector_db_ids": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "description": "List of vector database IDs to search within"
+                    },
+                    "query_config": {
+                        "$ref": "#/components/schemas/RAGQueryConfig",
+                        "description": "(Optional) Configuration parameters for the query operation"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "content",
+                    "vector_db_ids"
+                ],
+                "title": "QueryRequest"
+            },
+            "RAGQueryResult": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "$ref": "#/components/schemas/InterleavedContent",
+                        "description": "(Optional) The retrieved content from the query"
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        },
+                        "description": "Additional metadata about the query result"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "metadata"
+                ],
+                "title": "RAGQueryResult",
+                "description": "Result of a RAG query containing retrieved content and metadata."
+            },
            "ToolGroup": {
                "type": "object",
                "properties": {
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@ -2039,6 +2039,69 @@ paths:
          schema:
            $ref: '#/components/schemas/URL'
      deprecated: false
+  /v1/tool-runtime/rag-tool/insert:
+    post:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - ToolRuntime
+      summary: >-
+        Index documents so they can be used by the RAG system.
+      description: >-
+        Index documents so they can be used by the RAG system.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/InsertRequest'
+        required: true
+      deprecated: false
+  /v1/tool-runtime/rag-tool/query:
+    post:
+      responses:
+        '200':
+          description: >-
+            RAGQueryResult containing the retrieved content and metadata
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/RAGQueryResult'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - ToolRuntime
+      summary: >-
+        Query the RAG system for context; typically invoked by the agent.
+      description: >-
+        Query the RAG system for context; typically invoked by the agent.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/QueryRequest'
+        required: true
+      deprecated: false
  /v1/toolgroups:
    get:
      responses:
@ -9858,6 +9921,274 @@ components:
      title: ListToolDefsResponse
      description: >-
        Response containing a list of tool definitions.
+    RAGDocument:
+      type: object
+      properties:
+        document_id:
+          type: string
+          description: The unique identifier for the document.
+        content:
+          oneOf:
+            - type: string
+            - $ref: '#/components/schemas/InterleavedContentItem'
+            - type: array
+              items:
+                $ref: '#/components/schemas/InterleavedContentItem'
+            - $ref: '#/components/schemas/URL'
+          description: The content of the document.
+        mime_type:
+          type: string
+          description: The MIME type of the document.
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: Additional metadata for the document.
+      additionalProperties: false
+      required:
+        - document_id
+        - content
+        - metadata
+      title: RAGDocument
+      description: >-
+        A document to be used for document ingestion in the RAG Tool.
+    InsertRequest:
+      type: object
+      properties:
+        documents:
+          type: array
+          items:
+            $ref: '#/components/schemas/RAGDocument'
+          description: >-
+            List of documents to index in the RAG system
+        vector_db_id:
+          type: string
+          description: >-
+            ID of the vector database to store the document embeddings
+        chunk_size_in_tokens:
+          type: integer
+          description: >-
+            (Optional) Size in tokens for document chunking during indexing
+      additionalProperties: false
+      required:
+        - documents
+        - vector_db_id
+        - chunk_size_in_tokens
+      title: InsertRequest
+    DefaultRAGQueryGeneratorConfig:
+      type: object
+      properties:
+        type:
+          type: string
+          const: default
+          default: default
+          description: >-
+            Type of query generator, always 'default'
+        separator:
+          type: string
+          default: ' '
+          description: >-
+            String separator used to join query terms
+      additionalProperties: false
+      required:
+        - type
+        - separator
+      title: DefaultRAGQueryGeneratorConfig
+      description: >-
+        Configuration for the default RAG query generator.
+    LLMRAGQueryGeneratorConfig:
+      type: object
+      properties:
+        type:
+          type: string
+          const: llm
+          default: llm
+          description: Type of query generator, always 'llm'
+        model:
+          type: string
+          description: >-
+            Name of the language model to use for query generation
+        template:
+          type: string
+          description: >-
+            Template string for formatting the query generation prompt
+      additionalProperties: false
+      required:
+        - type
+        - model
+        - template
+      title: LLMRAGQueryGeneratorConfig
+      description: >-
+        Configuration for the LLM-based RAG query generator.
+    RAGQueryConfig:
+      type: object
+      properties:
+        query_generator_config:
+          oneOf:
+            - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
+            - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
+          discriminator:
+            propertyName: type
+            mapping:
+              default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
+              llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
+          description: Configuration for the query generator.
+        max_tokens_in_context:
+          type: integer
+          default: 4096
+          description: Maximum number of tokens in the context.
+        max_chunks:
+          type: integer
+          default: 5
+          description: Maximum number of chunks to retrieve.
+        chunk_template:
+          type: string
+          default: >
+            Result {index}
+
+            Content: {chunk.content}
+
+            Metadata: {metadata}
+          description: >-
+            Template for formatting each retrieved chunk in the context. Available
+            placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
+            content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
+            {chunk.content}\nMetadata: {metadata}\n"
+        mode:
+          $ref: '#/components/schemas/RAGSearchMode'
+          default: vector
+          description: >-
+            Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
+            "vector".
+        ranker:
+          $ref: '#/components/schemas/Ranker'
+          description: >-
+            Configuration for the ranker to use in hybrid search. Defaults to RRF
+            ranker.
+      additionalProperties: false
+      required:
+        - query_generator_config
+        - max_tokens_in_context
+        - max_chunks
+        - chunk_template
+      title: RAGQueryConfig
+      description: >-
+        Configuration for the RAG query generation.
+    RAGSearchMode:
+      type: string
+      enum:
+        - vector
+        - keyword
+        - hybrid
+      title: RAGSearchMode
+      description: >-
+        Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search
+        for semantic matching - KEYWORD: Uses keyword-based search for exact matching
+        - HYBRID: Combines both vector and keyword search for better results
+    RRFRanker:
+      type: object
+      properties:
+        type:
+          type: string
+          const: rrf
+          default: rrf
+          description: The type of ranker, always "rrf"
+        impact_factor:
+          type: number
+          default: 60.0
+          description: >-
+            The impact factor for RRF scoring. Higher values give more weight to higher-ranked
+            results. Must be greater than 0
+      additionalProperties: false
+      required:
+        - type
+        - impact_factor
+      title: RRFRanker
+      description: >-
+        Reciprocal Rank Fusion (RRF) ranker configuration.
+    Ranker:
+      oneOf:
+        - $ref: '#/components/schemas/RRFRanker'
+        - $ref: '#/components/schemas/WeightedRanker'
+      discriminator:
+        propertyName: type
+        mapping:
+          rrf: '#/components/schemas/RRFRanker'
+          weighted: '#/components/schemas/WeightedRanker'
+    WeightedRanker:
+      type: object
+      properties:
+        type:
+          type: string
+          const: weighted
+          default: weighted
+          description: The type of ranker, always "weighted"
+        alpha:
+          type: number
+          default: 0.5
+          description: >-
+            Weight factor between 0 and 1. 0 means only use keyword scores, 1 means
+            only use vector scores, values in between blend both scores.
+      additionalProperties: false
+      required:
+        - type
+        - alpha
+      title: WeightedRanker
+      description: >-
+        Weighted ranker configuration that combines vector and keyword scores.
+    QueryRequest:
+      type: object
+      properties:
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+          description: >-
+            The query content to search for in the indexed documents
+        vector_db_ids:
+          type: array
+          items:
+            type: string
+          description: >-
+            List of vector database IDs to search within
+        query_config:
+          $ref: '#/components/schemas/RAGQueryConfig'
+          description: >-
+            (Optional) Configuration parameters for the query operation
+      additionalProperties: false
+      required:
+        - content
+        - vector_db_ids
+      title: QueryRequest
+    RAGQueryResult:
+      type: object
+      properties:
+        content:
+          $ref: '#/components/schemas/InterleavedContent'
+          description: >-
+            (Optional) The retrieved content from the query
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            Additional metadata about the query result
+      additionalProperties: false
+      required:
+        - metadata
+      title: RAGQueryResult
+      description: >-
+        Result of a RAG query containing retrieved content and metadata.
    ToolGroup:
      type: object
      properties: