diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
index c14661a5a..5d9917bfd 100644
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@@ -2055,69 +2055,6 @@ paths:
           schema:
             $ref: '#/components/schemas/URL'
       deprecated: false
-  /v1/tool-runtime/rag-tool/insert:
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: >-
-        Index documents so they can be used by the RAG system.
-      description: >-
-        Index documents so they can be used by the RAG system.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/InsertRequest'
-        required: true
-      deprecated: false
-  /v1/tool-runtime/rag-tool/query:
-    post:
-      responses:
-        '200':
-          description: >-
-            RAGQueryResult containing the retrieved content and metadata
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/RAGQueryResult'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: >-
-        Query the RAG system for context; typically invoked by the agent.
-      description: >-
-        Query the RAG system for context; typically invoked by the agent.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/QueryRequest'
-        required: true
-      deprecated: false
   /v1/toolgroups:
     get:
       responses:
@@ -9633,274 +9570,6 @@ components:
       title: ListToolDefsResponse
       description: >-
         Response containing a list of tool definitions.
-    RAGDocument:
-      type: object
-      properties:
-        document_id:
-          type: string
-          description: The unique identifier for the document.
-        content:
-          oneOf:
-            - type: string
-            - $ref: '#/components/schemas/InterleavedContentItem'
-            - type: array
-              items:
-                $ref: '#/components/schemas/InterleavedContentItem'
-            - $ref: '#/components/schemas/URL'
-          description: The content of the document.
-        mime_type:
-          type: string
-          description: The MIME type of the document.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Additional metadata for the document.
-      additionalProperties: false
-      required:
-        - document_id
-        - content
-        - metadata
-      title: RAGDocument
-      description: >-
-        A document to be used for document ingestion in the RAG Tool.
-    InsertRequest:
-      type: object
-      properties:
-        documents:
-          type: array
-          items:
-            $ref: '#/components/schemas/RAGDocument'
-          description: >-
-            List of documents to index in the RAG system
-        vector_store_id:
-          type: string
-          description: >-
-            ID of the vector database to store the document embeddings
-        chunk_size_in_tokens:
-          type: integer
-          description: >-
-            (Optional) Size in tokens for document chunking during indexing
-      additionalProperties: false
-      required:
-        - documents
-        - vector_store_id
-        - chunk_size_in_tokens
-      title: InsertRequest
-    DefaultRAGQueryGeneratorConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: default
-          default: default
-          description: >-
-            Type of query generator, always 'default'
-        separator:
-          type: string
-          default: ' '
-          description: >-
-            String separator used to join query terms
-      additionalProperties: false
-      required:
-        - type
-        - separator
-      title: DefaultRAGQueryGeneratorConfig
-      description: >-
-        Configuration for the default RAG query generator.
-    LLMRAGQueryGeneratorConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: llm
-          default: llm
-          description: Type of query generator, always 'llm'
-        model:
-          type: string
-          description: >-
-            Name of the language model to use for query generation
-        template:
-          type: string
-          description: >-
-            Template string for formatting the query generation prompt
-      additionalProperties: false
-      required:
-        - type
-        - model
-        - template
-      title: LLMRAGQueryGeneratorConfig
-      description: >-
-        Configuration for the LLM-based RAG query generator.
-    RAGQueryConfig:
-      type: object
-      properties:
-        query_generator_config:
-          oneOf:
-            - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
-            - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
-          discriminator:
-            propertyName: type
-            mapping:
-              default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
-              llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
-          description: Configuration for the query generator.
-        max_tokens_in_context:
-          type: integer
-          default: 4096
-          description: Maximum number of tokens in the context.
-        max_chunks:
-          type: integer
-          default: 5
-          description: Maximum number of chunks to retrieve.
-        chunk_template:
-          type: string
-          default: >
-            Result {index}
-
-            Content: {chunk.content}
-
-            Metadata: {metadata}
-          description: >-
-            Template for formatting each retrieved chunk in the context. Available
-            placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
-            content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
-            {chunk.content}\nMetadata: {metadata}\n"
-        mode:
-          $ref: '#/components/schemas/RAGSearchMode'
-          default: vector
-          description: >-
-            Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
-            "vector".
-        ranker:
-          $ref: '#/components/schemas/Ranker'
-          description: >-
-            Configuration for the ranker to use in hybrid search. Defaults to RRF
-            ranker.
-      additionalProperties: false
-      required:
-        - query_generator_config
-        - max_tokens_in_context
-        - max_chunks
-        - chunk_template
-      title: RAGQueryConfig
-      description: >-
-        Configuration for the RAG query generation.
-    RAGSearchMode:
-      type: string
-      enum:
-        - vector
-        - keyword
-        - hybrid
-      title: RAGSearchMode
-      description: >-
-        Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search
-        for semantic matching - KEYWORD: Uses keyword-based search for exact matching
-        - HYBRID: Combines both vector and keyword search for better results
-    RRFRanker:
-      type: object
-      properties:
-        type:
-          type: string
-          const: rrf
-          default: rrf
-          description: The type of ranker, always "rrf"
-        impact_factor:
-          type: number
-          default: 60.0
-          description: >-
-            The impact factor for RRF scoring. Higher values give more weight to higher-ranked
-            results. Must be greater than 0
-      additionalProperties: false
-      required:
-        - type
-        - impact_factor
-      title: RRFRanker
-      description: >-
-        Reciprocal Rank Fusion (RRF) ranker configuration.
-    Ranker:
-      oneOf:
-        - $ref: '#/components/schemas/RRFRanker'
-        - $ref: '#/components/schemas/WeightedRanker'
-      discriminator:
-        propertyName: type
-        mapping:
-          rrf: '#/components/schemas/RRFRanker'
-          weighted: '#/components/schemas/WeightedRanker'
-    WeightedRanker:
-      type: object
-      properties:
-        type:
-          type: string
-          const: weighted
-          default: weighted
-          description: The type of ranker, always "weighted"
-        alpha:
-          type: number
-          default: 0.5
-          description: >-
-            Weight factor between 0 and 1. 0 means only use keyword scores, 1 means
-            only use vector scores, values in between blend both scores.
-      additionalProperties: false
-      required:
-        - type
-        - alpha
-      title: WeightedRanker
-      description: >-
-        Weighted ranker configuration that combines vector and keyword scores.
-    QueryRequest:
-      type: object
-      properties:
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The query content to search for in the indexed documents
-        vector_store_ids:
-          type: array
-          items:
-            type: string
-          description: >-
-            List of vector database IDs to search within
-        query_config:
-          $ref: '#/components/schemas/RAGQueryConfig'
-          description: >-
-            (Optional) Configuration parameters for the query operation
-      additionalProperties: false
-      required:
-        - content
-        - vector_store_ids
-      title: QueryRequest
-    RAGQueryResult:
-      type: object
-      properties:
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            (Optional) The retrieved content from the query
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Additional metadata about the query result
-      additionalProperties: false
-      required:
-        - metadata
-      title: RAGQueryResult
-      description: >-
-        Result of a RAG query containing retrieved content and metadata.
     ToolGroup:
       type: object
       properties:
diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py
index 2970d7e53..a1c95c7a7 100644
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@@ -170,7 +170,7 @@ def _get_endpoint_functions(
         for webmethod in webmethods:
             print(f"Processing {colored(func_name, 'white')}...")
             operation_name = func_name
-            
+
             if webmethod.method == "GET":
                 prefix = "get"
             elif webmethod.method == "DELETE":
@@ -196,16 +196,10 @@ def _get_endpoint_functions(
 def _get_defining_class(member_fn: str, derived_cls: type) -> type:
     "Find the class in which a member function is first defined in a class inheritance hierarchy."
 
-    # This import must be dynamic here
-    from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime
-
     # iterate in reverse member resolution order to find most specific class first
     for cls in reversed(inspect.getmro(derived_cls)):
         for name, _ in inspect.getmembers(cls, inspect.isfunction):
             if name == member_fn:
-                # HACK ALERT
-                if cls == RAGToolRuntime:
-                    return ToolRuntime
                 return cls
 
     raise ValidationError(
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index ea6b07c0e..a705f499a 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -2052,69 +2052,6 @@ paths:
           schema:
             $ref: '#/components/schemas/URL'
       deprecated: false
-  /v1/tool-runtime/rag-tool/insert:
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: >-
-        Index documents so they can be used by the RAG system.
-      description: >-
-        Index documents so they can be used by the RAG system.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/InsertRequest'
-        required: true
-      deprecated: false
-  /v1/tool-runtime/rag-tool/query:
-    post:
-      responses:
-        '200':
-          description: >-
-            RAGQueryResult containing the retrieved content and metadata
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/RAGQueryResult'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: >-
-        Query the RAG system for context; typically invoked by the agent.
-      description: >-
-        Query the RAG system for context; typically invoked by the agent.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/QueryRequest'
-        required: true
-      deprecated: false
   /v1/toolgroups:
     get:
       responses:
@@ -8917,274 +8854,6 @@ components:
       title: ListToolDefsResponse
       description: >-
         Response containing a list of tool definitions.
-    RAGDocument:
-      type: object
-      properties:
-        document_id:
-          type: string
-          description: The unique identifier for the document.
-        content:
-          oneOf:
-            - type: string
-            - $ref: '#/components/schemas/InterleavedContentItem'
-            - type: array
-              items:
-                $ref: '#/components/schemas/InterleavedContentItem'
-            - $ref: '#/components/schemas/URL'
-          description: The content of the document.
-        mime_type:
-          type: string
-          description: The MIME type of the document.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Additional metadata for the document.
-      additionalProperties: false
-      required:
-        - document_id
-        - content
-        - metadata
-      title: RAGDocument
-      description: >-
-        A document to be used for document ingestion in the RAG Tool.
-    InsertRequest:
-      type: object
-      properties:
-        documents:
-          type: array
-          items:
-            $ref: '#/components/schemas/RAGDocument'
-          description: >-
-            List of documents to index in the RAG system
-        vector_store_id:
-          type: string
-          description: >-
-            ID of the vector database to store the document embeddings
-        chunk_size_in_tokens:
-          type: integer
-          description: >-
-            (Optional) Size in tokens for document chunking during indexing
-      additionalProperties: false
-      required:
-        - documents
-        - vector_store_id
-        - chunk_size_in_tokens
-      title: InsertRequest
-    DefaultRAGQueryGeneratorConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: default
-          default: default
-          description: >-
-            Type of query generator, always 'default'
-        separator:
-          type: string
-          default: ' '
-          description: >-
-            String separator used to join query terms
-      additionalProperties: false
-      required:
-        - type
-        - separator
-      title: DefaultRAGQueryGeneratorConfig
-      description: >-
-        Configuration for the default RAG query generator.
-    LLMRAGQueryGeneratorConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: llm
-          default: llm
-          description: Type of query generator, always 'llm'
-        model:
-          type: string
-          description: >-
-            Name of the language model to use for query generation
-        template:
-          type: string
-          description: >-
-            Template string for formatting the query generation prompt
-      additionalProperties: false
-      required:
-        - type
-        - model
-        - template
-      title: LLMRAGQueryGeneratorConfig
-      description: >-
-        Configuration for the LLM-based RAG query generator.
-    RAGQueryConfig:
-      type: object
-      properties:
-        query_generator_config:
-          oneOf:
-            - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
-            - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
-          discriminator:
-            propertyName: type
-            mapping:
-              default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
-              llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
-          description: Configuration for the query generator.
-        max_tokens_in_context:
-          type: integer
-          default: 4096
-          description: Maximum number of tokens in the context.
-        max_chunks:
-          type: integer
-          default: 5
-          description: Maximum number of chunks to retrieve.
-        chunk_template:
-          type: string
-          default: >
-            Result {index}
-
-            Content: {chunk.content}
-
-            Metadata: {metadata}
-          description: >-
-            Template for formatting each retrieved chunk in the context. Available
-            placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
-            content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
-            {chunk.content}\nMetadata: {metadata}\n"
-        mode:
-          $ref: '#/components/schemas/RAGSearchMode'
-          default: vector
-          description: >-
-            Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
-            "vector".
-        ranker:
-          $ref: '#/components/schemas/Ranker'
-          description: >-
-            Configuration for the ranker to use in hybrid search. Defaults to RRF
-            ranker.
-      additionalProperties: false
-      required:
-        - query_generator_config
-        - max_tokens_in_context
-        - max_chunks
-        - chunk_template
-      title: RAGQueryConfig
-      description: >-
-        Configuration for the RAG query generation.
-    RAGSearchMode:
-      type: string
-      enum:
-        - vector
-        - keyword
-        - hybrid
-      title: RAGSearchMode
-      description: >-
-        Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search
-        for semantic matching - KEYWORD: Uses keyword-based search for exact matching
-        - HYBRID: Combines both vector and keyword search for better results
-    RRFRanker:
-      type: object
-      properties:
-        type:
-          type: string
-          const: rrf
-          default: rrf
-          description: The type of ranker, always "rrf"
-        impact_factor:
-          type: number
-          default: 60.0
-          description: >-
-            The impact factor for RRF scoring. Higher values give more weight to higher-ranked
-            results. Must be greater than 0
-      additionalProperties: false
-      required:
-        - type
-        - impact_factor
-      title: RRFRanker
-      description: >-
-        Reciprocal Rank Fusion (RRF) ranker configuration.
-    Ranker:
-      oneOf:
-        - $ref: '#/components/schemas/RRFRanker'
-        - $ref: '#/components/schemas/WeightedRanker'
-      discriminator:
-        propertyName: type
-        mapping:
-          rrf: '#/components/schemas/RRFRanker'
-          weighted: '#/components/schemas/WeightedRanker'
-    WeightedRanker:
-      type: object
-      properties:
-        type:
-          type: string
-          const: weighted
-          default: weighted
-          description: The type of ranker, always "weighted"
-        alpha:
-          type: number
-          default: 0.5
-          description: >-
-            Weight factor between 0 and 1. 0 means only use keyword scores, 1 means
-            only use vector scores, values in between blend both scores.
-      additionalProperties: false
-      required:
-        - type
-        - alpha
-      title: WeightedRanker
-      description: >-
-        Weighted ranker configuration that combines vector and keyword scores.
-    QueryRequest:
-      type: object
-      properties:
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The query content to search for in the indexed documents
-        vector_store_ids:
-          type: array
-          items:
-            type: string
-          description: >-
-            List of vector database IDs to search within
-        query_config:
-          $ref: '#/components/schemas/RAGQueryConfig'
-          description: >-
-            (Optional) Configuration parameters for the query operation
-      additionalProperties: false
-      required:
-        - content
-        - vector_store_ids
-      title: QueryRequest
-    RAGQueryResult:
-      type: object
-      properties:
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            (Optional) The retrieved content from the query
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Additional metadata about the query result
-      additionalProperties: false
-      required:
-        - metadata
-      title: RAGQueryResult
-      description: >-
-        Result of a RAG query containing retrieved content and metadata.
     ToolGroup:
       type: object
       properties:
diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml
index c14661a5a..5d9917bfd 100644
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -2055,69 +2055,6 @@ paths:
           schema:
             $ref: '#/components/schemas/URL'
       deprecated: false
-  /v1/tool-runtime/rag-tool/insert:
-    post:
-      responses:
-        '200':
-          description: OK
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: >-
-        Index documents so they can be used by the RAG system.
-      description: >-
-        Index documents so they can be used by the RAG system.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/InsertRequest'
-        required: true
-      deprecated: false
-  /v1/tool-runtime/rag-tool/query:
-    post:
-      responses:
-        '200':
-          description: >-
-            RAGQueryResult containing the retrieved content and metadata
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/RAGQueryResult'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - ToolRuntime
-      summary: >-
-        Query the RAG system for context; typically invoked by the agent.
-      description: >-
-        Query the RAG system for context; typically invoked by the agent.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/QueryRequest'
-        required: true
-      deprecated: false
   /v1/toolgroups:
     get:
       responses:
@@ -9633,274 +9570,6 @@ components:
       title: ListToolDefsResponse
       description: >-
         Response containing a list of tool definitions.
-    RAGDocument:
-      type: object
-      properties:
-        document_id:
-          type: string
-          description: The unique identifier for the document.
-        content:
-          oneOf:
-            - type: string
-            - $ref: '#/components/schemas/InterleavedContentItem'
-            - type: array
-              items:
-                $ref: '#/components/schemas/InterleavedContentItem'
-            - $ref: '#/components/schemas/URL'
-          description: The content of the document.
-        mime_type:
-          type: string
-          description: The MIME type of the document.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Additional metadata for the document.
-      additionalProperties: false
-      required:
-        - document_id
-        - content
-        - metadata
-      title: RAGDocument
-      description: >-
-        A document to be used for document ingestion in the RAG Tool.
-    InsertRequest:
-      type: object
-      properties:
-        documents:
-          type: array
-          items:
-            $ref: '#/components/schemas/RAGDocument'
-          description: >-
-            List of documents to index in the RAG system
-        vector_store_id:
-          type: string
-          description: >-
-            ID of the vector database to store the document embeddings
-        chunk_size_in_tokens:
-          type: integer
-          description: >-
-            (Optional) Size in tokens for document chunking during indexing
-      additionalProperties: false
-      required:
-        - documents
-        - vector_store_id
-        - chunk_size_in_tokens
-      title: InsertRequest
-    DefaultRAGQueryGeneratorConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: default
-          default: default
-          description: >-
-            Type of query generator, always 'default'
-        separator:
-          type: string
-          default: ' '
-          description: >-
-            String separator used to join query terms
-      additionalProperties: false
-      required:
-        - type
-        - separator
-      title: DefaultRAGQueryGeneratorConfig
-      description: >-
-        Configuration for the default RAG query generator.
-    LLMRAGQueryGeneratorConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: llm
-          default: llm
-          description: Type of query generator, always 'llm'
-        model:
-          type: string
-          description: >-
-            Name of the language model to use for query generation
-        template:
-          type: string
-          description: >-
-            Template string for formatting the query generation prompt
-      additionalProperties: false
-      required:
-        - type
-        - model
-        - template
-      title: LLMRAGQueryGeneratorConfig
-      description: >-
-        Configuration for the LLM-based RAG query generator.
-    RAGQueryConfig:
-      type: object
-      properties:
-        query_generator_config:
-          oneOf:
-            - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
-            - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
-          discriminator:
-            propertyName: type
-            mapping:
-              default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
-              llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
-          description: Configuration for the query generator.
-        max_tokens_in_context:
-          type: integer
-          default: 4096
-          description: Maximum number of tokens in the context.
-        max_chunks:
-          type: integer
-          default: 5
-          description: Maximum number of chunks to retrieve.
-        chunk_template:
-          type: string
-          default: >
-            Result {index}
-
-            Content: {chunk.content}
-
-            Metadata: {metadata}
-          description: >-
-            Template for formatting each retrieved chunk in the context. Available
-            placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
-            content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
-            {chunk.content}\nMetadata: {metadata}\n"
-        mode:
-          $ref: '#/components/schemas/RAGSearchMode'
-          default: vector
-          description: >-
-            Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
-            "vector".
-        ranker:
-          $ref: '#/components/schemas/Ranker'
-          description: >-
-            Configuration for the ranker to use in hybrid search. Defaults to RRF
-            ranker.
-      additionalProperties: false
-      required:
-        - query_generator_config
-        - max_tokens_in_context
-        - max_chunks
-        - chunk_template
-      title: RAGQueryConfig
-      description: >-
-        Configuration for the RAG query generation.
-    RAGSearchMode:
-      type: string
-      enum:
-        - vector
-        - keyword
-        - hybrid
-      title: RAGSearchMode
-      description: >-
-        Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search
-        for semantic matching - KEYWORD: Uses keyword-based search for exact matching
-        - HYBRID: Combines both vector and keyword search for better results
-    RRFRanker:
-      type: object
-      properties:
-        type:
-          type: string
-          const: rrf
-          default: rrf
-          description: The type of ranker, always "rrf"
-        impact_factor:
-          type: number
-          default: 60.0
-          description: >-
-            The impact factor for RRF scoring. Higher values give more weight to higher-ranked
-            results. Must be greater than 0
-      additionalProperties: false
-      required:
-        - type
-        - impact_factor
-      title: RRFRanker
-      description: >-
-        Reciprocal Rank Fusion (RRF) ranker configuration.
-    Ranker:
-      oneOf:
-        - $ref: '#/components/schemas/RRFRanker'
-        - $ref: '#/components/schemas/WeightedRanker'
-      discriminator:
-        propertyName: type
-        mapping:
-          rrf: '#/components/schemas/RRFRanker'
-          weighted: '#/components/schemas/WeightedRanker'
-    WeightedRanker:
-      type: object
-      properties:
-        type:
-          type: string
-          const: weighted
-          default: weighted
-          description: The type of ranker, always "weighted"
-        alpha:
-          type: number
-          default: 0.5
-          description: >-
-            Weight factor between 0 and 1. 0 means only use keyword scores, 1 means
-            only use vector scores, values in between blend both scores.
-      additionalProperties: false
-      required:
-        - type
-        - alpha
-      title: WeightedRanker
-      description: >-
-        Weighted ranker configuration that combines vector and keyword scores.
-    QueryRequest:
-      type: object
-      properties:
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            The query content to search for in the indexed documents
-        vector_store_ids:
-          type: array
-          items:
-            type: string
-          description: >-
-            List of vector database IDs to search within
-        query_config:
-          $ref: '#/components/schemas/RAGQueryConfig'
-          description: >-
-            (Optional) Configuration parameters for the query operation
-      additionalProperties: false
-      required:
-        - content
-        - vector_store_ids
-      title: QueryRequest
-    RAGQueryResult:
-      type: object
-      properties:
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: >-
-            (Optional) The retrieved content from the query
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Additional metadata about the query result
-      additionalProperties: false
-      required:
-        - metadata
-      title: RAGQueryResult
-      description: >-
-        Result of a RAG query containing retrieved content and metadata.
     ToolGroup:
       type: object
       properties:
diff --git a/src/llama_stack/apis/tools/rag_tool.py b/src/llama_stack/apis/tools/rag_tool.py
index 4e43bb284..8bcc89bf0 100644
--- a/src/llama_stack/apis/tools/rag_tool.py
+++ b/src/llama_stack/apis/tools/rag_tool.py
@@ -5,18 +5,13 @@
 # the root directory of this source tree.
 
 from enum import Enum, StrEnum
-from typing import Annotated, Any, Literal, Protocol
+from typing import Annotated, Any, Literal
 
 from pydantic import BaseModel, Field, field_validator
-from typing_extensions import runtime_checkable
 
 from llama_stack.apis.common.content_types import URL, InterleavedContent
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.core.telemetry.trace_protocol import trace_protocol
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
-@json_schema_type
 class RRFRanker(BaseModel):
     """
     Reciprocal Rank Fusion (RRF) ranker configuration.
@@ -30,7 +25,6 @@ class RRFRanker(BaseModel):
     impact_factor: float = Field(default=60.0, gt=0.0)  # default of 60 for optimal performance
 
 
-@json_schema_type
 class WeightedRanker(BaseModel):
     """
     Weighted ranker configuration that combines vector and keyword scores.
@@ -55,10 +49,8 @@ Ranker = Annotated[
     RRFRanker | WeightedRanker,
     Field(discriminator="type"),
 ]
-register_schema(Ranker, name="Ranker")
 
 
-@json_schema_type
 class RAGDocument(BaseModel):
     """
     A document to be used for document ingestion in the RAG Tool.
@@ -75,7 +67,6 @@ class RAGDocument(BaseModel):
     metadata: dict[str, Any] = Field(default_factory=dict)
 
 
-@json_schema_type
 class RAGQueryResult(BaseModel):
     """Result of a RAG query containing retrieved content and metadata.
 
@@ -87,7 +78,6 @@ class RAGQueryResult(BaseModel):
     metadata: dict[str, Any] = Field(default_factory=dict)
 
 
-@json_schema_type
 class RAGQueryGenerator(Enum):
     """Types of query generators for RAG systems.
 
@@ -101,7 +91,6 @@ class RAGQueryGenerator(Enum):
     custom = "custom"
 
 
-@json_schema_type
 class RAGSearchMode(StrEnum):
     """
     Search modes for RAG query retrieval:
@@ -115,7 +104,6 @@ class RAGSearchMode(StrEnum):
     HYBRID = "hybrid"
 
 
-@json_schema_type
 class DefaultRAGQueryGeneratorConfig(BaseModel):
     """Configuration for the default RAG query generator.
 
@@ -127,7 +115,6 @@ class DefaultRAGQueryGeneratorConfig(BaseModel):
     separator: str = " "
 
 
-@json_schema_type
 class LLMRAGQueryGeneratorConfig(BaseModel):
     """Configuration for the LLM-based RAG query generator.
 
@@ -145,10 +132,8 @@ RAGQueryGeneratorConfig = Annotated[
     DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig,
     Field(discriminator="type"),
 ]
-register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig")
 
 
-@json_schema_type
 class RAGQueryConfig(BaseModel):
     """
     Configuration for the RAG query generation.
@@ -181,38 +166,3 @@ class RAGQueryConfig(BaseModel):
         if len(v) == 0:
             raise ValueError("chunk_template must not be empty")
         return v
-
-
-@runtime_checkable
-@trace_protocol
-class RAGToolRuntime(Protocol):
-    @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
-    async def insert(
-        self,
-        documents: list[RAGDocument],
-        vector_store_id: str,
-        chunk_size_in_tokens: int = 512,
-    ) -> None:
-        """Index documents so they can be used by the RAG system.
-
-        :param documents: List of documents to index in the RAG system
-        :param vector_store_id: ID of the vector database to store the document embeddings
-        :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing
-        """
-        ...
-
-    @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1)
-    async def query(
-        self,
-        content: InterleavedContent,
-        vector_store_ids: list[str],
-        query_config: RAGQueryConfig | None = None,
-    ) -> RAGQueryResult:
-        """Query the RAG system for context; typically invoked by the agent.
-
-        :param content: The query content to search for in the indexed documents
-        :param vector_store_ids: List of vector database IDs to search within
-        :param query_config: (Optional) Configuration parameters for the query operation
-        :returns: RAGQueryResult containing the retrieved content and metadata
-        """
-        ...
diff --git a/src/llama_stack/apis/tools/tools.py b/src/llama_stack/apis/tools/tools.py
index b13ac2f19..29065a713 100644
--- a/src/llama_stack/apis/tools/tools.py
+++ b/src/llama_stack/apis/tools/tools.py
@@ -16,8 +16,6 @@ from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
-from .rag_tool import RAGToolRuntime
-
 
 @json_schema_type
 class ToolDef(BaseModel):
@@ -195,8 +193,6 @@ class SpecialToolGroup(Enum):
 class ToolRuntime(Protocol):
     tool_store: ToolStore | None = None
 
-    rag_tool: RAGToolRuntime | None = None
-
     # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed.
     @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
     async def list_runtime_tools(
diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py
index be4c13905..fb13d94a4 100644
--- a/src/llama_stack/core/routers/tool_runtime.py
+++ b/src/llama_stack/core/routers/tool_runtime.py
@@ -8,14 +8,9 @@ from typing import Any
 
 from llama_stack.apis.common.content_types import (
     URL,
-    InterleavedContent,
 )
 from llama_stack.apis.tools import (
     ListToolDefsResponse,
-    RAGDocument,
-    RAGQueryConfig,
-    RAGQueryResult,
-    RAGToolRuntime,
     ToolRuntime,
 )
 from llama_stack.log import get_logger
@@ -26,36 +21,6 @@ logger = get_logger(name=__name__, category="core::routers")
 
 
 class ToolRuntimeRouter(ToolRuntime):
-    class RagToolImpl(RAGToolRuntime):
-        def __init__(
-            self,
-            routing_table: ToolGroupsRoutingTable,
-        ) -> None:
-            logger.debug("Initializing ToolRuntimeRouter.RagToolImpl")
-            self.routing_table = routing_table
-
-        async def query(
-            self,
-            content: InterleavedContent,
-            vector_store_ids: list[str],
-            query_config: RAGQueryConfig | None = None,
-        ) -> RAGQueryResult:
-            logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_store_ids}")
-            provider = await self.routing_table.get_provider_impl("knowledge_search")
-            return await provider.query(content, vector_store_ids, query_config)
-
-        async def insert(
-            self,
-            documents: list[RAGDocument],
-            vector_store_id: str,
-            chunk_size_in_tokens: int = 512,
-        ) -> None:
-            logger.debug(
-                f"ToolRuntimeRouter.RagToolImpl.insert: {vector_store_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
-            )
-            provider = await self.routing_table.get_provider_impl("insert_into_memory")
-            return await provider.insert(documents, vector_store_id, chunk_size_in_tokens)
-
     def __init__(
         self,
         routing_table: ToolGroupsRoutingTable,
@@ -63,11 +28,6 @@ class ToolRuntimeRouter(ToolRuntime):
         logger.debug("Initializing ToolRuntimeRouter")
         self.routing_table = routing_table
 
-        # HACK ALERT this should be in sync with "get_all_api_endpoints()"
-        self.rag_tool = self.RagToolImpl(routing_table)
-        for method in ("query", "insert"):
-            setattr(self, f"rag_tool.{method}", getattr(self.rag_tool, method))
-
     async def initialize(self) -> None:
         logger.debug("ToolRuntimeRouter.initialize")
         pass
diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py
index 48a961318..4f7ff2295 100644
--- a/src/llama_stack/core/server/routes.py
+++ b/src/llama_stack/core/server/routes.py
@@ -13,7 +13,6 @@ from aiohttp import hdrs
 from starlette.routing import Route
 
 from llama_stack.apis.datatypes import Api, ExternalApiSpec
-from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
 from llama_stack.core.resolver import api_protocol_map
 from llama_stack.schema_utils import WebMethod
 
@@ -25,33 +24,16 @@ RouteImpls = dict[str, PathImpl]
 RouteMatch = tuple[EndpointFunc, PathParams, str, WebMethod]
 
 
-def toolgroup_protocol_map():
-    return {
-        SpecialToolGroup.rag_tool: RAGToolRuntime,
-    }
-
-
 def get_all_api_routes(
     external_apis: dict[Api, ExternalApiSpec] | None = None,
 ) -> dict[Api, list[tuple[Route, WebMethod]]]:
     apis = {}
 
     protocols = api_protocol_map(external_apis)
-    toolgroup_protocols = toolgroup_protocol_map()
     for api, protocol in protocols.items():
         routes = []
         protocol_methods = inspect.getmembers(protocol, predicate=inspect.isfunction)
 
-        # HACK ALERT
-        if api == Api.tool_runtime:
-            for tool_group in SpecialToolGroup:
-                sub_protocol = toolgroup_protocols[tool_group]
-                sub_protocol_methods = inspect.getmembers(sub_protocol, predicate=inspect.isfunction)
-                for name, method in sub_protocol_methods:
-                    if not hasattr(method, "__webmethod__"):
-                        continue
-                    protocol_methods.append((f"{tool_group.value}.{name}", method))
-
         for name, method in protocol_methods:
             # Get all webmethods for this method (supports multiple decorators)
             webmethods = getattr(method, "__webmethods__", [])
diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py
index 2ff7db6eb..2ed0eccd2 100644
--- a/src/llama_stack/core/stack.py
+++ b/src/llama_stack/core/stack.py
@@ -31,7 +31,7 @@ from llama_stack.apis.safety import Safety
 from llama_stack.apis.scoring import Scoring
 from llama_stack.apis.scoring_functions import ScoringFunctions
 from llama_stack.apis.shields import Shields
-from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
+from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
 from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
@@ -78,7 +78,6 @@ class LlamaStack(
     Inspect,
     ToolGroups,
     ToolRuntime,
-    RAGToolRuntime,
     Files,
     Prompts,
     Conversations,
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
index 3ee745bf1..6a59be0ca 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -27,7 +27,6 @@ from llama_stack.apis.tools import (
     RAGDocument,
     RAGQueryConfig,
     RAGQueryResult,
-    RAGToolRuntime,
     ToolDef,
     ToolGroup,
     ToolInvocationResult,
@@ -91,7 +90,7 @@ async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
             return content_str.encode("utf-8"), "text/plain"
 
 
-class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
+class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime):
     def __init__(
         self,
         config: RagToolRuntimeConfig,