rag_tool

2025-12-23 08:22:28 +00:00 · 2025-07-01 16:01:37 -07:00 · 2025-07-01 16:01:37 -07:00 · 3634bf05b4
commit 3634bf05b4
parent c263eca62d
4 changed files with 109 additions and 29 deletions
--- a/api_update_plan.md
+++ b/api_update_plan.md
@ -224,7 +224,7 @@ Before finalizing documentation, verify:
 [x] 3. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/safety/safety.py` - Safety and moderation
 [x] 4. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/models/models.py` - Model metadata and management
 [x] 5. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/tools.py` - Tool system APIs
-6. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/rag_tool.py` - RAG tool runtime
+[x] 6. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/rag_tool.py` - RAG tool runtime
 7. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_io/vector_io.py` - Vector database operations
 8. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_dbs/vector_dbs.py` - Vector database management
 9. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/files/files.py` - File management
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -1973,7 +1973,7 @@
                "tags": [
                    "ToolRuntime"
                ],
-                "description": "Index documents so they can be used by the RAG system",
+                "description": "Index documents so they can be used by the RAG system.",
                "parameters": [],
                "requestBody": {
                    "content": {
@ -4345,7 +4345,7 @@
            "post": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "RAGQueryResult containing the retrieved content and metadata",
                        "content": {
                            "application/json": {
                                "schema": {
@ -4370,7 +4370,7 @@
                "tags": [
                    "ToolRuntime"
                ],
-                "description": "Query the RAG system for context; typically invoked by the agent",
+                "description": "Query the RAG system for context; typically invoked by the agent.",
                "parameters": [],
                "requestBody": {
                    "content": {
@ -11493,13 +11493,16 @@
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/RAGDocument"
-                        }
+                        },
                        "description": "List of documents to index in the RAG system"
                    },
                    "vector_db_id": {
-                        "type": "string"
+                        "type": "string",
                        "description": "ID of the vector database to store the document embeddings"
                    },
                    "chunk_size_in_tokens": {
-                        "type": "integer"
+                        "type": "integer",
                        "description": "(Optional) Size in tokens for document chunking during indexing"
                    }
                },
                "additionalProperties": false,
@ -14813,11 +14816,13 @@
                    "type": {
                        "type": "string",
                        "const": "default",
-                        "default": "default"
+                        "default": "default",
                        "description": "Type of query generator, always 'default'"
                    },
                    "separator": {
                        "type": "string",
-                        "default": " "
+                        "default": " ",
                        "description": "String separator used to join query terms"
                    }
                },
                "additionalProperties": false,
@ -14825,7 +14830,8 @@
                    "type",
                    "separator"
                ],
-                "title": "DefaultRAGQueryGeneratorConfig"
+                "title": "DefaultRAGQueryGeneratorConfig",
                "description": "Configuration for the default RAG query generator."
            },
            "LLMRAGQueryGeneratorConfig": {
                "type": "object",
@ -14833,13 +14839,16 @@
                    "type": {
                        "type": "string",
                        "const": "llm",
-                        "default": "llm"
+                        "default": "llm",
                        "description": "Type of query generator, always 'llm'"
                    },
                    "model": {
-                        "type": "string"
+                        "type": "string",
                        "description": "Name of the language model to use for query generation"
                    },
                    "template": {
-                        "type": "string"
+                        "type": "string",
                        "description": "Template string for formatting the query generation prompt"
                    }
                },
                "additionalProperties": false,
@ -14848,7 +14857,8 @@
                    "model",
                    "template"
                ],
-                "title": "LLMRAGQueryGeneratorConfig"
+                "title": "LLMRAGQueryGeneratorConfig",
                "description": "Configuration for the LLM-based RAG query generator."
            },
            "RAGQueryConfig": {
                "type": "object",
@ -14920,7 +14930,7 @@
                    "impact_factor": {
                        "type": "number",
                        "default": 60.0,
-                        "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009)."
+                        "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0"
                    }
                },
                "additionalProperties": false,
@ -14975,16 +14985,19 @@
                "type": "object",
                "properties": {
                    "content": {
-                        "$ref": "#/components/schemas/InterleavedContent"
+                        "$ref": "#/components/schemas/InterleavedContent",
                        "description": "The query content to search for in the indexed documents"
                    },
                    "vector_db_ids": {
                        "type": "array",
                        "items": {
                            "type": "string"
-                        }
+                        },
                        "description": "List of vector database IDs to search within"
                    },
                    "query_config": {
-                        "$ref": "#/components/schemas/RAGQueryConfig"
+                        "$ref": "#/components/schemas/RAGQueryConfig",
                        "description": "(Optional) Configuration parameters for the query operation"
                    }
                },
                "additionalProperties": false,
@ -14998,7 +15011,8 @@
                "type": "object",
                "properties": {
                    "content": {
-                        "$ref": "#/components/schemas/InterleavedContent"
+                        "$ref": "#/components/schemas/InterleavedContent",
                        "description": "(Optional) The retrieved content from the query"
                    },
                    "metadata": {
                        "type": "object",
@ -15023,14 +15037,16 @@
                                    "type": "object"
                                }
                            ]
-                        }
+                        },
                        "description": "Additional metadata about the query result"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "metadata"
                ],
-                "title": "RAGQueryResult"
+                "title": "RAGQueryResult",
                "description": "Result of a RAG query containing retrieved content and metadata."
            },
            "QueryChunksRequest": {
                "type": "object",
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -1360,7 +1360,7 @@ paths:
      tags:
        - ToolRuntime
      description: >-
-        Index documents so they can be used by the RAG system
+        Index documents so they can be used by the RAG system.
      parameters: []
      requestBody:
        content:
@ -3071,7 +3071,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
            RAGQueryResult containing the retrieved content and metadata
          content:
            application/json:
              schema:
@ -3089,7 +3090,7 @@ paths:
      tags:
        - ToolRuntime
      description: >-
-        Query the RAG system for context; typically invoked by the agent
+        Query the RAG system for context; typically invoked by the agent.
      parameters: []
      requestBody:
        content:
@ -8202,10 +8203,16 @@ components:
          type: array
          items:
            $ref: '#/components/schemas/RAGDocument'
          description: >-
            List of documents to index in the RAG system
        vector_db_id:
          type: string
          description: >-
            ID of the vector database to store the document embeddings
        chunk_size_in_tokens:
          type: integer
          description: >-
            (Optional) Size in tokens for document chunking during indexing
      additionalProperties: false
      required:
        - documents
@ -10493,14 +10500,20 @@ components:
          type: string
          const: default
          default: default
          description: >-
            Type of query generator, always 'default'
        separator:
          type: string
          default: ' '
          description: >-
            String separator used to join query terms
      additionalProperties: false
      required:
        - type
        - separator
      title: DefaultRAGQueryGeneratorConfig
      description: >-
        Configuration for the default RAG query generator.
    LLMRAGQueryGeneratorConfig:
      type: object
      properties:
@ -10508,16 +10521,23 @@ components:
          type: string
          const: llm
          default: llm
          description: Type of query generator, always 'llm'
        model:
          type: string
          description: >-
            Name of the language model to use for query generation
        template:
          type: string
          description: >-
            Template string for formatting the query generation prompt
      additionalProperties: false
      required:
        - type
        - model
        - template
      title: LLMRAGQueryGeneratorConfig
      description: >-
        Configuration for the LLM-based RAG query generator.
    RAGQueryConfig:
      type: object
      properties:
@ -10586,8 +10606,7 @@ components:
          default: 60.0
          description: >-
            The impact factor for RRF scoring. Higher values give more weight to higher-ranked
-            results. Must be greater than 0. Default of 60 is from the original RRF
+            results. Must be greater than 0
-            paper (Cormack et al., 2009).
      additionalProperties: false
      required:
        - type
@ -10630,12 +10649,18 @@ components:
      properties:
        content:
          $ref: '#/components/schemas/InterleavedContent'
          description: >-
            The query content to search for in the indexed documents
        vector_db_ids:
          type: array
          items:
            type: string
          description: >-
            List of vector database IDs to search within
        query_config:
          $ref: '#/components/schemas/RAGQueryConfig'
          description: >-
            (Optional) Configuration parameters for the query operation
      additionalProperties: false
      required:
        - content
@ -10646,6 +10671,8 @@ components:
      properties:
        content:
          $ref: '#/components/schemas/InterleavedContent'
          description: >-
            (Optional) The retrieved content from the query
        metadata:
          type: object
          additionalProperties:
@ -10656,10 +10683,14 @@ components:
              - type: string
              - type: array
              - type: object
          description: >-
            Additional metadata about the query result
      additionalProperties: false
      required:
        - metadata
      title: RAGQueryResult
      description: >-
        Result of a RAG query containing retrieved content and metadata.
    QueryChunksRequest:
      type: object
      properties:
--- a/llama_stack/apis/tools/rag_tool.py
+++ b/llama_stack/apis/tools/rag_tool.py
@ -22,7 +22,7 @@ class RRFRanker(BaseModel):
    :param type: The type of ranker, always "rrf"
    :param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results.
-                         Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009).
+                         Must be greater than 0
    """
    type: Literal["rrf"] = "rrf"
@ -76,12 +76,23 @@ class RAGDocument(BaseModel):
@json_schema_type
 class RAGQueryResult(BaseModel):
    """Result of a RAG query containing retrieved content and metadata.
    :param content: (Optional) The retrieved content from the query
    :param metadata: Additional metadata about the query result
    """
    content: InterleavedContent | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)
@json_schema_type
 class RAGQueryGenerator(Enum):
    """Types of query generators for RAG systems.
    :cvar default: Default query generator using simple text processing
    :cvar llm: LLM-based query generator for enhanced query understanding
    :cvar custom: Custom query generator implementation
    """
    default = "default"
    llm = "llm"
    custom = "custom"
@ -89,12 +100,23 @@ class RAGQueryGenerator(Enum):
@json_schema_type
 class DefaultRAGQueryGeneratorConfig(BaseModel):
    """Configuration for the default RAG query generator.
    :param type: Type of query generator, always 'default'
    :param separator: String separator used to join query terms
    """
    type: Literal["default"] = "default"
    separator: str = " "
@json_schema_type
 class LLMRAGQueryGeneratorConfig(BaseModel):
    """Configuration for the LLM-based RAG query generator.
    :param type: Type of query generator, always 'llm'
    :param model: Name of the language model to use for query generation
    :param template: Template string for formatting the query generation prompt
    """
    type: Literal["llm"] = "llm"
    model: str
    template: str
@ -152,7 +174,12 @@ class RAGToolRuntime(Protocol):
        vector_db_id: str,
        chunk_size_in_tokens: int = 512,
    ) -> None:
-        """Index documents so they can be used by the RAG system"""
+        """Index documents so they can be used by the RAG system.
        :param documents: List of documents to index in the RAG system
        :param vector_db_id: ID of the vector database to store the document embeddings
        :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing
        """
        ...
    @webmethod(route="/tool-runtime/rag-tool/query", method="POST")
@ -162,5 +189,11 @@ class RAGToolRuntime(Protocol):
        vector_db_ids: list[str],
        query_config: RAGQueryConfig | None = None,
    ) -> RAGQueryResult:
-        """Query the RAG system for context; typically invoked by the agent"""
+        """Query the RAG system for context; typically invoked by the agent.
        :param content: The query content to search for in the indexed documents
        :param vector_db_ids: List of vector database IDs to search within
        :param query_config: (Optional) Configuration parameters for the query operation
        :returns: RAGQueryResult containing the retrieved content and metadata
        """
        ...