diff --git a/api_update_plan.md b/api_update_plan.md index e14d70430..a3a6acd84 100644 --- a/api_update_plan.md +++ b/api_update_plan.md @@ -224,7 +224,7 @@ Before finalizing documentation, verify: [x] 3. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/safety/safety.py` - Safety and moderation [x] 4. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/models/models.py` - Model metadata and management [x] 5. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/tools.py` - Tool system APIs -6. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/rag_tool.py` - RAG tool runtime +[x] 6. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/rag_tool.py` - RAG tool runtime 7. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_io/vector_io.py` - Vector database operations 8. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_dbs/vector_dbs.py` - Vector database management 9. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/files/files.py` - File management diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 48575c4a3..44b681b71 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -1973,7 +1973,7 @@ "tags": [ "ToolRuntime" ], - "description": "Index documents so they can be used by the RAG system", + "description": "Index documents so they can be used by the RAG system.", "parameters": [], "requestBody": { "content": { @@ -4345,7 +4345,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "RAGQueryResult containing the retrieved content and metadata", "content": { "application/json": { "schema": { @@ -4370,7 +4370,7 @@ "tags": [ "ToolRuntime" ], - "description": "Query the RAG system for context; typically invoked by the agent", + "description": "Query the RAG system for context; typically invoked by the agent.", "parameters": [], "requestBody": { "content": { @@ -11493,13 +11493,16 @@ "type": "array", "items": { "$ref": "#/components/schemas/RAGDocument" - } + }, + "description": "List of documents to index in the RAG system" }, "vector_db_id": { - "type": "string" + "type": "string", + "description": "ID of the vector database to store the document embeddings" }, "chunk_size_in_tokens": { - "type": "integer" + "type": "integer", + "description": "(Optional) Size in tokens for document chunking during indexing" } }, "additionalProperties": false, @@ -14813,11 +14816,13 @@ "type": { "type": "string", "const": "default", - "default": "default" + "default": "default", + "description": "Type of query generator, always 'default'" }, "separator": { "type": "string", - "default": " " + "default": " ", + "description": "String separator used to join query terms" } }, "additionalProperties": false, @@ -14825,7 +14830,8 @@ "type", "separator" ], - "title": "DefaultRAGQueryGeneratorConfig" + "title": "DefaultRAGQueryGeneratorConfig", + "description": "Configuration for the default RAG query generator." }, "LLMRAGQueryGeneratorConfig": { "type": "object", @@ -14833,13 +14839,16 @@ "type": { "type": "string", "const": "llm", - "default": "llm" + "default": "llm", + "description": "Type of query generator, always 'llm'" }, "model": { - "type": "string" + "type": "string", + "description": "Name of the language model to use for query generation" }, "template": { - "type": "string" + "type": "string", + "description": "Template string for formatting the query generation prompt" } }, "additionalProperties": false, @@ -14848,7 +14857,8 @@ "model", "template" ], - "title": "LLMRAGQueryGeneratorConfig" + "title": "LLMRAGQueryGeneratorConfig", + "description": "Configuration for the LLM-based RAG query generator." }, "RAGQueryConfig": { "type": "object", @@ -14920,7 +14930,7 @@ "impact_factor": { "type": "number", "default": 60.0, - "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009)." + "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0" } }, "additionalProperties": false, @@ -14975,16 +14985,19 @@ "type": "object", "properties": { "content": { - "$ref": "#/components/schemas/InterleavedContent" + "$ref": "#/components/schemas/InterleavedContent", + "description": "The query content to search for in the indexed documents" }, "vector_db_ids": { "type": "array", "items": { "type": "string" - } + }, + "description": "List of vector database IDs to search within" }, "query_config": { - "$ref": "#/components/schemas/RAGQueryConfig" + "$ref": "#/components/schemas/RAGQueryConfig", + "description": "(Optional) Configuration parameters for the query operation" } }, "additionalProperties": false, @@ -14998,7 +15011,8 @@ "type": "object", "properties": { "content": { - "$ref": "#/components/schemas/InterleavedContent" + "$ref": "#/components/schemas/InterleavedContent", + "description": "(Optional) The retrieved content from the query" }, "metadata": { "type": "object", @@ -15023,14 +15037,16 @@ "type": "object" } ] - } + }, + "description": "Additional metadata about the query result" } }, "additionalProperties": false, "required": [ "metadata" ], - "title": "RAGQueryResult" + "title": "RAGQueryResult", + "description": "Result of a RAG query containing retrieved content and metadata." }, "QueryChunksRequest": { "type": "object", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 158eaa61d..07a9c87ef 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -1360,7 +1360,7 @@ paths: tags: - ToolRuntime description: >- - Index documents so they can be used by the RAG system + Index documents so they can be used by the RAG system. parameters: [] requestBody: content: @@ -3071,7 +3071,8 @@ paths: post: responses: '200': - description: OK + description: >- + RAGQueryResult containing the retrieved content and metadata content: application/json: schema: @@ -3089,7 +3090,7 @@ paths: tags: - ToolRuntime description: >- - Query the RAG system for context; typically invoked by the agent + Query the RAG system for context; typically invoked by the agent. parameters: [] requestBody: content: @@ -8202,10 +8203,16 @@ components: type: array items: $ref: '#/components/schemas/RAGDocument' + description: >- + List of documents to index in the RAG system vector_db_id: type: string + description: >- + ID of the vector database to store the document embeddings chunk_size_in_tokens: type: integer + description: >- + (Optional) Size in tokens for document chunking during indexing additionalProperties: false required: - documents @@ -10493,14 +10500,20 @@ components: type: string const: default default: default + description: >- + Type of query generator, always 'default' separator: type: string default: ' ' + description: >- + String separator used to join query terms additionalProperties: false required: - type - separator title: DefaultRAGQueryGeneratorConfig + description: >- + Configuration for the default RAG query generator. LLMRAGQueryGeneratorConfig: type: object properties: @@ -10508,16 +10521,23 @@ components: type: string const: llm default: llm + description: Type of query generator, always 'llm' model: type: string + description: >- + Name of the language model to use for query generation template: type: string + description: >- + Template string for formatting the query generation prompt additionalProperties: false required: - type - model - template title: LLMRAGQueryGeneratorConfig + description: >- + Configuration for the LLM-based RAG query generator. RAGQueryConfig: type: object properties: @@ -10586,8 +10606,7 @@ components: default: 60.0 description: >- The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0. Default of 60 is from the original RRF - paper (Cormack et al., 2009). + results. Must be greater than 0 additionalProperties: false required: - type @@ -10630,12 +10649,18 @@ components: properties: content: $ref: '#/components/schemas/InterleavedContent' + description: >- + The query content to search for in the indexed documents vector_db_ids: type: array items: type: string + description: >- + List of vector database IDs to search within query_config: $ref: '#/components/schemas/RAGQueryConfig' + description: >- + (Optional) Configuration parameters for the query operation additionalProperties: false required: - content @@ -10646,6 +10671,8 @@ components: properties: content: $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) The retrieved content from the query metadata: type: object additionalProperties: @@ -10656,10 +10683,14 @@ components: - type: string - type: array - type: object + description: >- + Additional metadata about the query result additionalProperties: false required: - metadata title: RAGQueryResult + description: >- + Result of a RAG query containing retrieved content and metadata. QueryChunksRequest: type: object properties: diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py index d497fe1a7..f7cfff69d 100644 --- a/llama_stack/apis/tools/rag_tool.py +++ b/llama_stack/apis/tools/rag_tool.py @@ -22,7 +22,7 @@ class RRFRanker(BaseModel): :param type: The type of ranker, always "rrf" :param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. - Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009). + Must be greater than 0 """ type: Literal["rrf"] = "rrf" @@ -76,12 +76,23 @@ class RAGDocument(BaseModel): @json_schema_type class RAGQueryResult(BaseModel): + """Result of a RAG query containing retrieved content and metadata. + + :param content: (Optional) The retrieved content from the query + :param metadata: Additional metadata about the query result + """ content: InterleavedContent | None = None metadata: dict[str, Any] = Field(default_factory=dict) @json_schema_type class RAGQueryGenerator(Enum): + """Types of query generators for RAG systems. + + :cvar default: Default query generator using simple text processing + :cvar llm: LLM-based query generator for enhanced query understanding + :cvar custom: Custom query generator implementation + """ default = "default" llm = "llm" custom = "custom" @@ -89,12 +100,23 @@ class RAGQueryGenerator(Enum): @json_schema_type class DefaultRAGQueryGeneratorConfig(BaseModel): + """Configuration for the default RAG query generator. + + :param type: Type of query generator, always 'default' + :param separator: String separator used to join query terms + """ type: Literal["default"] = "default" separator: str = " " @json_schema_type class LLMRAGQueryGeneratorConfig(BaseModel): + """Configuration for the LLM-based RAG query generator. + + :param type: Type of query generator, always 'llm' + :param model: Name of the language model to use for query generation + :param template: Template string for formatting the query generation prompt + """ type: Literal["llm"] = "llm" model: str template: str @@ -152,7 +174,12 @@ class RAGToolRuntime(Protocol): vector_db_id: str, chunk_size_in_tokens: int = 512, ) -> None: - """Index documents so they can be used by the RAG system""" + """Index documents so they can be used by the RAG system. + + :param documents: List of documents to index in the RAG system + :param vector_db_id: ID of the vector database to store the document embeddings + :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing + """ ... @webmethod(route="/tool-runtime/rag-tool/query", method="POST") @@ -162,5 +189,11 @@ class RAGToolRuntime(Protocol): vector_db_ids: list[str], query_config: RAGQueryConfig | None = None, ) -> RAGQueryResult: - """Query the RAG system for context; typically invoked by the agent""" + """Query the RAG system for context; typically invoked by the agent. + + :param content: The query content to search for in the indexed documents + :param vector_db_ids: List of vector database IDs to search within + :param query_config: (Optional) Configuration parameters for the query operation + :returns: RAGQueryResult containing the retrieved content and metadata + """ ...