This commit is contained in:
Sai Soundararaj 2025-07-01 16:01:37 -07:00
parent c263eca62d
commit 3634bf05b4
4 changed files with 109 additions and 29 deletions

View file

@ -224,7 +224,7 @@ Before finalizing documentation, verify:
[x] 3. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/safety/safety.py` - Safety and moderation [x] 3. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/safety/safety.py` - Safety and moderation
[x] 4. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/models/models.py` - Model metadata and management [x] 4. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/models/models.py` - Model metadata and management
[x] 5. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/tools.py` - Tool system APIs [x] 5. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/tools.py` - Tool system APIs
6. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/rag_tool.py` - RAG tool runtime [x] 6. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/rag_tool.py` - RAG tool runtime
7. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_io/vector_io.py` - Vector database operations 7. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_io/vector_io.py` - Vector database operations
8. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_dbs/vector_dbs.py` - Vector database management 8. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_dbs/vector_dbs.py` - Vector database management
9. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/files/files.py` - File management 9. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/files/files.py` - File management

View file

@ -1973,7 +1973,7 @@
"tags": [ "tags": [
"ToolRuntime" "ToolRuntime"
], ],
"description": "Index documents so they can be used by the RAG system", "description": "Index documents so they can be used by the RAG system.",
"parameters": [], "parameters": [],
"requestBody": { "requestBody": {
"content": { "content": {
@ -4345,7 +4345,7 @@
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
"description": "OK", "description": "RAGQueryResult containing the retrieved content and metadata",
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
@ -4370,7 +4370,7 @@
"tags": [ "tags": [
"ToolRuntime" "ToolRuntime"
], ],
"description": "Query the RAG system for context; typically invoked by the agent", "description": "Query the RAG system for context; typically invoked by the agent.",
"parameters": [], "parameters": [],
"requestBody": { "requestBody": {
"content": { "content": {
@ -11493,13 +11493,16 @@
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/RAGDocument" "$ref": "#/components/schemas/RAGDocument"
} },
"description": "List of documents to index in the RAG system"
}, },
"vector_db_id": { "vector_db_id": {
"type": "string" "type": "string",
"description": "ID of the vector database to store the document embeddings"
}, },
"chunk_size_in_tokens": { "chunk_size_in_tokens": {
"type": "integer" "type": "integer",
"description": "(Optional) Size in tokens for document chunking during indexing"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -14813,11 +14816,13 @@
"type": { "type": {
"type": "string", "type": "string",
"const": "default", "const": "default",
"default": "default" "default": "default",
"description": "Type of query generator, always 'default'"
}, },
"separator": { "separator": {
"type": "string", "type": "string",
"default": " " "default": " ",
"description": "String separator used to join query terms"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -14825,7 +14830,8 @@
"type", "type",
"separator" "separator"
], ],
"title": "DefaultRAGQueryGeneratorConfig" "title": "DefaultRAGQueryGeneratorConfig",
"description": "Configuration for the default RAG query generator."
}, },
"LLMRAGQueryGeneratorConfig": { "LLMRAGQueryGeneratorConfig": {
"type": "object", "type": "object",
@ -14833,13 +14839,16 @@
"type": { "type": {
"type": "string", "type": "string",
"const": "llm", "const": "llm",
"default": "llm" "default": "llm",
"description": "Type of query generator, always 'llm'"
}, },
"model": { "model": {
"type": "string" "type": "string",
"description": "Name of the language model to use for query generation"
}, },
"template": { "template": {
"type": "string" "type": "string",
"description": "Template string for formatting the query generation prompt"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -14848,7 +14857,8 @@
"model", "model",
"template" "template"
], ],
"title": "LLMRAGQueryGeneratorConfig" "title": "LLMRAGQueryGeneratorConfig",
"description": "Configuration for the LLM-based RAG query generator."
}, },
"RAGQueryConfig": { "RAGQueryConfig": {
"type": "object", "type": "object",
@ -14920,7 +14930,7 @@
"impact_factor": { "impact_factor": {
"type": "number", "type": "number",
"default": 60.0, "default": 60.0,
"description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009)." "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -14975,16 +14985,19 @@
"type": "object", "type": "object",
"properties": { "properties": {
"content": { "content": {
"$ref": "#/components/schemas/InterleavedContent" "$ref": "#/components/schemas/InterleavedContent",
"description": "The query content to search for in the indexed documents"
}, },
"vector_db_ids": { "vector_db_ids": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string" "type": "string"
} },
"description": "List of vector database IDs to search within"
}, },
"query_config": { "query_config": {
"$ref": "#/components/schemas/RAGQueryConfig" "$ref": "#/components/schemas/RAGQueryConfig",
"description": "(Optional) Configuration parameters for the query operation"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -14998,7 +15011,8 @@
"type": "object", "type": "object",
"properties": { "properties": {
"content": { "content": {
"$ref": "#/components/schemas/InterleavedContent" "$ref": "#/components/schemas/InterleavedContent",
"description": "(Optional) The retrieved content from the query"
}, },
"metadata": { "metadata": {
"type": "object", "type": "object",
@ -15023,14 +15037,16 @@
"type": "object" "type": "object"
} }
] ]
} },
"description": "Additional metadata about the query result"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"metadata" "metadata"
], ],
"title": "RAGQueryResult" "title": "RAGQueryResult",
"description": "Result of a RAG query containing retrieved content and metadata."
}, },
"QueryChunksRequest": { "QueryChunksRequest": {
"type": "object", "type": "object",

View file

@ -1360,7 +1360,7 @@ paths:
tags: tags:
- ToolRuntime - ToolRuntime
description: >- description: >-
Index documents so they can be used by the RAG system Index documents so they can be used by the RAG system.
parameters: [] parameters: []
requestBody: requestBody:
content: content:
@ -3071,7 +3071,8 @@ paths:
post: post:
responses: responses:
'200': '200':
description: OK description: >-
RAGQueryResult containing the retrieved content and metadata
content: content:
application/json: application/json:
schema: schema:
@ -3089,7 +3090,7 @@ paths:
tags: tags:
- ToolRuntime - ToolRuntime
description: >- description: >-
Query the RAG system for context; typically invoked by the agent Query the RAG system for context; typically invoked by the agent.
parameters: [] parameters: []
requestBody: requestBody:
content: content:
@ -8202,10 +8203,16 @@ components:
type: array type: array
items: items:
$ref: '#/components/schemas/RAGDocument' $ref: '#/components/schemas/RAGDocument'
description: >-
List of documents to index in the RAG system
vector_db_id: vector_db_id:
type: string type: string
description: >-
ID of the vector database to store the document embeddings
chunk_size_in_tokens: chunk_size_in_tokens:
type: integer type: integer
description: >-
(Optional) Size in tokens for document chunking during indexing
additionalProperties: false additionalProperties: false
required: required:
- documents - documents
@ -10493,14 +10500,20 @@ components:
type: string type: string
const: default const: default
default: default default: default
description: >-
Type of query generator, always 'default'
separator: separator:
type: string type: string
default: ' ' default: ' '
description: >-
String separator used to join query terms
additionalProperties: false additionalProperties: false
required: required:
- type - type
- separator - separator
title: DefaultRAGQueryGeneratorConfig title: DefaultRAGQueryGeneratorConfig
description: >-
Configuration for the default RAG query generator.
LLMRAGQueryGeneratorConfig: LLMRAGQueryGeneratorConfig:
type: object type: object
properties: properties:
@ -10508,16 +10521,23 @@ components:
type: string type: string
const: llm const: llm
default: llm default: llm
description: Type of query generator, always 'llm'
model: model:
type: string type: string
description: >-
Name of the language model to use for query generation
template: template:
type: string type: string
description: >-
Template string for formatting the query generation prompt
additionalProperties: false additionalProperties: false
required: required:
- type - type
- model - model
- template - template
title: LLMRAGQueryGeneratorConfig title: LLMRAGQueryGeneratorConfig
description: >-
Configuration for the LLM-based RAG query generator.
RAGQueryConfig: RAGQueryConfig:
type: object type: object
properties: properties:
@ -10586,8 +10606,7 @@ components:
default: 60.0 default: 60.0
description: >- description: >-
The impact factor for RRF scoring. Higher values give more weight to higher-ranked The impact factor for RRF scoring. Higher values give more weight to higher-ranked
results. Must be greater than 0. Default of 60 is from the original RRF results. Must be greater than 0
paper (Cormack et al., 2009).
additionalProperties: false additionalProperties: false
required: required:
- type - type
@ -10630,12 +10649,18 @@ components:
properties: properties:
content: content:
$ref: '#/components/schemas/InterleavedContent' $ref: '#/components/schemas/InterleavedContent'
description: >-
The query content to search for in the indexed documents
vector_db_ids: vector_db_ids:
type: array type: array
items: items:
type: string type: string
description: >-
List of vector database IDs to search within
query_config: query_config:
$ref: '#/components/schemas/RAGQueryConfig' $ref: '#/components/schemas/RAGQueryConfig'
description: >-
(Optional) Configuration parameters for the query operation
additionalProperties: false additionalProperties: false
required: required:
- content - content
@ -10646,6 +10671,8 @@ components:
properties: properties:
content: content:
$ref: '#/components/schemas/InterleavedContent' $ref: '#/components/schemas/InterleavedContent'
description: >-
(Optional) The retrieved content from the query
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -10656,10 +10683,14 @@ components:
- type: string - type: string
- type: array - type: array
- type: object - type: object
description: >-
Additional metadata about the query result
additionalProperties: false additionalProperties: false
required: required:
- metadata - metadata
title: RAGQueryResult title: RAGQueryResult
description: >-
Result of a RAG query containing retrieved content and metadata.
QueryChunksRequest: QueryChunksRequest:
type: object type: object
properties: properties:

View file

@ -22,7 +22,7 @@ class RRFRanker(BaseModel):
:param type: The type of ranker, always "rrf" :param type: The type of ranker, always "rrf"
:param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. :param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results.
Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009). Must be greater than 0
""" """
type: Literal["rrf"] = "rrf" type: Literal["rrf"] = "rrf"
@ -76,12 +76,23 @@ class RAGDocument(BaseModel):
@json_schema_type @json_schema_type
class RAGQueryResult(BaseModel): class RAGQueryResult(BaseModel):
"""Result of a RAG query containing retrieved content and metadata.
:param content: (Optional) The retrieved content from the query
:param metadata: Additional metadata about the query result
"""
content: InterleavedContent | None = None content: InterleavedContent | None = None
metadata: dict[str, Any] = Field(default_factory=dict) metadata: dict[str, Any] = Field(default_factory=dict)
@json_schema_type @json_schema_type
class RAGQueryGenerator(Enum): class RAGQueryGenerator(Enum):
"""Types of query generators for RAG systems.
:cvar default: Default query generator using simple text processing
:cvar llm: LLM-based query generator for enhanced query understanding
:cvar custom: Custom query generator implementation
"""
default = "default" default = "default"
llm = "llm" llm = "llm"
custom = "custom" custom = "custom"
@ -89,12 +100,23 @@ class RAGQueryGenerator(Enum):
@json_schema_type @json_schema_type
class DefaultRAGQueryGeneratorConfig(BaseModel): class DefaultRAGQueryGeneratorConfig(BaseModel):
"""Configuration for the default RAG query generator.
:param type: Type of query generator, always 'default'
:param separator: String separator used to join query terms
"""
type: Literal["default"] = "default" type: Literal["default"] = "default"
separator: str = " " separator: str = " "
@json_schema_type @json_schema_type
class LLMRAGQueryGeneratorConfig(BaseModel): class LLMRAGQueryGeneratorConfig(BaseModel):
"""Configuration for the LLM-based RAG query generator.
:param type: Type of query generator, always 'llm'
:param model: Name of the language model to use for query generation
:param template: Template string for formatting the query generation prompt
"""
type: Literal["llm"] = "llm" type: Literal["llm"] = "llm"
model: str model: str
template: str template: str
@ -152,7 +174,12 @@ class RAGToolRuntime(Protocol):
vector_db_id: str, vector_db_id: str,
chunk_size_in_tokens: int = 512, chunk_size_in_tokens: int = 512,
) -> None: ) -> None:
"""Index documents so they can be used by the RAG system""" """Index documents so they can be used by the RAG system.
:param documents: List of documents to index in the RAG system
:param vector_db_id: ID of the vector database to store the document embeddings
:param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing
"""
... ...
@webmethod(route="/tool-runtime/rag-tool/query", method="POST") @webmethod(route="/tool-runtime/rag-tool/query", method="POST")
@ -162,5 +189,11 @@ class RAGToolRuntime(Protocol):
vector_db_ids: list[str], vector_db_ids: list[str],
query_config: RAGQueryConfig | None = None, query_config: RAGQueryConfig | None = None,
) -> RAGQueryResult: ) -> RAGQueryResult:
"""Query the RAG system for context; typically invoked by the agent""" """Query the RAG system for context; typically invoked by the agent.
:param content: The query content to search for in the indexed documents
:param vector_db_ids: List of vector database IDs to search within
:param query_config: (Optional) Configuration parameters for the query operation
:returns: RAGQueryResult containing the retrieved content and metadata
"""
... ...