mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 15:23:51 +00:00
rag_tool
This commit is contained in:
parent
c263eca62d
commit
3634bf05b4
4 changed files with 109 additions and 29 deletions
|
@ -224,7 +224,7 @@ Before finalizing documentation, verify:
|
||||||
[x] 3. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/safety/safety.py` - Safety and moderation
|
[x] 3. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/safety/safety.py` - Safety and moderation
|
||||||
[x] 4. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/models/models.py` - Model metadata and management
|
[x] 4. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/models/models.py` - Model metadata and management
|
||||||
[x] 5. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/tools.py` - Tool system APIs
|
[x] 5. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/tools.py` - Tool system APIs
|
||||||
6. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/rag_tool.py` - RAG tool runtime
|
[x] 6. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/tools/rag_tool.py` - RAG tool runtime
|
||||||
7. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_io/vector_io.py` - Vector database operations
|
7. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_io/vector_io.py` - Vector database operations
|
||||||
8. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_dbs/vector_dbs.py` - Vector database management
|
8. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/vector_dbs/vector_dbs.py` - Vector database management
|
||||||
9. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/files/files.py` - File management
|
9. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/files/files.py` - File management
|
||||||
|
|
56
docs/_static/llama-stack-spec.html
vendored
56
docs/_static/llama-stack-spec.html
vendored
|
@ -1973,7 +1973,7 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"ToolRuntime"
|
"ToolRuntime"
|
||||||
],
|
],
|
||||||
"description": "Index documents so they can be used by the RAG system",
|
"description": "Index documents so they can be used by the RAG system.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
@ -4345,7 +4345,7 @@
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "OK",
|
"description": "RAGQueryResult containing the retrieved content and metadata",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -4370,7 +4370,7 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"ToolRuntime"
|
"ToolRuntime"
|
||||||
],
|
],
|
||||||
"description": "Query the RAG system for context; typically invoked by the agent",
|
"description": "Query the RAG system for context; typically invoked by the agent.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
@ -11493,13 +11493,16 @@
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/RAGDocument"
|
"$ref": "#/components/schemas/RAGDocument"
|
||||||
}
|
},
|
||||||
|
"description": "List of documents to index in the RAG system"
|
||||||
},
|
},
|
||||||
"vector_db_id": {
|
"vector_db_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "ID of the vector database to store the document embeddings"
|
||||||
},
|
},
|
||||||
"chunk_size_in_tokens": {
|
"chunk_size_in_tokens": {
|
||||||
"type": "integer"
|
"type": "integer",
|
||||||
|
"description": "(Optional) Size in tokens for document chunking during indexing"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -14813,11 +14816,13 @@
|
||||||
"type": {
|
"type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"const": "default",
|
"const": "default",
|
||||||
"default": "default"
|
"default": "default",
|
||||||
|
"description": "Type of query generator, always 'default'"
|
||||||
},
|
},
|
||||||
"separator": {
|
"separator": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": " "
|
"default": " ",
|
||||||
|
"description": "String separator used to join query terms"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -14825,7 +14830,8 @@
|
||||||
"type",
|
"type",
|
||||||
"separator"
|
"separator"
|
||||||
],
|
],
|
||||||
"title": "DefaultRAGQueryGeneratorConfig"
|
"title": "DefaultRAGQueryGeneratorConfig",
|
||||||
|
"description": "Configuration for the default RAG query generator."
|
||||||
},
|
},
|
||||||
"LLMRAGQueryGeneratorConfig": {
|
"LLMRAGQueryGeneratorConfig": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -14833,13 +14839,16 @@
|
||||||
"type": {
|
"type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"const": "llm",
|
"const": "llm",
|
||||||
"default": "llm"
|
"default": "llm",
|
||||||
|
"description": "Type of query generator, always 'llm'"
|
||||||
},
|
},
|
||||||
"model": {
|
"model": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Name of the language model to use for query generation"
|
||||||
},
|
},
|
||||||
"template": {
|
"template": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Template string for formatting the query generation prompt"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -14848,7 +14857,8 @@
|
||||||
"model",
|
"model",
|
||||||
"template"
|
"template"
|
||||||
],
|
],
|
||||||
"title": "LLMRAGQueryGeneratorConfig"
|
"title": "LLMRAGQueryGeneratorConfig",
|
||||||
|
"description": "Configuration for the LLM-based RAG query generator."
|
||||||
},
|
},
|
||||||
"RAGQueryConfig": {
|
"RAGQueryConfig": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -14920,7 +14930,7 @@
|
||||||
"impact_factor": {
|
"impact_factor": {
|
||||||
"type": "number",
|
"type": "number",
|
||||||
"default": 60.0,
|
"default": 60.0,
|
||||||
"description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009)."
|
"description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009)."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -14975,16 +14985,19 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"content": {
|
"content": {
|
||||||
"$ref": "#/components/schemas/InterleavedContent"
|
"$ref": "#/components/schemas/InterleavedContent",
|
||||||
|
"description": "The query content to search for in the indexed documents"
|
||||||
},
|
},
|
||||||
"vector_db_ids": {
|
"vector_db_ids": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
},
|
||||||
|
"description": "List of vector database IDs to search within"
|
||||||
},
|
},
|
||||||
"query_config": {
|
"query_config": {
|
||||||
"$ref": "#/components/schemas/RAGQueryConfig"
|
"$ref": "#/components/schemas/RAGQueryConfig",
|
||||||
|
"description": "(Optional) Configuration parameters for the query operation"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -14998,7 +15011,8 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"content": {
|
"content": {
|
||||||
"$ref": "#/components/schemas/InterleavedContent"
|
"$ref": "#/components/schemas/InterleavedContent",
|
||||||
|
"description": "(Optional) The retrieved content from the query"
|
||||||
},
|
},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -15023,14 +15037,16 @@
|
||||||
"type": "object"
|
"type": "object"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
|
"description": "Additional metadata about the query result"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"metadata"
|
"metadata"
|
||||||
],
|
],
|
||||||
"title": "RAGQueryResult"
|
"title": "RAGQueryResult",
|
||||||
|
"description": "Result of a RAG query containing retrieved content and metadata."
|
||||||
},
|
},
|
||||||
"QueryChunksRequest": {
|
"QueryChunksRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
|
41
docs/_static/llama-stack-spec.yaml
vendored
41
docs/_static/llama-stack-spec.yaml
vendored
|
@ -1360,7 +1360,7 @@ paths:
|
||||||
tags:
|
tags:
|
||||||
- ToolRuntime
|
- ToolRuntime
|
||||||
description: >-
|
description: >-
|
||||||
Index documents so they can be used by the RAG system
|
Index documents so they can be used by the RAG system.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
@ -3071,7 +3071,8 @@ paths:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: OK
|
description: >-
|
||||||
|
RAGQueryResult containing the retrieved content and metadata
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
|
@ -3089,7 +3090,7 @@ paths:
|
||||||
tags:
|
tags:
|
||||||
- ToolRuntime
|
- ToolRuntime
|
||||||
description: >-
|
description: >-
|
||||||
Query the RAG system for context; typically invoked by the agent
|
Query the RAG system for context; typically invoked by the agent.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
@ -8202,10 +8203,16 @@ components:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/RAGDocument'
|
$ref: '#/components/schemas/RAGDocument'
|
||||||
|
description: >-
|
||||||
|
List of documents to index in the RAG system
|
||||||
vector_db_id:
|
vector_db_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
ID of the vector database to store the document embeddings
|
||||||
chunk_size_in_tokens:
|
chunk_size_in_tokens:
|
||||||
type: integer
|
type: integer
|
||||||
|
description: >-
|
||||||
|
(Optional) Size in tokens for document chunking during indexing
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- documents
|
- documents
|
||||||
|
@ -10493,14 +10500,20 @@ components:
|
||||||
type: string
|
type: string
|
||||||
const: default
|
const: default
|
||||||
default: default
|
default: default
|
||||||
|
description: >-
|
||||||
|
Type of query generator, always 'default'
|
||||||
separator:
|
separator:
|
||||||
type: string
|
type: string
|
||||||
default: ' '
|
default: ' '
|
||||||
|
description: >-
|
||||||
|
String separator used to join query terms
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- separator
|
- separator
|
||||||
title: DefaultRAGQueryGeneratorConfig
|
title: DefaultRAGQueryGeneratorConfig
|
||||||
|
description: >-
|
||||||
|
Configuration for the default RAG query generator.
|
||||||
LLMRAGQueryGeneratorConfig:
|
LLMRAGQueryGeneratorConfig:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -10508,16 +10521,23 @@ components:
|
||||||
type: string
|
type: string
|
||||||
const: llm
|
const: llm
|
||||||
default: llm
|
default: llm
|
||||||
|
description: Type of query generator, always 'llm'
|
||||||
model:
|
model:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
Name of the language model to use for query generation
|
||||||
template:
|
template:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
Template string for formatting the query generation prompt
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- model
|
- model
|
||||||
- template
|
- template
|
||||||
title: LLMRAGQueryGeneratorConfig
|
title: LLMRAGQueryGeneratorConfig
|
||||||
|
description: >-
|
||||||
|
Configuration for the LLM-based RAG query generator.
|
||||||
RAGQueryConfig:
|
RAGQueryConfig:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -10586,8 +10606,7 @@ components:
|
||||||
default: 60.0
|
default: 60.0
|
||||||
description: >-
|
description: >-
|
||||||
The impact factor for RRF scoring. Higher values give more weight to higher-ranked
|
The impact factor for RRF scoring. Higher values give more weight to higher-ranked
|
||||||
results. Must be greater than 0. Default of 60 is from the original RRF
|
results. Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009).
|
||||||
paper (Cormack et al., 2009).
|
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
|
@ -10630,12 +10649,18 @@ components:
|
||||||
properties:
|
properties:
|
||||||
content:
|
content:
|
||||||
$ref: '#/components/schemas/InterleavedContent'
|
$ref: '#/components/schemas/InterleavedContent'
|
||||||
|
description: >-
|
||||||
|
The query content to search for in the indexed documents
|
||||||
vector_db_ids:
|
vector_db_ids:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
List of vector database IDs to search within
|
||||||
query_config:
|
query_config:
|
||||||
$ref: '#/components/schemas/RAGQueryConfig'
|
$ref: '#/components/schemas/RAGQueryConfig'
|
||||||
|
description: >-
|
||||||
|
(Optional) Configuration parameters for the query operation
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- content
|
- content
|
||||||
|
@ -10646,6 +10671,8 @@ components:
|
||||||
properties:
|
properties:
|
||||||
content:
|
content:
|
||||||
$ref: '#/components/schemas/InterleavedContent'
|
$ref: '#/components/schemas/InterleavedContent'
|
||||||
|
description: >-
|
||||||
|
(Optional) The retrieved content from the query
|
||||||
metadata:
|
metadata:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
|
@ -10656,10 +10683,14 @@ components:
|
||||||
- type: string
|
- type: string
|
||||||
- type: array
|
- type: array
|
||||||
- type: object
|
- type: object
|
||||||
|
description: >-
|
||||||
|
Additional metadata about the query result
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- metadata
|
- metadata
|
||||||
title: RAGQueryResult
|
title: RAGQueryResult
|
||||||
|
description: >-
|
||||||
|
Result of a RAG query containing retrieved content and metadata.
|
||||||
QueryChunksRequest:
|
QueryChunksRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
|
@ -22,7 +22,7 @@ class RRFRanker(BaseModel):
|
||||||
|
|
||||||
:param type: The type of ranker, always "rrf"
|
:param type: The type of ranker, always "rrf"
|
||||||
:param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results.
|
:param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results.
|
||||||
Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009).
|
Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
type: Literal["rrf"] = "rrf"
|
type: Literal["rrf"] = "rrf"
|
||||||
|
@ -76,12 +76,23 @@ class RAGDocument(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class RAGQueryResult(BaseModel):
|
class RAGQueryResult(BaseModel):
|
||||||
|
"""Result of a RAG query containing retrieved content and metadata.
|
||||||
|
|
||||||
|
:param content: (Optional) The retrieved content from the query
|
||||||
|
:param metadata: Additional metadata about the query result
|
||||||
|
"""
|
||||||
content: InterleavedContent | None = None
|
content: InterleavedContent | None = None
|
||||||
metadata: dict[str, Any] = Field(default_factory=dict)
|
metadata: dict[str, Any] = Field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class RAGQueryGenerator(Enum):
|
class RAGQueryGenerator(Enum):
|
||||||
|
"""Types of query generators for RAG systems.
|
||||||
|
|
||||||
|
:cvar default: Default query generator using simple text processing
|
||||||
|
:cvar llm: LLM-based query generator for enhanced query understanding
|
||||||
|
:cvar custom: Custom query generator implementation
|
||||||
|
"""
|
||||||
default = "default"
|
default = "default"
|
||||||
llm = "llm"
|
llm = "llm"
|
||||||
custom = "custom"
|
custom = "custom"
|
||||||
|
@ -89,12 +100,23 @@ class RAGQueryGenerator(Enum):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class DefaultRAGQueryGeneratorConfig(BaseModel):
|
class DefaultRAGQueryGeneratorConfig(BaseModel):
|
||||||
|
"""Configuration for the default RAG query generator.
|
||||||
|
|
||||||
|
:param type: Type of query generator, always 'default'
|
||||||
|
:param separator: String separator used to join query terms
|
||||||
|
"""
|
||||||
type: Literal["default"] = "default"
|
type: Literal["default"] = "default"
|
||||||
separator: str = " "
|
separator: str = " "
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class LLMRAGQueryGeneratorConfig(BaseModel):
|
class LLMRAGQueryGeneratorConfig(BaseModel):
|
||||||
|
"""Configuration for the LLM-based RAG query generator.
|
||||||
|
|
||||||
|
:param type: Type of query generator, always 'llm'
|
||||||
|
:param model: Name of the language model to use for query generation
|
||||||
|
:param template: Template string for formatting the query generation prompt
|
||||||
|
"""
|
||||||
type: Literal["llm"] = "llm"
|
type: Literal["llm"] = "llm"
|
||||||
model: str
|
model: str
|
||||||
template: str
|
template: str
|
||||||
|
@ -152,7 +174,12 @@ class RAGToolRuntime(Protocol):
|
||||||
vector_db_id: str,
|
vector_db_id: str,
|
||||||
chunk_size_in_tokens: int = 512,
|
chunk_size_in_tokens: int = 512,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Index documents so they can be used by the RAG system"""
|
"""Index documents so they can be used by the RAG system.
|
||||||
|
|
||||||
|
:param documents: List of documents to index in the RAG system
|
||||||
|
:param vector_db_id: ID of the vector database to store the document embeddings
|
||||||
|
:param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing
|
||||||
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/tool-runtime/rag-tool/query", method="POST")
|
@webmethod(route="/tool-runtime/rag-tool/query", method="POST")
|
||||||
|
@ -162,5 +189,11 @@ class RAGToolRuntime(Protocol):
|
||||||
vector_db_ids: list[str],
|
vector_db_ids: list[str],
|
||||||
query_config: RAGQueryConfig | None = None,
|
query_config: RAGQueryConfig | None = None,
|
||||||
) -> RAGQueryResult:
|
) -> RAGQueryResult:
|
||||||
"""Query the RAG system for context; typically invoked by the agent"""
|
"""Query the RAG system for context; typically invoked by the agent.
|
||||||
|
|
||||||
|
:param content: The query content to search for in the indexed documents
|
||||||
|
:param vector_db_ids: List of vector database IDs to search within
|
||||||
|
:param query_config: (Optional) Configuration parameters for the query operation
|
||||||
|
:returns: RAGQueryResult containing the retrieved content and metadata
|
||||||
|
"""
|
||||||
...
|
...
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue