mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-17 18:38:11 +00:00
feat: Introduce weighted and rrf reranker implementations
Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
This commit is contained in:
parent
eab85a7121
commit
6ea5c10d48
14 changed files with 637 additions and 75 deletions
67
docs/_static/llama-stack-spec.html
vendored
67
docs/_static/llama-stack-spec.html
vendored
|
@ -13995,6 +13995,10 @@
|
|||
"mode": {
|
||||
"type": "string",
|
||||
"description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"."
|
||||
},
|
||||
"ranker": {
|
||||
"$ref": "#/components/schemas/Ranker",
|
||||
"description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -14024,6 +14028,69 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"RRFRanker": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "rrf",
|
||||
"default": "rrf",
|
||||
"description": "The type of ranker, always \"rrf\""
|
||||
},
|
||||
"impact_factor": {
|
||||
"type": "number",
|
||||
"default": 60.0,
|
||||
"description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0. Default of 60 is from the original RRF paper (Cormack et al., 2009)."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"impact_factor"
|
||||
],
|
||||
"title": "RRFRanker",
|
||||
"description": "Reciprocal Rank Fusion (RRF) ranker configuration."
|
||||
},
|
||||
"Ranker": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/RRFRanker"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/WeightedRanker"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"rrf": "#/components/schemas/RRFRanker",
|
||||
"weighted": "#/components/schemas/WeightedRanker"
|
||||
}
|
||||
}
|
||||
},
|
||||
"WeightedRanker": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "weighted",
|
||||
"default": "weighted",
|
||||
"description": "The type of ranker, always \"weighted\""
|
||||
},
|
||||
"alpha": {
|
||||
"type": "number",
|
||||
"default": 0.5,
|
||||
"description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"alpha"
|
||||
],
|
||||
"title": "WeightedRanker",
|
||||
"description": "Weighted ranker configuration that combines vector and keyword scores."
|
||||
},
|
||||
"QueryRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
57
docs/_static/llama-stack-spec.yaml
vendored
57
docs/_static/llama-stack-spec.yaml
vendored
|
@ -9758,6 +9758,11 @@ components:
|
|||
description: >-
|
||||
Search mode for retrieval—either "vector", "keyword", or "hybrid". Default
|
||||
"vector".
|
||||
ranker:
|
||||
$ref: '#/components/schemas/Ranker'
|
||||
description: >-
|
||||
Configuration for the ranker to use in hybrid search. Defaults to RRF
|
||||
ranker.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- query_generator_config
|
||||
|
@ -9776,6 +9781,58 @@ components:
|
|||
mapping:
|
||||
default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
|
||||
llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
|
||||
RRFRanker:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: rrf
|
||||
default: rrf
|
||||
description: The type of ranker, always "rrf"
|
||||
impact_factor:
|
||||
type: number
|
||||
default: 60.0
|
||||
description: >-
|
||||
The impact factor for RRF scoring. Lower values give more weight to higher-ranked
|
||||
results. Must be greater than 0. Default of 60 is from the original RRF
|
||||
paper (Cormack et al., 2009).
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- impact_factor
|
||||
title: RRFRanker
|
||||
description: >-
|
||||
Reciprocal Rank Fusion (RRF) ranker configuration.
|
||||
Ranker:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/RRFRanker'
|
||||
- $ref: '#/components/schemas/WeightedRanker'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
rrf: '#/components/schemas/RRFRanker'
|
||||
weighted: '#/components/schemas/WeightedRanker'
|
||||
WeightedRanker:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: weighted
|
||||
default: weighted
|
||||
description: The type of ranker, always "weighted"
|
||||
alpha:
|
||||
type: number
|
||||
default: 0.5
|
||||
description: >-
|
||||
Weight factor between 0 and 1. 0 means only use keyword scores, 1 means
|
||||
only use vector scores, and values in between blend both scores.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- alpha
|
||||
title: WeightedRanker
|
||||
description: >-
|
||||
Weighted ranker configuration that combines vector and keyword scores.
|
||||
QueryRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
@ -79,6 +79,30 @@ response = await vector_io.query_chunks(
|
|||
query="your query here",
|
||||
params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
|
||||
)
|
||||
|
||||
# Using RRF ranker
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
query="your query here",
|
||||
params={
|
||||
"mode": "hybrid",
|
||||
"max_chunks": 3,
|
||||
"score_threshold": 0.7,
|
||||
"ranker": {"type": "rrf", "impact_factor": 60.0},
|
||||
},
|
||||
)
|
||||
|
||||
# Using weighted ranker
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
query="your query here",
|
||||
params={
|
||||
"mode": "hybrid",
|
||||
"max_chunks": 3,
|
||||
"score_threshold": 0.7,
|
||||
"ranker": {"type": "weighted", "alpha": 0.7}, # 70% vector, 30% keyword
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
Example with explicit vector search:
|
||||
|
@ -101,23 +125,67 @@ response = await vector_io.query_chunks(
|
|||
|
||||
## Supported Search Modes
|
||||
|
||||
The sqlite-vec provider supports both vector-based and keyword-based (full-text) search modes.
|
||||
The SQLite vector store supports three search modes:
|
||||
|
||||
When using the RAGTool interface, you can specify the desired search behavior via the `mode` parameter in
|
||||
`RAGQueryConfig`. For example:
|
||||
1. **Vector Search** (`mode="vector"`): Uses vector similarity to find relevant chunks
|
||||
2. **Keyword Search** (`mode="keyword"`): Uses keyword matching to find relevant chunks
|
||||
3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword scores using a ranker
|
||||
|
||||
### Hybrid Search
|
||||
|
||||
Hybrid search combines the strengths of both vector and keyword search by:
|
||||
- Computing vector similarity scores
|
||||
- Computing keyword match scores
|
||||
- Using a ranker to combine these scores
|
||||
|
||||
Two ranker types are supported:
|
||||
|
||||
1. **RRF (Reciprocal Rank Fusion)**:
|
||||
- Combines ranks from both vector and keyword results
|
||||
- Uses an impact factor (default: 60.0) to control the weight of higher-ranked results
|
||||
- Good for balancing between vector and keyword results
|
||||
- The default impact factor of 60.0 comes from the original RRF paper by Cormack et al. (2009) [^1], which found this value to provide optimal performance across various retrieval tasks
|
||||
|
||||
2. **Weighted**:
|
||||
- Linearly combines normalized vector and keyword scores
|
||||
- Uses an alpha parameter (0-1) to control the blend:
|
||||
- alpha=0: Only use keyword scores
|
||||
- alpha=1: Only use vector scores
|
||||
- alpha=0.5: Equal weight to both (default)
|
||||
|
||||
Example using RAGQueryConfig with different search modes:
|
||||
|
||||
```python
|
||||
from llama_stack.apis.tool_runtime.rag import RAGQueryConfig
|
||||
from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
|
||||
|
||||
query_config = RAGQueryConfig(max_chunks=6, mode="vector")
|
||||
# Vector search
|
||||
config = RAGQueryConfig(mode="vector", max_chunks=5)
|
||||
|
||||
results = client.tool_runtime.rag_tool.query(
|
||||
vector_db_ids=[vector_db_id],
|
||||
content="what is torchtune",
|
||||
query_config=query_config,
|
||||
# Keyword search
|
||||
config = RAGQueryConfig(mode="keyword", max_chunks=5)
|
||||
|
||||
# Hybrid search with custom RRF ranker
|
||||
config = RAGQueryConfig(
|
||||
mode="hybrid",
|
||||
max_chunks=5,
|
||||
ranker=RRFRanker(impact_factor=50.0), # Custom impact factor
|
||||
)
|
||||
|
||||
# Hybrid search with weighted ranker
|
||||
config = RAGQueryConfig(
|
||||
mode="hybrid",
|
||||
max_chunks=5,
|
||||
ranker=WeightedRanker(alpha=0.7), # 70% vector, 30% keyword
|
||||
)
|
||||
|
||||
# Hybrid search with default RRF ranker
|
||||
config = RAGQueryConfig(
|
||||
mode="hybrid", max_chunks=5
|
||||
) # Will use RRF with impact_factor=60.0
|
||||
```
|
||||
|
||||
Note: The ranker configuration is only used in hybrid mode. For vector or keyword modes, the ranker parameter is ignored.
|
||||
|
||||
## Installation
|
||||
|
||||
You can install SQLite-Vec using pip:
|
||||
|
@ -129,3 +197,5 @@ pip install sqlite-vec
|
|||
## Documentation
|
||||
|
||||
See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general.
|
||||
|
||||
[^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms Condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759).
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue