mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-09 13:14:39 +00:00
Merge branch 'main' into bugfix/model-type
This commit is contained in:
commit
41948027df
205 changed files with 22222 additions and 10702 deletions
|
@ -294,12 +294,12 @@ class VectorDBWithIndex:
|
|||
_validate_embedding(c.embedding, i, self.vector_db.embedding_dimension)
|
||||
|
||||
if chunks_to_embed:
|
||||
resp = await self.inference_api.embeddings(
|
||||
resp = await self.inference_api.openai_embeddings(
|
||||
self.vector_db.embedding_model,
|
||||
[c.content for c in chunks_to_embed],
|
||||
)
|
||||
for c, embedding in zip(chunks_to_embed, resp.embeddings, strict=False):
|
||||
c.embedding = embedding
|
||||
for c, data in zip(chunks_to_embed, resp.data, strict=False):
|
||||
c.embedding = data.embedding
|
||||
|
||||
embeddings = np.array([c.embedding for c in chunks], dtype=np.float32)
|
||||
await self.index.add_chunks(chunks, embeddings)
|
||||
|
@ -334,8 +334,8 @@ class VectorDBWithIndex:
|
|||
if mode == "keyword":
|
||||
return await self.index.query_keyword(query_string, k, score_threshold)
|
||||
|
||||
embeddings_response = await self.inference_api.embeddings(self.vector_db.embedding_model, [query_string])
|
||||
query_vector = np.array(embeddings_response.embeddings[0], dtype=np.float32)
|
||||
embeddings_response = await self.inference_api.openai_embeddings(self.vector_db.embedding_model, [query_string])
|
||||
query_vector = np.array(embeddings_response.data[0].embedding, dtype=np.float32)
|
||||
if mode == "hybrid":
|
||||
return await self.index.query_hybrid(
|
||||
query_vector, query_string, k, score_threshold, reranker_type, reranker_params
|
||||
|
|
|
@ -23,6 +23,7 @@ from sqlalchemy import (
|
|||
)
|
||||
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
|
||||
from sqlalchemy.ext.asyncio.engine import AsyncEngine
|
||||
from sqlalchemy.sql.elements import ColumnElement
|
||||
|
||||
from llama_stack.apis.common.responses import PaginatedResponse
|
||||
from llama_stack.log import get_logger
|
||||
|
@ -43,6 +44,30 @@ TYPE_MAPPING: dict[ColumnType, Any] = {
|
|||
}
|
||||
|
||||
|
||||
def _build_where_expr(column: ColumnElement, value: Any) -> ColumnElement:
|
||||
"""Return a SQLAlchemy expression for a where condition.
|
||||
|
||||
`value` may be a simple scalar (equality) or a mapping like {">": 123}.
|
||||
The returned expression is a SQLAlchemy ColumnElement usable in query.where(...).
|
||||
"""
|
||||
if isinstance(value, Mapping):
|
||||
if len(value) != 1:
|
||||
raise ValueError(f"Operator mapping must have a single operator, got: {value}")
|
||||
op, operand = next(iter(value.items()))
|
||||
if op == "==" or op == "=":
|
||||
return column == operand
|
||||
if op == ">":
|
||||
return column > operand
|
||||
if op == "<":
|
||||
return column < operand
|
||||
if op == ">=":
|
||||
return column >= operand
|
||||
if op == "<=":
|
||||
return column <= operand
|
||||
raise ValueError(f"Unsupported operator '{op}' in where mapping")
|
||||
return column == value
|
||||
|
||||
|
||||
class SqlAlchemySqlStoreImpl(SqlStore):
|
||||
def __init__(self, config: SqlAlchemySqlStoreConfig):
|
||||
self.config = config
|
||||
|
@ -111,7 +136,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
|
|||
|
||||
if where:
|
||||
for key, value in where.items():
|
||||
query = query.where(table_obj.c[key] == value)
|
||||
query = query.where(_build_where_expr(table_obj.c[key], value))
|
||||
|
||||
if where_sql:
|
||||
query = query.where(text(where_sql))
|
||||
|
@ -222,7 +247,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
|
|||
async with self.async_session() as session:
|
||||
stmt = self.metadata.tables[table].update()
|
||||
for key, value in where.items():
|
||||
stmt = stmt.where(self.metadata.tables[table].c[key] == value)
|
||||
stmt = stmt.where(_build_where_expr(self.metadata.tables[table].c[key], value))
|
||||
await session.execute(stmt, data)
|
||||
await session.commit()
|
||||
|
||||
|
@ -233,7 +258,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
|
|||
async with self.async_session() as session:
|
||||
stmt = self.metadata.tables[table].delete()
|
||||
for key, value in where.items():
|
||||
stmt = stmt.where(self.metadata.tables[table].c[key] == value)
|
||||
stmt = stmt.where(_build_where_expr(self.metadata.tables[table].c[key], value))
|
||||
await session.execute(stmt)
|
||||
await session.commit()
|
||||
|
||||
|
|
|
@ -37,3 +37,122 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str:
|
|||
else:
|
||||
s = proper_case(re.sub(r"[^a-zA-Z0-9]", "", name))
|
||||
return s
|
||||
|
||||
|
||||
class WeightedInMemoryAggregator:
|
||||
@staticmethod
|
||||
def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
|
||||
"""
|
||||
Normalize scores to 0-1 range using min-max normalization.
|
||||
|
||||
Args:
|
||||
scores: dictionary of scores with document IDs as keys and scores as values
|
||||
|
||||
Returns:
|
||||
Normalized scores with document IDs as keys and normalized scores as values
|
||||
"""
|
||||
if not scores:
|
||||
return {}
|
||||
min_score, max_score = min(scores.values()), max(scores.values())
|
||||
score_range = max_score - min_score
|
||||
if score_range > 0:
|
||||
return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()}
|
||||
return dict.fromkeys(scores, 1.0)
|
||||
|
||||
@staticmethod
|
||||
def weighted_rerank(
|
||||
vector_scores: dict[str, float],
|
||||
keyword_scores: dict[str, float],
|
||||
alpha: float = 0.5,
|
||||
) -> dict[str, float]:
|
||||
"""
|
||||
Rerank via weighted average of scores.
|
||||
|
||||
Args:
|
||||
vector_scores: scores from vector search
|
||||
keyword_scores: scores from keyword search
|
||||
alpha: weight factor between 0 and 1 (default: 0.5)
|
||||
0 = keyword only, 1 = vector only, 0.5 = equal weight
|
||||
|
||||
Returns:
|
||||
All unique document IDs with weighted combined scores
|
||||
"""
|
||||
all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
|
||||
normalized_vector_scores = WeightedInMemoryAggregator._normalize_scores(vector_scores)
|
||||
normalized_keyword_scores = WeightedInMemoryAggregator._normalize_scores(keyword_scores)
|
||||
|
||||
# Weighted formula: score = (1-alpha) * keyword_score + alpha * vector_score
|
||||
# alpha=0 means keyword only, alpha=1 means vector only
|
||||
return {
|
||||
doc_id: ((1 - alpha) * normalized_keyword_scores.get(doc_id, 0.0))
|
||||
+ (alpha * normalized_vector_scores.get(doc_id, 0.0))
|
||||
for doc_id in all_ids
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def rrf_rerank(
|
||||
vector_scores: dict[str, float],
|
||||
keyword_scores: dict[str, float],
|
||||
impact_factor: float = 60.0,
|
||||
) -> dict[str, float]:
|
||||
"""
|
||||
Rerank via Reciprocal Rank Fusion.
|
||||
|
||||
Args:
|
||||
vector_scores: scores from vector search
|
||||
keyword_scores: scores from keyword search
|
||||
impact_factor: impact factor for RRF (default: 60.0)
|
||||
|
||||
Returns:
|
||||
All unique document IDs with RRF combined scores
|
||||
"""
|
||||
|
||||
# Convert scores to ranks
|
||||
vector_ranks = {
|
||||
doc_id: i + 1
|
||||
for i, (doc_id, _) in enumerate(sorted(vector_scores.items(), key=lambda x: x[1], reverse=True))
|
||||
}
|
||||
keyword_ranks = {
|
||||
doc_id: i + 1
|
||||
for i, (doc_id, _) in enumerate(sorted(keyword_scores.items(), key=lambda x: x[1], reverse=True))
|
||||
}
|
||||
|
||||
all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
|
||||
rrf_scores = {}
|
||||
for doc_id in all_ids:
|
||||
vector_rank = vector_ranks.get(doc_id, float("inf"))
|
||||
keyword_rank = keyword_ranks.get(doc_id, float("inf"))
|
||||
|
||||
# RRF formula: score = 1/(k + r) where k is impact_factor (default: 60.0) and r is the rank
|
||||
rrf_scores[doc_id] = (1.0 / (impact_factor + vector_rank)) + (1.0 / (impact_factor + keyword_rank))
|
||||
return rrf_scores
|
||||
|
||||
@staticmethod
|
||||
def combine_search_results(
|
||||
vector_scores: dict[str, float],
|
||||
keyword_scores: dict[str, float],
|
||||
reranker_type: str = "rrf",
|
||||
reranker_params: dict[str, float] | None = None,
|
||||
) -> dict[str, float]:
|
||||
"""
|
||||
Combine vector and keyword search results using specified reranking strategy.
|
||||
|
||||
Args:
|
||||
vector_scores: scores from vector search
|
||||
keyword_scores: scores from keyword search
|
||||
reranker_type: type of reranker to use (default: RERANKER_TYPE_RRF)
|
||||
reranker_params: parameters for the reranker
|
||||
|
||||
Returns:
|
||||
All unique document IDs with combined scores
|
||||
"""
|
||||
if reranker_params is None:
|
||||
reranker_params = {}
|
||||
|
||||
if reranker_type == "weighted":
|
||||
alpha = reranker_params.get("alpha", 0.5)
|
||||
return WeightedInMemoryAggregator.weighted_rerank(vector_scores, keyword_scores, alpha)
|
||||
else:
|
||||
# Default to RRF for None, RRF, or any unknown types
|
||||
impact_factor = reranker_params.get("impact_factor", 60.0)
|
||||
return WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue