Mirror of https://github.com/meta-llama/llama-stack.git
Synced 2025-10-04 04:04:14 +00:00

Merge c71bcd5479 into 2f58d87c22
Commit c4f3d41c57

6 changed files with 109 additions and 24 deletions
@@ -4,12 +4,12 @@

Agents API for creating and interacting with agentic systems.

Main functionalities provided by this API:
- Create agents with specific instructions and ability to use tools.
- Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn".
- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
- Agents can be provided with various shields (see the Safety API for more details).
- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.

This section contains documentation for all available providers for the **agents** API.
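The overview above maps to a short client-side flow: create an agent, open a session, then run turns. A minimal sketch assuming the llama-stack-client Python SDK; the base URL, model id, and exact constructor signature are assumptions (they vary across SDK versions) and are not taken from this commit:

from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.agents.agent import Agent

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed server address

# Hypothetical agent configuration; tools and shields would be listed here as described above.
agent = Agent(
    client,
    model="meta-llama/Llama-3.2-3B-Instruct",  # placeholder model id
    instructions="You are a helpful assistant.",
)

session_id = agent.create_session("demo-session")  # a session groups related turns
turn = agent.create_turn(
    session_id=session_id,
    messages=[{"role": "user", "content": "Summarize the Agents API."}],
    stream=False,
)
print(turn.output_message.content)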
@@ -3,15 +3,15 @@

## Overview

The Batches API enables efficient processing of multiple requests in a single operation,
particularly useful for processing large datasets, batch evaluation workflows, and
cost-effective inference at scale.

The API is designed to allow use of openai client libraries for seamless integration.

This API provides the following extensions:
- idempotent batch creation

Note: This API is currently under active development and may undergo changes.

This section contains documentation for all available providers for the **batches** API.
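Since the overview says the API is designed for use with openai client libraries, here is a minimal sketch using the openai Python SDK pointed at a Llama Stack server; the base URL and file name are assumptions, and the idempotent-creation extension is not shown because its parameter name does not appear in this diff:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed OpenAI-compatible endpoint

# Upload a JSONL file of chat-completion requests, then create a batch over it.
batch_input = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")
batch = client.batches.create(
    input_file_id=batch_input.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
print(batch.id, batch.status)  # poll client.batches.retrieve(batch.id) until it completes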
@@ -4,9 +4,9 @@

Llama Stack Inference API for generating completions, chat completions, and embeddings.

This API provides the raw interface to the underlying models. Two kinds of models are supported:
- LLM models: these models generate "raw" and "chat" (conversational) completions.
- Embedding models: these models generate embeddings to be used for semantic search.

This section contains documentation for all available providers for the **inference** API.
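A minimal sketch exercising both model kinds through an OpenAI-compatible client; the base URL and model identifiers are placeholders rather than values from this commit:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed endpoint

# LLM model: chat (conversational) completion
chat = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",  # placeholder model id
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(chat.choices[0].message.content)

# Embedding model: vectors for semantic search
emb = client.embeddings.create(
    model="all-MiniLM-L6-v2",  # placeholder embedding model id
    input=["llama stack inference"],
)
print(len(emb.data[0].embedding))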
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 import asyncio
+import heapq
 import json
 from typing import Any
 from urllib.parse import urlparse

@@ -30,6 +31,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorDBWithIndex,
 )
+from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator

 from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
@@ -114,7 +116,38 @@ class ChromaIndex(EmbeddingIndex):
         k: int,
         score_threshold: float,
     ) -> QueryChunksResponse:
-        raise NotImplementedError("Keyword search is not supported in Chroma")
+        results = await maybe_await(
+            self.collection.query(
+                query_texts=[query_string],
+                where_document={"$contains": query_string},
+                n_results=k,
+                include=["documents", "distances"],
+            )
+        )
+
+        distances = results["distances"][0] if results["distances"] else []
+        documents = results["documents"][0] if results["documents"] else []
+
+        chunks = []
+        scores = []
+
+        for dist, doc in zip(distances, documents, strict=False):
+            try:
+                doc_data = json.loads(doc)
+                chunk = Chunk(**doc_data)
+            except Exception:
+                log.exception(f"Failed to load chunk: {doc}")
+                continue
+
+            score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0
+
+            if score < score_threshold:
+                continue
+
+            chunks.append(chunk)
+            scores.append(score)
+
+        return QueryChunksResponse(chunks=chunks, scores=scores)

     async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
         """Delete a single chunk from the Chroma collection by its ID."""
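The keyword path above converts a Chroma distance into a similarity score with score = 1.0 / (1.0 + dist), so a distance of 0 gives 1.0 and larger distances shrink toward 0; a quick illustration:

# Same mapping as in query_keyword above; the distances are just example values.
for dist in (0.0, 0.5, 3.0):
    print(dist, "->", round(1.0 / (1.0 + dist), 3))  # 1.0, 0.667, 0.25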
@@ -130,7 +163,57 @@ class ChromaIndex(EmbeddingIndex):
         reranker_type: str,
         reranker_params: dict[str, Any] | None = None,
     ) -> QueryChunksResponse:
-        raise NotImplementedError("Hybrid search is not supported in Chroma")
+        """
+        Hybrid search combining vector similarity and keyword search using configurable reranking.
+
+        Args:
+            embedding: The query embedding vector
+            query_string: The text query for keyword search
+            k: Number of results to return
+            score_threshold: Minimum similarity score threshold
+            reranker_type: Type of reranker to use ("rrf" or "weighted")
+            reranker_params: Parameters for the reranker
+
+        Returns:
+            QueryChunksResponse with combined results
+        """
+        if reranker_params is None:
+            reranker_params = {}
+
+        # Get results from both search methods
+        vector_response = await self.query_vector(embedding, k, score_threshold)
+        keyword_response = await self.query_keyword(query_string, k, score_threshold)
+
+        # Convert responses to score dictionaries using chunk_id
+        vector_scores = {
+            chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
+        }
+        keyword_scores = {
+            chunk.chunk_id: score
+            for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
+        }
+
+        # Combine scores using the reranking utility
+        combined_scores = WeightedInMemoryAggregator.combine_search_results(
+            vector_scores, keyword_scores, reranker_type, reranker_params
+        )
+
+        # Efficient top-k selection because it only tracks the k best candidates it's seen so far
+        top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1])
+
+        # Filter by score threshold
+        filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
+
+        # Create a map of chunk_id to chunk for both responses
+        chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
+
+        # Use the map to look up chunks by their IDs
+        chunks = []
+        scores = []
+        for doc_id, score in filtered_items:
+            if doc_id in chunk_map:
+                chunks.append(chunk_map[doc_id])
+                scores.append(score)
+
+        return QueryChunksResponse(chunks=chunks, scores=scores)


 class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
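WeightedInMemoryAggregator.combine_search_results is the shared reranking utility; its internals are not part of this diff, but a standard reciprocal rank fusion ("rrf") over the two score dictionaries looks roughly like this sketch (the impact_factor default is an assumed illustration, not the library's value):

def rrf_combine(
    vector_scores: dict[str, float],
    keyword_scores: dict[str, float],
    impact_factor: float = 60.0,
) -> dict[str, float]:
    # Rank each result set by descending score, then sum 1 / (impact_factor + rank)
    # so chunks that rank well in both searches float to the top.
    combined: dict[str, float] = {}
    for scores in (vector_scores, keyword_scores):
        for rank, chunk_id in enumerate(sorted(scores, key=scores.get, reverse=True), start=1):
            combined[chunk_id] = combined.get(chunk_id, 0.0) + 1.0 / (impact_factor + rank)
    return combined

The "weighted" mode would instead blend normalized vector and keyword scores with a configurable weight; either way, the combined scores feed the heapq.nlargest top-k selection and threshold filter shown above.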
uv.lock (generated): 2 changes
@@ -1767,6 +1767,7 @@ dependencies = [
    { name = "opentelemetry-exporter-otlp-proto-http" },
    { name = "opentelemetry-sdk" },
    { name = "pillow" },
    { name = "pre-commit" },
    { name = "prompt-toolkit" },
    { name = "pydantic" },
    { name = "python-dotenv" },
@@ -1892,6 +1893,7 @@ requires-dist = [
    { name = "opentelemetry-sdk", specifier = ">=1.30.0" },
    { name = "pandas", marker = "extra == 'ui'" },
    { name = "pillow" },
    { name = "pre-commit", specifier = ">=4.2.0" },
    { name = "prompt-toolkit" },
    { name = "pydantic", specifier = ">=2.11.9" },
    { name = "python-dotenv" },