chore(cleanup)!: remove tool_runtime.rag_tool (#3871)

Kill the `builtin::rag` tool group completely since it is no longer
targeted. We use the Responses implementation for knowledge_search which
uses the `openai_vector_stores` pathway.

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Ashwin Bharambe 2025-10-20 22:26:21 -07:00 committed by GitHub
parent 5aaf1a8bca
commit 0e96279bee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 17 additions and 3114 deletions

View file

@ -42,6 +42,7 @@ def available_providers() -> list[ProviderSpec]:
# CrossEncoder depends on torchao.quantization
pip_packages=[
"torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
"numpy tqdm transformers",
"sentence-transformers --no-deps",
# required by some SentenceTransformers architectures for tensor rearrange/merge ops
"einops",

View file

@ -7,33 +7,13 @@
from llama_stack.providers.datatypes import (
Api,
InlineProviderSpec,
ProviderSpec,
RemoteProviderSpec,
)
from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
def available_providers() -> list[ProviderSpec]:
return [
InlineProviderSpec(
api=Api.tool_runtime,
provider_type="inline::rag-runtime",
pip_packages=DEFAULT_VECTOR_IO_DEPS
+ [
"tqdm",
"numpy",
"scikit-learn",
"scipy",
"nltk",
"sentencepiece",
"transformers",
],
module="llama_stack.providers.inline.tool_runtime.rag",
config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig",
api_dependencies=[Api.vector_io, Api.inference, Api.files],
description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.",
),
RemoteProviderSpec(
api=Api.tool_runtime,
adapter_type="brave-search",

View file

@ -119,7 +119,7 @@ Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, i
#### Empirical Example
Consider the histogram below in which 10,000 randomly generated strings were inserted
in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`.
in batches of 100 into both Faiss and sqlite-vec.
```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
:alt: Comparison of SQLite-Vec and Faiss write times