Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-28 10:54:19 +00:00)

4 commits

---

commit e92301f2d7
feat(sqlite-vec): enable keyword search for sqlite-vec (#1439)

# What does this PR do?

This PR introduces support for keyword-based FTS5 search with BM25 relevance scoring. It changes the existing `EmbeddingIndex` base class to accept `search_mode` and `query_str` parameters, which keyword-based search implementations can use.

## Test Plan

Run:

```
pytest llama_stack/providers/tests/vector_io/test_sqlite_vec.py -v -s --tb=short --disable-warnings --asyncio-mode=auto
```

Output:

```
/Users/vnarsing/miniconda3/envs/stack-client/lib/python3.10/site-packages/pytest_asyncio/plugin.py:207: PytestDeprecationWarning: The configuration option "asyncio_default_fixture_loop_scope" is unset.
The event loop scope for asynchronous fixtures will default to the fixture caching scope. Future versions of pytest-asyncio will default the loop scope for asynchronous fixtures to function scope. Set the default fixture loop scope explicitly in order to avoid unexpected behavior in the future. Valid fixture loop scopes are: "function", "class", "module", "package", "session"
  warnings.warn(PytestDeprecationWarning(_DEFAULT_FIXTURE_LOOP_SCOPE_UNSET))
====================================================== test session starts =======================================================
platform darwin -- Python 3.10.16, pytest-8.3.4, pluggy-1.5.0 -- /Users/vnarsing/miniconda3/envs/stack-client/bin/python
cachedir: .pytest_cache
metadata: {'Python': '3.10.16', 'Platform': 'macOS-14.7.4-arm64-arm-64bit', 'Packages': {'pytest': '8.3.4', 'pluggy': '1.5.0'}, 'Plugins': {'html': '4.1.1', 'metadata': '3.1.1', 'asyncio': '0.25.3', 'anyio': '4.8.0'}}
rootdir: /Users/vnarsing/go/src/github/meta-llama/llama-stack
configfile: pyproject.toml
plugins: html-4.1.1, metadata-3.1.1, asyncio-0.25.3, anyio-4.8.0
asyncio: mode=auto, asyncio_default_fixture_loop_scope=None
collected 7 items

llama_stack/providers/tests/vector_io/test_sqlite_vec.py::test_add_chunks PASSED
llama_stack/providers/tests/vector_io/test_sqlite_vec.py::test_query_chunks_vector PASSED
llama_stack/providers/tests/vector_io/test_sqlite_vec.py::test_query_chunks_fts PASSED
llama_stack/providers/tests/vector_io/test_sqlite_vec.py::test_chunk_id_conflict PASSED
llama_stack/providers/tests/vector_io/test_sqlite_vec.py::test_register_vector_db PASSED
llama_stack/providers/tests/vector_io/test_sqlite_vec.py::test_unregister_vector_db PASSED
llama_stack/providers/tests/vector_io/test_sqlite_vec.py::test_generate_chunk_id PASSED
```

For reference, with this implementation the FTS table looks like this:

```
Chunk ID: 9fbc39ce-c729-64a2-260f-c5ec9bb2a33e, Content: Sentence 0 from document 0
Chunk ID: 94062914-3e23-44cf-1e50-9e25821ba882, Content: Sentence 1 from document 0
Chunk ID: e6cfd559-4641-33ba-6ce1-7038226495eb, Content: Sentence 2 from document 0
Chunk ID: 1383af9b-f1f0-f417-4de5-65fe9456cc20, Content: Sentence 3 from document 0
Chunk ID: 2db19b1a-de14-353b-f4e1-085e8463361c, Content: Sentence 4 from document 0
Chunk ID: 9faf986a-f028-7714-068a-1c795e8f2598, Content: Sentence 5 from document 0
Chunk ID: ef593ead-5a4a-392f-7ad8-471a50f033e8, Content: Sentence 6 from document 0
Chunk ID: e161950f-021f-7300-4d05-3166738b94cf, Content: Sentence 7 from document 0
Chunk ID: 90610fc4-67c1-e740-f043-709c5978867a, Content: Sentence 8 from document 0
Chunk ID: 97712879-6fff-98ad-0558-e9f42e6b81d3, Content: Sentence 9 from document 0
Chunk ID: aea70411-51df-61ba-d2f0-cb2b5972c210, Content: Sentence 0 from document 1
Chunk ID: b678a463-7b84-92b8-abb2-27e9a1977e3c, Content: Sentence 1 from document 1
Chunk ID: 27bd63da-909c-1606-a109-75bdb9479882, Content: Sentence 2 from document 1
Chunk ID: a2ad49ad-f9be-5372-e0c7-7b0221d0b53e, Content: Sentence 3 from document 1
Chunk ID: cac53bcd-1965-082a-c0f4-ceee7323fc70, Content: Sentence 4 from document 1
```

Query results:

```
Result 1: Sentence 5 from document 0
Result 2: Sentence 5 from document 1
Result 3: Sentence 5 from document 2
```

Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
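
For readers who want to see the underlying mechanism, here is a minimal, self-contained sketch of the FTS5 + BM25 pattern this PR builds on. It uses only the stdlib `sqlite3` module and assumes the bundled SQLite was compiled with FTS5 (CPython's usually is); the `chunks_fts` table and its columns are illustrative, not the provider's actual schema:

```python
import sqlite3

conn = sqlite3.connect(":memory:")

# FTS5 virtual table; chunk_id is stored but excluded from the full-text index.
conn.execute("CREATE VIRTUAL TABLE chunks_fts USING fts5(chunk_id UNINDEXED, content)")
conn.executemany(
    "INSERT INTO chunks_fts (chunk_id, content) VALUES (?, ?)",
    [
        ("chunk-00", "Sentence 0 from document 0"),
        ("chunk-05", "Sentence 5 from document 0"),
        ("chunk-15", "Sentence 5 from document 1"),
    ],
)

# bm25() returns a relevance score where smaller (more negative) means a
# better match, so an ascending ORDER BY lists the best hits first.
rows = conn.execute(
    """
    SELECT chunk_id, content, bm25(chunks_fts) AS score
    FROM chunks_fts
    WHERE chunks_fts MATCH ?
    ORDER BY score
    LIMIT 3
    """,
    ("sentence 5",),
).fetchall()

for chunk_id, content, score in rows:
    print(f"{chunk_id}: {content} (score={score:.3f})")
```

Running this prints only the two "Sentence 5" chunks (FTS5 treats space-separated terms as an implicit AND), best match first, mirroring the query results shown above.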

---

commit 37b6da37ba
docs: Document sqlite-vec faiss comparison (#1821)

# What does this PR do?

This PR documents and benchmarks the performance tradeoffs between the sqlite-vec and FAISS inline VectorDB providers.

Closes https://github.com/meta-llama/llama-stack/issues/1165

## Test Plan

The test was run using this script:

<details>
<summary>CLICK TO SHOW SCRIPT 👋</summary>

```python
import cProfile
import os
import uuid
import time
import random
import string

import matplotlib.pyplot as plt
import pandas as pd
from termcolor import cprint

from llama_stack_client.types import Document
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from memory_profiler import profile
from line_profiler import LineProfiler

os.environ["INFERENCE_MODEL"] = "llama3.2:3b-instruct-fp16"
os.environ["LLAMA_STACK_CONFIG"] = "ollama"


def generate_random_chars(count=400):
    return ''.join(random.choices(string.ascii_letters, k=count))


def generate_documents(num_docs: int, num_chars: int):
    documents = [
        Document(
            document_id=f"doc-{i}",
            content=f"Document content for document {i} - {generate_random_chars(count=num_chars)}",
            mime_type="text/plain",
            metadata={},
        )
        for i in range(num_docs)
    ]
    return documents


@profile
def benchmark_write(client, vector_db_id, documents, batch_size=100):
    write_times = []
    for i in range(0, len(documents), batch_size):
        batch = documents[i:i + batch_size]
        start_time = time.time()
        client.tool_runtime.rag_tool.insert(
            documents=batch,
            vector_db_id=vector_db_id,
            chunk_size_in_tokens=512,
        )
        end_time = time.time()
        write_times.append(end_time - start_time)
    return write_times


@profile
def benchmark_read(client, provider_id, vector_db_id, user_prompts):
    response_times = []
    for prompt in user_prompts:
        start_time = time.time()
        response = client.vector_io.query(
            vector_db_id=vector_db_id,
            query=prompt,
        )
        end_time = time.time()
        response_times.append(end_time - start_time)
    return response_times


def profile_functions():
    profiler = LineProfiler()
    profiler.add_function(benchmark_write)
    profiler.add_function(benchmark_read)
    return profiler


def plot_results(output, batch_size):
    # Create DataFrames for easy manipulation
    df_sqlite = pd.DataFrame(output['sqlite-vec'])
    df_faiss = pd.DataFrame(output['faiss'])
    # Convert both write and read times to milliseconds so they match the
    # axis labels below (the read-time conversion was missing originally).
    df_sqlite['write_times'] *= 1000
    df_faiss['write_times'] *= 1000
    df_sqlite['read_times'] *= 1000
    df_faiss['read_times'] *= 1000

    avg_write_sqlite = df_sqlite['write_times'].mean()
    avg_write_faiss = df_faiss['write_times'].mean()
    avg_read_sqlite = df_sqlite['read_times'].mean()
    avg_read_faiss = df_faiss['read_times'].mean()

    plt.figure(figsize=(12, 6))
    plt.hist(df_sqlite['write_times'], bins=10, alpha=0.5, color='blue', label='sqlite-vec Write Times')
    plt.hist(df_faiss['write_times'], bins=10, alpha=0.5, color='red', label='faiss Write Times')
    plt.axvline(avg_write_sqlite, color='blue', linestyle='--', label=f'Average Write Time (sqlite-vec): {avg_write_sqlite:.3f} ms')
    plt.axvline(avg_write_faiss, color='red', linestyle='--', label=f'Average Write Time (faiss): {avg_write_faiss:.3f} ms')
    plt.title(f'Histogram of Write Times for sqlite-vec and faiss\nn = {df_faiss.shape[0]} with batch size = {batch_size}')
    plt.xlabel('Time (milliseconds)')
    plt.ylabel('Density')
    plt.legend()
    plt.savefig('write_time_comparison.png')
    plt.close()

    plt.figure(figsize=(12, 6))
    plt.hist(df_sqlite['read_times'], bins=10, alpha=0.5, color='blue', label='sqlite-vec Read Times')
    plt.hist(df_faiss['read_times'], bins=10, alpha=0.5, color='red', label='faiss Read Times')
    plt.axvline(avg_read_sqlite, color='blue', linestyle='--', label=f'Average Read Time (sqlite-vec): {avg_read_sqlite:.3f} ms')
    plt.axvline(avg_read_faiss, color='red', linestyle='--', label=f'Average Read Time (faiss): {avg_read_faiss:.3f} ms')
    plt.title(f'Histogram of Read Times for sqlite-vec and faiss\nn = {df_faiss.shape[0]}')
    plt.xlabel('Time (milliseconds)')
    plt.ylabel('Density')
    plt.legend()
    plt.savefig('read_time_comparison.png')
    plt.close()

    plt.figure(figsize=(12, 6))
    plt.plot(df_sqlite.index, df_sqlite['write_times'], marker='o', markersize=4, linestyle='-', color='blue', label='sqlite-vec Write Times')
    plt.plot(df_faiss.index, df_faiss['write_times'], marker='x', markersize=4, linestyle='-', color='red', label='faiss Write Times')
    plt.title(f'Write Times by Operation Sequence\n(batch size = {batch_size})')
    plt.xlabel('Write Operation Sequence')
    plt.ylabel('Time (milliseconds)')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig('write_time_sequence.png')
    plt.close()

    # Print out the summary tables
    print("\nPerformance Summary for sqlite-vec:")
    print(df_sqlite)
    print("\nPerformance Summary for faiss:")
    print(df_faiss)


def main():
    # Initialize the client
    client = LlamaStackAsLibraryClient("ollama")
    vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"
    _ = client.initialize()

    # Generate a large dataset
    num_chars = 50
    num_docs = 100
    num_writes = 100
    write_batch_size = 100
    num_reads = 100

    documents = generate_documents(num_docs * write_batch_size, num_chars)
    user_prompts = [
        f"Tell me about document {i}" for i in range(1, num_reads + 1)
    ]

    providers = ["sqlite-vec", "faiss"]
    output = {
        provider_id: {"write_times": None, "read_times": None} for provider_id in providers
    }

    # Benchmark writes and reads for sqlite-vec and faiss
    for provider_id in providers:
        cprint(f"Benchmarking provider: {provider_id}", "yellow")
        client.vector_dbs.register(
            provider_id=provider_id,
            vector_db_id=vector_db_id,
            embedding_model="all-MiniLM-L6-v2",
            embedding_dimension=384,
        )
        write_times = benchmark_write(client, vector_db_id, documents, write_batch_size)
        average_write_time_ms = sum(write_times) / len(write_times) * 1000.
        cprint(f"Average write time for {provider_id} is {average_write_time_ms:.2f} milliseconds for {num_writes} runs", "blue")

        cprint(f"Benchmarking reads for provider: {provider_id}", "yellow")
        read_times = benchmark_read(client, provider_id, vector_db_id, user_prompts)
        average_read_time_ms = sum(read_times) / len(read_times) * 1000.
        cprint(f"Average read time for {provider_id} is {average_read_time_ms:.2f} milliseconds for {num_reads} runs", "blue")

        client.vector_dbs.unregister(vector_db_id=vector_db_id)
        output[provider_id]['write_times'] = write_times
        output[provider_id]['read_times'] = read_times

    # Generate plots and summary tables
    plot_results(output, write_batch_size)


if __name__ == "__main__":
    cProfile.run('main()', 'profile_output.prof')
```

</details>

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
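
As a side note (not part of the PR): since the script saves its profile to `profile_output.prof` via `cProfile.run`, the dump can be inspected afterwards with the stdlib `pstats` module; a minimal sketch:

```python
import pstats

# Load the dump written by cProfile.run('main()', 'profile_output.prof')
# and print the ten functions with the highest cumulative time.
stats = pstats.Stats("profile_output.prof")
stats.sort_stats("cumulative").print_stats(10)
```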

---

commit 8bbd52bb9f
chore: remove dependency on llama_models completely (#1344)

---

commit 19ae4b35d9
docs: Adding Provider sections to docs (#1195)

# What does this PR do?

Adding Provider sections to the docs (some of these will be empty and need updating). This PR is still a draft while I seek feedback from other contributors; I opened it to make the structure visible in the linked GitHub issue.

Closes https://github.com/meta-llama/llama-stack/issues/1189

- Providers Overview page (screenshot omitted)
- SQLite-Vec specific page (screenshot omitted)

## Test Plan

N/A

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>