mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-25 09:05:37 +00:00 
			
		
		
		
	| # What does this PR do? This PR documents and benchmarks the performance tradeoffs between sqlite-vec and FAISS inline VectorDB providers. # Closes https://github.com/meta-llama/llama-stack/issues/1165 ## Test Plan The test was run using this script: <details> <summary>CLICK TO SHOW SCRIPT 👋 </summary> ```python import cProfile import os import uuid import time import random import string import matplotlib.pyplot as plt import pandas as pd from termcolor import cprint from llama_stack_client.types import Document from llama_stack.distribution.library_client import LlamaStackAsLibraryClient from memory_profiler import profile from line_profiler import LineProfiler os.environ["INFERENCE_MODEL"] = "llama3.2:3b-instruct-fp16" os.environ["LLAMA_STACK_CONFIG"] = "ollama" def generate_random_chars(count=400): return ''.join(random.choices(string.ascii_letters, k=count)) def generate_documents(num_docs: int, num_chars: int): documents = [ Document( document_id=f"doc-{i}", content=f"Document content for document {i} - {generate_random_chars(count=num_chars)}", mime_type="text/plain", metadata={}, ) for i in range(num_docs) ] return documents @profile def benchmark_write(client, vector_db_id, documents, batch_size=100): write_times = [] for i in range(0, len(documents), batch_size): batch = documents[i:i + batch_size] start_time = time.time() client.tool_runtime.rag_tool.insert( documents=batch, vector_db_id=vector_db_id, chunk_size_in_tokens=512, ) end_time = time.time() write_times.append(end_time - start_time) return write_times @profile def benchmark_read(client, provider_id, vector_db_id, user_prompts): response_times = [] for prompt in user_prompts: start_time = time.time() response = client.vector_io.query( vector_db_id=vector_db_id, query=prompt, ) end_time = time.time() response_times.append(end_time - start_time) return response_times def profile_functions(): profiler = LineProfiler() profiler.add_function(benchmark_write) profiler.add_function(benchmark_read) return profiler def plot_results(output, batch_size): # Create a DataFrame for easy manipulation df_sqlite = pd.DataFrame(output['sqlite-vec']) df_faiss = pd.DataFrame(output['faiss']) df_sqlite['write_times'] *= 1000 df_faiss['write_times'] *= 1000 avg_write_sqlite = df_sqlite['write_times'].mean() avg_write_faiss = df_faiss['write_times'].mean() avg_read_sqlite = df_sqlite['read_times'].mean() avg_read_faiss = df_faiss['read_times'].mean() plt.figure(figsize=(12, 6)) plt.hist(df_sqlite['write_times'], bins=10, alpha=0.5, color='blue', label='sqlite-vec Write Times') plt.hist(df_faiss['write_times'], bins=10, alpha=0.5, color='red', label='faiss Write Times') plt.axvline(avg_write_sqlite, color='blue', linestyle='--', label=f'Average Write Time (sqlite-vec): {avg_write_sqlite:.3f} ms') plt.axvline(avg_write_faiss, color='red', linestyle='--', label=f'Average Write Time (faiss): {avg_write_faiss:.3f} ms') plt.title(f'Histogram of Write Times for sqlite-vec and faiss\nn = {df_faiss.shape[0]} with batch size = {batch_size}') plt.xlabel('Time (milliseconds)') plt.ylabel('Density') plt.legend() plt.savefig('write_time_comparison.png') plt.close() plt.figure(figsize=(12, 6)) plt.hist(df_sqlite['read_times'], bins=10, alpha=0.5, color='blue', label='sqlite-vec Read Times') plt.hist(df_faiss['read_times'], bins=10, alpha=0.5, color='red', label='faiss Read Times') plt.axvline(avg_read_sqlite, color='blue', linestyle='--', label=f'Average Read Time (sqlite-vec): {avg_read_sqlite:.3f} ms') plt.axvline(avg_read_faiss, color='red', linestyle='--', label=f'Average Read Time (faiss): {avg_read_faiss:.3f} ms') plt.title(f'Histogram of Read Times for sqlite-vec and faiss\nn = {df_faiss.shape[0]}') plt.xlabel('Time (milliseconds)') plt.ylabel('Density') plt.legend() plt.savefig('read_time_comparison.png') plt.close() plt.figure(figsize=(12, 6)) plt.hist(df_sqlite['read_times'], bins=10, alpha=0.5, color='blue', label='sqlite-vec Read Times') plt.hist(df_faiss['read_times'], bins=10, alpha=0.5, color='red', label='faiss Read Times') plt.axvline(avg_read_sqlite, color='blue', linestyle='--', label=f'Average Read Time (sqlite-vec): {avg_read_sqlite:.3f} ms') plt.axvline(avg_read_faiss, color='red', linestyle='--', label=f'Average Read Time (faiss): {avg_read_faiss:.3f} ms') plt.title(f'Histogram of Read Times for sqlite-vec and faiss\nn = {df_faiss.shape[0]}') plt.xlabel('Time (milliseconds)') plt.ylabel('Density') plt.legend() plt.savefig('read_time_comparison.png') plt.close() plt.figure(figsize=(12, 6)) plt.plot(df_sqlite.index, df_sqlite['write_times'], marker='o', markersize=4, linestyle='-', color='blue', label='sqlite-vec Write Times') plt.plot(df_faiss.index, df_faiss['write_times'], marker='x', markersize=4, linestyle='-', color='red', label='faiss Write Times') plt.title(f'Write Times by Operation Sequence\n(batch size = {batch_size})') plt.xlabel('Write Operation Sequence') plt.ylabel('Time (milliseconds)') plt.legend() plt.grid(True, linestyle='--', alpha=0.7) plt.tight_layout() plt.savefig('write_time_sequence.png') plt.close() # Print out the summary table print("\nPerformance Summary for sqlite-vec:") print(df_sqlite) # Print out the summary table print("\nPerformance Summary for faiss:") print(df_faiss) def main(): # Initialize the client client = LlamaStackAsLibraryClient("ollama") vector_db_id = f"test-vector-db-{uuid.uuid4().hex}" _ = client.initialize() # Generate a large dataset num_chars = 50 num_docs = 100 num_writes = 100 write_batch_size = 100 num_reads = 100 documents = generate_documents(num_docs * write_batch_size, num_chars) user_prompts = [ f"Tell me about document {i}" for i in range(1, num_reads + 1) ] providers = ["sqlite-vec", "faiss"] output = { provider_id: {"write_times": None, "read_times": None} for provider_id in providers } # Benchmark writes and reads for SQLite and Faiss for provider_id in providers: cprint(f"Benchmarking provider: {provider_id}", "yellow") client.vector_dbs.register( provider_id=provider_id, vector_db_id=vector_db_id, embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, ) write_times = benchmark_write(client, vector_db_id, documents, write_batch_size) average_write_time_ms = sum(write_times) / len(write_times) * 1000. cprint(f"Average write time for {provider_id} is {average_write_time_ms:.2f} milliseconds for {num_writes} runs", "blue") cprint(f"Benchmarking reads for provider: {provider_id}", "yellow") read_times = benchmark_read(client, provider_id, vector_db_id, user_prompts) average_read_time_ms = sum(read_times) / len(read_times) * 1000. cprint(f"Average read time for {provider_id} is {average_read_time_ms:.2f} milliseconds for {num_reads} runs", "blue") client.vector_dbs.unregister(vector_db_id=vector_db_id) output[provider_id]['write_times'] = write_times output[provider_id]['read_times'] = read_times # Generate plots and summary plot_results(output, write_batch_size) if __name__ == "__main__": cProfile.run('main()', 'profile_output.prof') ``` </details> --------- Signed-off-by: Francisco Javier Arceo <farceo@redhat.com> | ||
|---|---|---|
| .. | ||
| vector_io | ||