Francisco Arceo | 37b6da37ba | docs: Document sqlite-vec faiss comparison (#1821)
# What does this PR do?
This PR documents and benchmarks the performance tradeoffs between
sqlite-vec and FAISS inline VectorDB providers.
Closes https://github.com/meta-llama/llama-stack/issues/1165
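For reference, the two backends are interchangeable at registration time; a minimal sketch using the same client calls as the test script below (the `bench-` database names are illustrative, not from the PR):

```python
# Minimal sketch (not part of the PR): both inline providers are exercised
# through the same client API; only the provider_id passed at registration changes.
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("ollama")
client.initialize()

for provider_id in ("sqlite-vec", "faiss"):
    client.vector_dbs.register(
        provider_id=provider_id,
        vector_db_id=f"bench-{provider_id}",  # hypothetical name for illustration
        embedding_model="all-MiniLM-L6-v2",
        embedding_dimension=384,
    )
```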
## Test Plan
The test was run using this script:
<details>
<summary>CLICK TO SHOW SCRIPT 👋 </summary>
```python
import cProfile
import os
import uuid
import time
import random
import string
import matplotlib.pyplot as plt
import pandas as pd
from termcolor import cprint
from llama_stack_client.types import Document
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from memory_profiler import profile
from line_profiler import LineProfiler
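
# Configure the library client for the local ollama distribution (inference model + run config).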
os.environ["INFERENCE_MODEL"] = "llama3.2:3b-instruct-fp16"
os.environ["LLAMA_STACK_CONFIG"] = "ollama"

def generate_random_chars(count=400):
    return ''.join(random.choices(string.ascii_letters, k=count))

def generate_documents(num_docs: int, num_chars: int):
    documents = [
        Document(
            document_id=f"doc-{i}",
            content=f"Document content for document {i} - {generate_random_chars(count=num_chars)}",
            mime_type="text/plain",
            metadata={},
        )
        for i in range(num_docs)
    ]
    return documents

@profile
def benchmark_write(client, vector_db_id, documents, batch_size=100):
    write_times = []
    for i in range(0, len(documents), batch_size):
        batch = documents[i:i + batch_size]
        start_time = time.time()
        client.tool_runtime.rag_tool.insert(
            documents=batch,
            vector_db_id=vector_db_id,
            chunk_size_in_tokens=512,
        )
        end_time = time.time()
        write_times.append(end_time - start_time)
    return write_times

@profile
def benchmark_read(client, provider_id, vector_db_id, user_prompts):
    # provider_id is unused here but kept for symmetry with the call site
    response_times = []
    for prompt in user_prompts:
        start_time = time.time()
        client.vector_io.query(
            vector_db_id=vector_db_id,
            query=prompt,
        )
        end_time = time.time()
        response_times.append(end_time - start_time)
    return response_times

def profile_functions():
    # Optional helper for line-level profiling (not invoked in main())
    profiler = LineProfiler()
    profiler.add_function(benchmark_write)
    profiler.add_function(benchmark_read)
    return profiler

def plot_results(output, batch_size):
    # Create DataFrames for easy manipulation; convert seconds to milliseconds
    df_sqlite = pd.DataFrame(output['sqlite-vec'])
    df_faiss = pd.DataFrame(output['faiss'])
    df_sqlite[['write_times', 'read_times']] *= 1000
    df_faiss[['write_times', 'read_times']] *= 1000

    avg_write_sqlite = df_sqlite['write_times'].mean()
    avg_write_faiss = df_faiss['write_times'].mean()
    avg_read_sqlite = df_sqlite['read_times'].mean()
    avg_read_faiss = df_faiss['read_times'].mean()

    # Histogram of write times
    plt.figure(figsize=(12, 6))
    plt.hist(df_sqlite['write_times'], bins=10, alpha=0.5, color='blue', label='sqlite-vec Write Times')
    plt.hist(df_faiss['write_times'], bins=10, alpha=0.5, color='red', label='faiss Write Times')
    plt.axvline(avg_write_sqlite, color='blue', linestyle='--',
                label=f'Average Write Time (sqlite-vec): {avg_write_sqlite:.3f} ms')
    plt.axvline(avg_write_faiss, color='red', linestyle='--',
                label=f'Average Write Time (faiss): {avg_write_faiss:.3f} ms')
    plt.title(f'Histogram of Write Times for sqlite-vec and faiss\nn = {df_faiss.shape[0]} with batch size = {batch_size}')
    plt.xlabel('Time (milliseconds)')
    plt.ylabel('Count')
    plt.legend()
    plt.savefig('write_time_comparison.png')
    plt.close()

    # Histogram of read times
    plt.figure(figsize=(12, 6))
    plt.hist(df_sqlite['read_times'], bins=10, alpha=0.5, color='blue', label='sqlite-vec Read Times')
    plt.hist(df_faiss['read_times'], bins=10, alpha=0.5, color='red', label='faiss Read Times')
    plt.axvline(avg_read_sqlite, color='blue', linestyle='--',
                label=f'Average Read Time (sqlite-vec): {avg_read_sqlite:.3f} ms')
    plt.axvline(avg_read_faiss, color='red', linestyle='--',
                label=f'Average Read Time (faiss): {avg_read_faiss:.3f} ms')
    plt.title(f'Histogram of Read Times for sqlite-vec and faiss\nn = {df_faiss.shape[0]}')
    plt.xlabel('Time (milliseconds)')
    plt.ylabel('Count')
    plt.legend()
    plt.savefig('read_time_comparison.png')
    plt.close()

    # Write times by operation sequence
    plt.figure(figsize=(12, 6))
    plt.plot(df_sqlite.index, df_sqlite['write_times'],
             marker='o', markersize=4, linestyle='-', color='blue',
             label='sqlite-vec Write Times')
    plt.plot(df_faiss.index, df_faiss['write_times'],
             marker='x', markersize=4, linestyle='-', color='red',
             label='faiss Write Times')
    plt.title(f'Write Times by Operation Sequence\n(batch size = {batch_size})')
    plt.xlabel('Write Operation Sequence')
    plt.ylabel('Time (milliseconds)')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig('write_time_sequence.png')
    plt.close()

    # Print out the summary tables
    print("\nPerformance Summary for sqlite-vec:")
    print(df_sqlite)
    print("\nPerformance Summary for faiss:")
    print(df_faiss)

def main():
    # Initialize the client
    client = LlamaStackAsLibraryClient("ollama")
    vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"
    _ = client.initialize()

    # Generate a large dataset
    num_chars = 50
    num_docs = 100
    num_writes = 100
    write_batch_size = 100
    num_reads = 100

    documents = generate_documents(num_docs * write_batch_size, num_chars)
    user_prompts = [
        f"Tell me about document {i}" for i in range(1, num_reads + 1)
    ]

    providers = ["sqlite-vec", "faiss"]
    output = {
        provider_id: {"write_times": None, "read_times": None} for provider_id in providers
    }

    # Benchmark writes and reads for sqlite-vec and faiss
    for provider_id in providers:
        cprint(f"Benchmarking provider: {provider_id}", "yellow")
        client.vector_dbs.register(
            provider_id=provider_id,
            vector_db_id=vector_db_id,
            embedding_model="all-MiniLM-L6-v2",
            embedding_dimension=384,
        )

        write_times = benchmark_write(client, vector_db_id, documents, write_batch_size)
        average_write_time_ms = sum(write_times) / len(write_times) * 1000.
        cprint(f"Average write time for {provider_id} is {average_write_time_ms:.2f} milliseconds for {num_writes} runs", "blue")

        cprint(f"Benchmarking reads for provider: {provider_id}", "yellow")
        read_times = benchmark_read(client, provider_id, vector_db_id, user_prompts)
        average_read_time_ms = sum(read_times) / len(read_times) * 1000.
        cprint(f"Average read time for {provider_id} is {average_read_time_ms:.2f} milliseconds for {num_reads} runs", "blue")

        client.vector_dbs.unregister(vector_db_id=vector_db_id)
        output[provider_id]['write_times'] = write_times
        output[provider_id]['read_times'] = read_times

    # Generate plots and summary
    plot_results(output, write_batch_size)


if __name__ == "__main__":
    cProfile.run('main()', 'profile_output.prof')
```
</details>
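The script dumps cProfile data to `profile_output.prof`; one way to inspect it afterwards is the standard-library `pstats` module (not part of the PR):

```python
# Hedged example: read the cProfile dump written by the script above.
import pstats

stats = pstats.Stats("profile_output.prof")
stats.sort_stats("cumulative").print_stats(20)  # top 20 entries by cumulative time
```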
---------
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
2025-03-28 17:41:33 +01:00