llama-stack-mirror/docs
Francisco Arceo 37b6da37ba
docs: Document sqlite-vec faiss comparison (#1821)
# What does this PR do?
This PR documents and benchmarks the performance tradeoffs between
sqlite-vec and FAISS inline VectorDB providers.

# Closes https://github.com/meta-llama/llama-stack/issues/1165

## Test Plan

The test was run using this script:

<details>
<summary>CLICK TO SHOW SCRIPT 👋  </summary>

```python

import cProfile
import os
import uuid
import time
import random
import string
import matplotlib.pyplot as plt
import pandas as pd
from termcolor import cprint
from llama_stack_client.types import Document
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from memory_profiler import profile
from line_profiler import LineProfiler

os.environ["INFERENCE_MODEL"] = "llama3.2:3b-instruct-fp16"
os.environ["LLAMA_STACK_CONFIG"] = "ollama"

def generate_random_chars(count=400):
    return ''.join(random.choices(string.ascii_letters, k=count))

def generate_documents(num_docs: int, num_chars: int):
    documents = [
        Document(
            document_id=f"doc-{i}",
            content=f"Document content for document {i} - {generate_random_chars(count=num_chars)}",
            mime_type="text/plain",
            metadata={},
        )
        for i in range(num_docs)
    ]
    return documents


@profile
def benchmark_write(client, vector_db_id, documents, batch_size=100):
    write_times = []
    for i in range(0, len(documents), batch_size):
        batch = documents[i:i + batch_size]
        start_time = time.time()
        client.tool_runtime.rag_tool.insert(
            documents=batch,
            vector_db_id=vector_db_id,
            chunk_size_in_tokens=512,
        )
        end_time = time.time()
        write_times.append(end_time - start_time)

    return write_times

@profile
def benchmark_read(client, provider_id, vector_db_id, user_prompts):
    response_times = []
    for prompt in user_prompts:
        start_time = time.time()
        response = client.vector_io.query(
            vector_db_id=vector_db_id,
            query=prompt,
        )
        end_time = time.time()
        response_times.append(end_time - start_time)
    return response_times

def profile_functions():
    profiler = LineProfiler()
    profiler.add_function(benchmark_write)
    profiler.add_function(benchmark_read)
    return profiler


def plot_results(output, batch_size):
    # Create a DataFrame for easy manipulation
    df_sqlite = pd.DataFrame(output['sqlite-vec'])
    df_faiss = pd.DataFrame(output['faiss'])

    df_sqlite['write_times'] *= 1000
    df_faiss['write_times'] *= 1000

    avg_write_sqlite = df_sqlite['write_times'].mean()
    avg_write_faiss = df_faiss['write_times'].mean()
    avg_read_sqlite = df_sqlite['read_times'].mean()
    avg_read_faiss = df_faiss['read_times'].mean()

    plt.figure(figsize=(12, 6))
    plt.hist(df_sqlite['write_times'], bins=10, alpha=0.5, color='blue', label='sqlite-vec Write Times')
    plt.hist(df_faiss['write_times'], bins=10, alpha=0.5, color='red', label='faiss Write Times')
    plt.axvline(avg_write_sqlite, color='blue', linestyle='--',
                label=f'Average Write Time (sqlite-vec): {avg_write_sqlite:.3f} ms')
    plt.axvline(avg_write_faiss, color='red', linestyle='--',
                label=f'Average Write Time (faiss): {avg_write_faiss:.3f} ms')
    plt.title(f'Histogram of Write Times for sqlite-vec and faiss\nn = {df_faiss.shape[0]} with batch size = {batch_size}')
    plt.xlabel('Time (milliseconds)')
    plt.ylabel('Density')
    plt.legend()
    plt.savefig('write_time_comparison.png')
    plt.close()

    plt.figure(figsize=(12, 6))
    plt.hist(df_sqlite['read_times'], bins=10, alpha=0.5, color='blue', label='sqlite-vec Read Times')
    plt.hist(df_faiss['read_times'], bins=10, alpha=0.5, color='red', label='faiss Read Times')
    plt.axvline(avg_read_sqlite, color='blue', linestyle='--',
                label=f'Average Read Time (sqlite-vec): {avg_read_sqlite:.3f} ms')
    plt.axvline(avg_read_faiss, color='red', linestyle='--',
                label=f'Average Read Time (faiss): {avg_read_faiss:.3f} ms')
    plt.title(f'Histogram of Read Times for sqlite-vec and faiss\nn = {df_faiss.shape[0]}')
    plt.xlabel('Time (milliseconds)')
    plt.ylabel('Density')
    plt.legend()
    plt.savefig('read_time_comparison.png')
    plt.close()

    plt.figure(figsize=(12, 6))
    plt.hist(df_sqlite['read_times'], bins=10, alpha=0.5, color='blue', label='sqlite-vec Read Times')
    plt.hist(df_faiss['read_times'], bins=10, alpha=0.5, color='red', label='faiss Read Times')
    plt.axvline(avg_read_sqlite, color='blue', linestyle='--',
                label=f'Average Read Time (sqlite-vec): {avg_read_sqlite:.3f} ms')
    plt.axvline(avg_read_faiss, color='red', linestyle='--',
                label=f'Average Read Time (faiss): {avg_read_faiss:.3f} ms')
    plt.title(f'Histogram of Read Times for sqlite-vec and faiss\nn = {df_faiss.shape[0]}')
    plt.xlabel('Time (milliseconds)')
    plt.ylabel('Density')
    plt.legend()
    plt.savefig('read_time_comparison.png')
    plt.close()

    plt.figure(figsize=(12, 6))
    plt.plot(df_sqlite.index, df_sqlite['write_times'],
             marker='o', markersize=4, linestyle='-', color='blue',
             label='sqlite-vec Write Times')
    plt.plot(df_faiss.index, df_faiss['write_times'],
             marker='x', markersize=4, linestyle='-', color='red',
             label='faiss Write Times')

    plt.title(f'Write Times by Operation Sequence\n(batch size = {batch_size})')
    plt.xlabel('Write Operation Sequence')
    plt.ylabel('Time (milliseconds)')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig('write_time_sequence.png')
    plt.close()
    # Print out the summary table
    print("\nPerformance Summary for sqlite-vec:")
    print(df_sqlite)

    # Print out the summary table
    print("\nPerformance Summary for faiss:")
    print(df_faiss)


def main():
    # Initialize the client
    client = LlamaStackAsLibraryClient("ollama")
    vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"
    _ = client.initialize()

    # Generate a large dataset
    num_chars = 50
    num_docs = 100
    num_writes = 100
    write_batch_size = 100
    num_reads = 100

    documents = generate_documents(num_docs * write_batch_size, num_chars)
    user_prompts = [
        f"Tell me about document {i}" for i in range(1, num_reads + 1)
    ]

    providers = ["sqlite-vec", "faiss"]
    output = {
        provider_id: {"write_times": None, "read_times": None} for provider_id in providers
    }

    # Benchmark writes and reads for SQLite and Faiss
    for provider_id in providers:
        cprint(f"Benchmarking provider: {provider_id}", "yellow")
        client.vector_dbs.register(
            provider_id=provider_id,
            vector_db_id=vector_db_id,
            embedding_model="all-MiniLM-L6-v2",
            embedding_dimension=384,
        )
        write_times = benchmark_write(client, vector_db_id, documents, write_batch_size)

        average_write_time_ms = sum(write_times) / len(write_times) * 1000.
        cprint(f"Average write time for {provider_id} is {average_write_time_ms:.2f} milliseconds for {num_writes} runs", "blue")

        cprint(f"Benchmarking reads for provider: {provider_id}", "yellow")
        read_times = benchmark_read(client, provider_id, vector_db_id, user_prompts)

        average_read_time_ms = sum(read_times) / len(read_times) * 1000.
        cprint(f"Average read time for {provider_id} is {average_read_time_ms:.2f} milliseconds for {num_reads} runs", "blue")

        client.vector_dbs.unregister(vector_db_id=vector_db_id)
        output[provider_id]['write_times'] = write_times
        output[provider_id]['read_times'] = read_times
    # Generate plots and summary
    plot_results(output, write_batch_size)


if __name__ == "__main__":
    cProfile.run('main()', 'profile_output.prof')
```
</details>

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
2025-03-28 17:41:33 +01:00
..
_static docs: Document sqlite-vec faiss comparison (#1821) 2025-03-28 17:41:33 +01:00
notebooks fix: Misleading code in Llama Stack Benchmark Evals notebook (#1774) 2025-03-25 07:04:47 -07:00
openapi_generator chore: require data field for all List*Response models (#1799) 2025-03-27 18:15:16 +01:00
resources Several documentation fixes and fix link to API reference 2025-02-04 14:00:43 -08:00
source docs: Document sqlite-vec faiss comparison (#1821) 2025-03-28 17:41:33 +01:00
zero_to_hero_guide fix: Default to port 8321 everywhere (#1734) 2025-03-20 15:50:41 -07:00
conftest.py fix: sleep after notebook test 2025-03-23 14:03:35 -07:00
contbuild.sh Fix broken links with docs 2024-11-22 20:42:17 -08:00
dog.jpg Support for Llama3.2 models and Swift SDK (#98) 2024-09-25 10:29:58 -07:00
getting_started.ipynb fix: update mcp commands in getting_started.ipynb (#1800) 2025-03-26 14:47:32 -07:00
license_header.txt Initial commit 2024-07-23 08:32:33 -07:00
make.bat first version of readthedocs (#278) 2024-10-22 10:15:58 +05:30
Makefile first version of readthedocs (#278) 2024-10-22 10:15:58 +05:30
readme.md Fix README.md notebook links (#976) 2025-02-05 14:33:46 -08:00
requirements.txt fix: add tomli to requirements.txt for docs; ideally we need to move this to uv 2025-03-03 11:11:17 -08:00

Llama Stack Documentation

Here's a collection of comprehensive guides, examples, and resources for building AI applications with Llama Stack. For the complete documentation, visit our ReadTheDocs page.

Content

Try out Llama Stack's capabilities through our detailed Jupyter notebooks: