diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..74ddcc623
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,33 @@
+ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
+OS := linux
+ifeq ($(shell uname -s), Darwin)
+	OS = osx
+endif
+
+PYTHON_VERSION = $(shell python --version | grep -Eo '[0-9]\.[0-9]+')
+PYTHON_VERSIONS := 3.10 3.11
+
+build-dev:
+	uv sync --extra dev --extra test
+	uv pip install -e .
+	. .venv/bin/activate && uv pip install sqlite-vec chardet datasets sentence_transformers pypdf
+
+build-ollama: fix-line-endings
+	llama stack build --template ollama --image-type venv
+
+fix-line-endings:
+	sed -i '' 's/\r$$//' llama_stack/distribution/common.sh
+	sed -i '' 's/\r$$//' llama_stack/distribution/build_venv.sh
+
+test-sqlite-vec:
+	pytest tests/unit/providers/vector_io/test_sqlite_vec.py \
+	-v -s --tb=short --disable-warnings --asyncio-mode=auto
+
+test-ollama-vector-integration:
+	INFERENCE_MODEL=llama3.2:3b-instruct-fp16 LLAMA_STACK_CONFIG=ollama \
+	pytest -s -v tests/client-sdk/vector_io/test_vector_io.py
+
+serve-ollama:
+	ollama run llama3.2:3b-instruct-fp16 --keepalive 24h
diff --git a/docs/_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png b/docs/_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
new file mode 100644
index 000000000..bec796939
Binary files /dev/null and b/docs/_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png differ
diff --git a/docs/_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png b/docs/_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png
new file mode 100644
index 000000000..192d5e96e
Binary files /dev/null and b/docs/_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png differ
diff --git a/docs/source/providers/vector_io/sqlite-vec.md b/docs/source/providers/vector_io/sqlite-vec.md
index 9bbc4170d..e61cc06fd 100644
--- a/docs/source/providers/vector_io/sqlite-vec.md
+++ b/docs/source/providers/vector_io/sqlite-vec.md
@@ -10,11 +10,40 @@ That means you're not limited to storing vectors in memory or in a separate serv
 
 ## Features
 
 - Lightweight and easy to use
 - Fully integrated with Llama Stack
+- Uses disk-based storage for persistence, allowing for larger vector storage
+
+### Comparison to Faiss
+
+SQLite-Vec is a lightweight alternative to Faiss, a popular vector database provider.
+While Faiss is a powerful, fast, and lightweight inline provider, it reindexes the
+entire database whenever a new vector is added. SQLite-Vec uses disk-based storage,
+which allows for larger vector stores and handles incremental writes more efficiently.
+This makes SQLite-Vec a great alternative to Faiss when your workload involves frequent
+writes to the database.
+
+Consider the histogram below, in which 10,000 randomly generated strings were inserted
+in batches of 100 into both `faiss` and `sqlite-vec` using `client.tool_runtime.rag_tool.insert()`.
+
+```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
+:alt: Comparison of SQLite-Vec and Faiss write times
+:width: 400px
+```
+
+The average write time for `sqlite-vec` was 788ms, compared to 47,640ms for `faiss`.
+While that average is jarring on its own, the sequence plot below shows that the `faiss`
+write times are spread rather uniformly across the [1500, 100000] ms interval.
+
+```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png
+:alt: Write times of SQLite-Vec and Faiss over successive batches
+:width: 400px
+```
+
+For more background, see [the GitHub issue](https://github.com/meta-llama/llama-stack/issues/1165)
+where this comparison was discussed.
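+
+If you want to reproduce a comparison like this, the sketch below shows the general
+shape of such a benchmark. It is a minimal sketch rather than the exact harness used
+to produce the plots above: the helper name `time_batch_inserts`, the random-string
+corpus, and the timing method are illustrative assumptions, and `client` is assumed
+to be an initialized Llama Stack client with `vector_db_id` already registered
+against the provider under test.
+
+```python
+import random
+import string
+import time
+
+from llama_stack_client.types import Document
+
+
+def time_batch_inserts(client, vector_db_id, n_docs=10_000, batch_size=100):
+    """Insert random documents in batches and record per-batch write times."""
+    docs = [
+        Document(
+            document_id=f"doc-{i}",
+            content="".join(random.choices(string.ascii_letters, k=100)),
+            mime_type="text/plain",
+            metadata={},
+        )
+        for i in range(n_docs)
+    ]
+    write_times = []
+    for start in range(0, n_docs, batch_size):
+        t0 = time.perf_counter()
+        client.tool_runtime.rag_tool.insert(
+            documents=docs[start : start + batch_size],
+            vector_db_id=vector_db_id,
+            chunk_size_in_tokens=512,
+        )
+        write_times.append(time.perf_counter() - t0)
+    return write_times
+```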
 
 ## Usage
 
-To use SQLite-Vec in your Llama Stack project, follow these steps:
+To use sqlite-vec in your Llama Stack project, follow these steps:
 1. Install the necessary dependencies.
 2. Configure your Llama Stack project to use SQLite-Vec.
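+
+As a minimal sketch of step 2, the snippet below registers a vector database against
+the `sqlite-vec` provider using the library client (the Ollama distribution and the
+embedding settings shown are illustrative, not requirements):
+
+```python
+import uuid
+
+from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
+
+client = LlamaStackAsLibraryClient("ollama")
+client.initialize()
+
+# Register a vector database backed by the sqlite-vec provider.
+client.vector_dbs.register(
+    provider_id="sqlite-vec",
+    vector_db_id=f"test-vector-db-{uuid.uuid4().hex}",
+    embedding_model="all-MiniLM-L6-v2",
+    embedding_dimension=384,
+)
+```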
diff --git a/sqlite_vec_test.py b/sqlite_vec_test.py
new file mode 100644
index 000000000..452e4a146
--- /dev/null
+++ b/sqlite_vec_test.py
@@ -0,0 +1,119 @@
+import os
+import uuid
+
+from termcolor import cprint
+
+# Set environment variables
+os.environ["INFERENCE_MODEL"] = "llama3.2:3b-instruct-fp16"
+os.environ["LLAMA_STACK_CONFIG"] = "ollama"
+
+# Import libraries after setting environment variables
+from llama_stack_client.lib.agents.agent import Agent
+from llama_stack_client.lib.agents.event_logger import EventLogger
+from llama_stack_client.types import Document
+from llama_stack_client.types.agent_create_params import AgentConfig
+
+from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
+
+
+def main():
+    # Initialize the client
+    client = LlamaStackAsLibraryClient("ollama")
+    vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"
+
+    _ = client.initialize()
+
+    model_id = "llama3.2:3b-instruct-fp16"
+
+    # Define the list of document URLs and create Document objects
+    urls = [
+        "chat.rst",
+        "llama3.rst",
+        "memory_optimizations.rst",
+        "lora_finetune.rst",
+    ]
+    documents = [
+        Document(
+            document_id=f"num-{i}",
+            content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
+            mime_type="text/plain",
+            metadata={},
+        )
+        for i, url in enumerate(urls)
+    ]
+
+    # Register a vector database backed by the sqlite-vec provider
+    client.vector_dbs.register(
+        provider_id="sqlite-vec",
+        vector_db_id=vector_db_id,
+        embedding_model="all-MiniLM-L6-v2",
+        embedding_dimension=384,
+    )
+
+    # Chunk and embed the documents into the vector database
+    client.tool_runtime.rag_tool.insert(
+        documents=documents,
+        vector_db_id=vector_db_id,
+        chunk_size_in_tokens=512,
+    )
+
+    # List of user prompts
+    user_prompts = [
+        "What are the top 5 topics that were explained in the documentation? Only list succinct bullet points.",
+        "Was anything related to 'Llama3' discussed, if so what?",
+        "Tell me how to use LoRA",
+        "What about Quantization?",
+    ]
+
+    # Process each prompt and display the output
+    for prompt in user_prompts:
+        cprint(f"User> {prompt}", "green")
+        response = client.vector_io.query(
+            vector_db_id=vector_db_id,
+            query=prompt,
+        )
+        cprint(f"Response> {response}", "blue")
+
+    # # Create agent configuration
+    # agent_config = AgentConfig(
+    #     model=model_id,
+    #     instructions="You are a helpful assistant",
+    #     enable_session_persistence=False,
+    #     toolgroups=[
+    #         {
+    #             "name": "builtin::rag",
+    #             "args": {
+    #                 "vector_db_ids": [vector_db_id],
+    #             },
+    #         }
+    #     ],
+    # )
+    #
+    # # Instantiate the Agent
+    # agent = Agent(client, agent_config)
+    #
+    # # Create a session for the agent
+    # session_id = agent.create_session("test-session")
+    #
+    # # Process each prompt and display the output
+    # for prompt in user_prompts:
+    #     cprint(f"User> {prompt}", "green")
+    #     response = agent.create_turn(
+    #         messages=[
+    #             {
+    #                 "role": "user",
+    #                 "content": prompt,
+    #             }
+    #         ],
+    #         session_id=session_id,
+    #     )
+    #     # Log and print events from the response
+    #     for log in EventLogger().log(response):
+    #         log.print()
+
+
+if __name__ == "__main__":
+    main()