diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..74ddcc623
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,33 @@
+ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
+OS := linux
+ifeq ($(shell uname -s), Darwin)
+	OS = osx
+endif
+
+PYTHON_VERSION = $(shell python --version | grep -Eo '[0-9]\.[0-9]+')
+PYTHON_VERSIONS := 3.10 3.11
+
+build-dev:
+	uv sync --extra dev --extra test
+	uv pip install -e .
+	. .venv/bin/activate && uv pip install sqlite-vec chardet datasets sentence_transformers pypdf
+
+build-ollama: fix-line-endings
+	llama stack build --template ollama --image-type venv
+
+fix-line-endings:
+	sed -i '' 's/\r$$//' llama_stack/distribution/common.sh
+	sed -i '' 's/\r$$//' llama_stack/distribution/build_venv.sh
+
+test-sqlite-vec:
+	pytest tests/unit/providers/vector_io/test_sqlite_vec.py \
+	-v -s --tb=short --disable-warnings --asyncio-mode=auto
+
+test-ollama-vector-integration:
+	INFERENCE_MODEL=llama3.2:3b-instruct-fp16 LLAMA_STACK_CONFIG=ollama \
+	pytest -s -v tests/client-sdk/vector_io/test_vector_io.py
+
+serve-ollama:
+	ollama run llama3.2:3b-instruct-fp16 --keepalive 24h
diff --git a/docs/_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png b/docs/_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
new file mode 100644
index 000000000..bec796939
Binary files /dev/null and b/docs/_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png differ
diff --git a/docs/_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png b/docs/_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png
new file mode 100644
index 000000000..192d5e96e
Binary files /dev/null and b/docs/_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png differ
diff --git a/docs/source/providers/vector_io/sqlite-vec.md b/docs/source/providers/vector_io/sqlite-vec.md
index 9bbc4170d..e61cc06fd 100644
--- a/docs/source/providers/vector_io/sqlite-vec.md
+++ b/docs/source/providers/vector_io/sqlite-vec.md
@@ -10,11 +10,40 @@ That means you're not limited to storing vectors in memory or in a separate serv
 
 ## Features
 
 - Lightweight and easy to use
 - Fully integrated with Llama Stack
+- Uses disk-based storage for persistence, allowing for larger vector storage
+
+### Comparison to Faiss
+
+SQLite-Vec is a lightweight alternative to Faiss, a popular vector database provider.
+While Faiss is a powerful, fast, and lightweight inline provider, it reindexes the
+entire database whenever a new vector is added. SQLite-Vec uses disk-based storage,
+which allows for larger vector stores and handles incremental writes more efficiently.
+This makes SQLite-Vec a great alternative to Faiss when your workload involves frequent
+writes to the database.
+
+Consider the histogram below, in which 10,000 randomly generated strings were inserted
+in batches of 100 into both `faiss` and `sqlite-vec` using `client.tool_runtime.rag_tool.insert()`.
+
+```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
+:alt: Comparison of SQLite-Vec and Faiss write times
+:width: 400px
+```
+
+The average write time for `sqlite-vec` was 788ms, compared to 47,640ms for `faiss`.
+While that average is jarring on its own, the sequence plot below shows that the `faiss`
+write times are spread rather uniformly across the [1500, 100000] ms interval.
+
+```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png
+:alt: Write times of SQLite-Vec and Faiss over successive batches
+:width: 400px
+```
+
+For more background, see [the GitHub issue](https://github.com/meta-llama/llama-stack/issues/1165)
+where this comparison was discussed.
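+
+If you want to reproduce a comparison like this, the sketch below shows the general
+shape of such a benchmark. It is a minimal sketch rather than the exact harness used
+to produce the plots above: the helper name `time_batch_inserts`, the random-string
+corpus, and the timing method are illustrative assumptions, and `client` is assumed
+to be an initialized Llama Stack client with `vector_db_id` already registered
+against the provider under test.
+
+```python
+import random
+import string
+import time
+
+from llama_stack_client.types import Document
+
+
+def time_batch_inserts(client, vector_db_id, n_docs=10_000, batch_size=100):
+    """Insert random documents in batches and record per-batch write times."""
+    docs = [
+        Document(
+            document_id=f"doc-{i}",
+            content="".join(random.choices(string.ascii_letters, k=100)),
+            mime_type="text/plain",
+            metadata={},
+        )
+        for i in range(n_docs)
+    ]
+    write_times = []
+    for start in range(0, n_docs, batch_size):
+        t0 = time.perf_counter()
+        client.tool_runtime.rag_tool.insert(
+            documents=docs[start : start + batch_size],
+            vector_db_id=vector_db_id,
+            chunk_size_in_tokens=512,
+        )
+        write_times.append(time.perf_counter() - t0)
+    return write_times
+```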
 
 ## Usage
 
-To use SQLite-Vec in your Llama Stack project, follow these steps:
+To use sqlite-vec in your Llama Stack project, follow these steps:
 1. Install the necessary dependencies.
 2. Configure your Llama Stack project to use SQLite-Vec.
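+
+As a minimal sketch of step 2, the snippet below registers a vector database against
+the `sqlite-vec` provider using the library client (the Ollama distribution and the
+embedding settings shown are illustrative, not requirements):
+
+```python
+import uuid
+
+from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
+
+client = LlamaStackAsLibraryClient("ollama")
+client.initialize()
+
+# Register a vector database backed by the sqlite-vec provider.
+client.vector_dbs.register(
+    provider_id="sqlite-vec",
+    vector_db_id=f"test-vector-db-{uuid.uuid4().hex}",
+    embedding_model="all-MiniLM-L6-v2",
+    embedding_dimension=384,
+)
+```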
diff --git a/sqlite_vec_test.py b/sqlite_vec_test.py
new file mode 100644
index 000000000..452e4a146
--- /dev/null
+++ b/sqlite_vec_test.py
@@ -0,0 +1,119 @@
+import os
+import uuid
+
+from termcolor import cprint
+
+# Set environment variables
+os.environ["INFERENCE_MODEL"] = "llama3.2:3b-instruct-fp16"
+os.environ["LLAMA_STACK_CONFIG"] = "ollama"
+
+# Import libraries after setting environment variables
+from llama_stack_client.lib.agents.agent import Agent
+from llama_stack_client.lib.agents.event_logger import EventLogger
+from llama_stack_client.types import Document
+from llama_stack_client.types.agent_create_params import AgentConfig
+
+from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
+
+
+def main():
+    # Initialize the client
+    client = LlamaStackAsLibraryClient("ollama")
+    vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"
+
+    _ = client.initialize()
+
+    model_id = "llama3.2:3b-instruct-fp16"
+
+    # Define the list of document URLs and create Document objects
+    urls = [
+        "chat.rst",
+        "llama3.rst",
+        "memory_optimizations.rst",
+        "lora_finetune.rst",
+    ]
+    documents = [
+        Document(
+            document_id=f"num-{i}",
+            content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
+            mime_type="text/plain",
+            metadata={},
+        )
+        for i, url in enumerate(urls)
+    ]
+
+    # Register a vector database backed by the sqlite-vec provider
+    client.vector_dbs.register(
+        provider_id="sqlite-vec",
+        vector_db_id=vector_db_id,
+        embedding_model="all-MiniLM-L6-v2",
+        embedding_dimension=384,
+    )
+
+    # Chunk and embed the documents into the vector database
+    client.tool_runtime.rag_tool.insert(
+        documents=documents,
+        vector_db_id=vector_db_id,
+        chunk_size_in_tokens=512,
+    )
+
+    # List of user prompts
+    user_prompts = [
+        "What are the top 5 topics that were explained in the documentation? Only list succinct bullet points.",
+        "Was anything related to 'Llama3' discussed, if so what?",
+        "Tell me how to use LoRA",
+        "What about Quantization?",
+    ]
+
+    # Process each prompt and display the output
+    for prompt in user_prompts:
+        cprint(f"User> {prompt}", "green")
+        response = client.vector_io.query(
+            vector_db_id=vector_db_id,
+            query=prompt,
+        )
+        cprint(f"Response> {response}", "blue")
+
+    # # Create agent configuration
+    # agent_config = AgentConfig(
+    #     model=model_id,
+    #     instructions="You are a helpful assistant",
+    #     enable_session_persistence=False,
+    #     toolgroups=[
+    #         {
+    #             "name": "builtin::rag",
+    #             "args": {
+    #                 "vector_db_ids": [vector_db_id],
+    #             },
+    #         }
+    #     ],
+    # )
+    #
+    # # Instantiate the Agent
+    # agent = Agent(client, agent_config)
+    #
+    # # Create a session for the agent
+    # session_id = agent.create_session("test-session")
+    #
+    # # Process each prompt and display the output
+    # for prompt in user_prompts:
+    #     cprint(f"User> {prompt}", "green")
+    #     response = agent.create_turn(
+    #         messages=[
+    #             {
+    #                 "role": "user",
+    #                 "content": prompt,
+    #             }
+    #         ],
+    #         session_id=session_id,
+    #     )
+    #     # Log and print events from the response
+    #     for log in EventLogger().log(response):
+    #         log.print()
+
+
+if __name__ == "__main__":
+    main()