From 1ac05d3a2ac55133fa3f5143ec54e529c0533598 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Thu, 27 Mar 2025 21:58:57 -0400 Subject: [PATCH] updated copy and cleaned up files Signed-off-by: Francisco Javier Arceo --- Makefile | 33 ----- docs/source/providers/vector_io/sqlite-vec.md | 19 +-- sqlite_vec_test.py | 119 ------------------ 3 files changed, 10 insertions(+), 161 deletions(-) delete mode 100644 Makefile delete mode 100644 sqlite_vec_test.py diff --git a/Makefile b/Makefile deleted file mode 100644 index 74ddcc623..000000000 --- a/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) -OS := linux -ifeq ($(shell uname -s), Darwin) - OS = osx -endif - -PYTHON_VERSION = ${shell python --version | grep -Eo '[0-9]\.[0-9]+'} -PYTHON_VERSIONS := 3.10 3.11 - -build-dev: - uv sync --extra dev --extra test - uv pip install -e . - . .venv/bin/activate - uv pip install sqlite-vec chardet datasets sentence_transformers pypdf - -build-ollama: fix-line-endings - llama stack build --template ollama --image-type venv - -fix-line-endings: - sed -i '' 's/\r$$//' llama_stack/distribution/common.sh - sed -i '' 's/\r$$//' llama_stack/distribution/build_venv.sh - -test-sqlite-vec: - pytest tests/unit/providers/vector_io/test_sqlite_vec.py \ - -v -s --tb=short --disable-warnings --asyncio-mode=auto - -test-ollama-vector-integration: - INFERENCE_MODEL=llama3.2:3b-instruct-fp16 LLAMA_STACK_CONFIG=ollama \ - pytest -s -v tests/client-sdk/vector_io/test_vector_io.py - - -make serve-ollama: - ollama run llama3.2:3b-instruct-fp16 --keepalive 24h diff --git a/docs/source/providers/vector_io/sqlite-vec.md b/docs/source/providers/vector_io/sqlite-vec.md index e61cc06fd..191c87cb6 100644 --- a/docs/source/providers/vector_io/sqlite-vec.md +++ b/docs/source/providers/vector_io/sqlite-vec.md @@ -13,32 +13,33 @@ That means you're not limited to storing vectors in memory or in a separate serv - Fully integrated with Llama 
Stacks - Uses disk-based storage for persistence, allowing for larger vector storage -### Comparison to faiss +### Comparison to Faiss SQLite-Vec is a lightweight alternative to Faiss, which is a popular vector database provider. -While faiss is a powerful, fast, and lightweight in line provider, faiss reindexes the -entire database when a new vector is added. SQLite-Vec is a disk-based storage provider +While Faiss is a fast, lightweight and powerful inline provider, Faiss reindexes the +entire database when a new vector is added. SQLite-Vec is a disk-based storage provider that allows for larger vector storage and handles incremental writes more efficiently. -sqlite-vec is a great alternative to faiss when you need to execute several writes to the +SQLite-Vec is a great alternative to Faiss when you need to execute several writes to the database. -Consider the histogram below in which 10,000 randomly generated strings were inserted -in batches of 100 into both `faiss` and `sqlite-vec` using `client.tool_runtime.rag_tool.insert()`. +Consider the histogram below in which 10,000 randomly generated strings were inserted +in batches of 100 into both Faiss and SQLite-Vec using `client.tool_runtime.rag_tool.insert()`. ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png :alt: Comparison of SQLite-Vec and Faiss write times :width: 400px ``` -You will notice that the average write time for `sqlite-vec` was 788ms, compared to -47,640ms for faiss. While the number is jarring, if you look at the distribution, you'll notice that it is rather uniformly spread across the [1500, 100000] interval. +You will notice that the average write time for `sqlite-vec` was 788ms, compared to +47,640ms for Faiss. While the number is jarring, if you look at the distribution, you can see that it is rather +uniformly spread across the [1500, 100000] interval. 
```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png :alt: Comparison of SQLite-Vec and Faiss write times :width: 400px ``` -For more information about this discussion see [the GitHub Issue](https://github.com/meta-llama/llama-stack/issues/1165) +For more information about this topic see [the GitHub Issue](https://github.com/meta-llama/llama-stack/issues/1165) where this was discussed. ## Usage diff --git a/sqlite_vec_test.py b/sqlite_vec_test.py deleted file mode 100644 index 452e4a146..000000000 --- a/sqlite_vec_test.py +++ /dev/null @@ -1,119 +0,0 @@ - -import os -import os -import uuid - -from termcolor import cprint - -# Set environment variables -os.environ["INFERENCE_MODEL"] = "llama3.2:3b-instruct-fp16" -os.environ["LLAMA_STACK_CONFIG"] = "ollama" - -# Import libraries after setting environment variables -from llama_stack_client.lib.agents.agent import Agent -from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types import Document -from llama_stack_client.types.agent_create_params import AgentConfig - -from llama_stack.distribution.library_client import LlamaStackAsLibraryClient - - -def main(): - # Initialize the client - client = LlamaStackAsLibraryClient("ollama") - vector_db_id = f"test-vector-db-{uuid.uuid4().hex}" - - _ = client.initialize() - - model_id = "llama3.2:3b-instruct-fp16" - - # Define the list of document URLs and create Document objects - urls = [ - "chat.rst", - "llama3.rst", - "memory_optimizations.rst", - "lora_finetune.rst", - ] - documents = [ - Document( - document_id=f"num-{i}", - content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}", - mime_type="text/plain", - metadata={}, - ) - for i, url in enumerate(urls) - ] - # (Optional) Use the documents as needed with your client here - - client.vector_dbs.register( - provider_id="sqlite-vec", - vector_db_id=vector_db_id, - embedding_model="all-MiniLM-L6-v2", - 
embedding_dimension=384, - ) - - client.tool_runtime.rag_tool.insert( - documents=documents, - vector_db_id=vector_db_id, - chunk_size_in_tokens=512, - ) - # List of user prompts - user_prompts = [ - "What are the top 5 topics that were explained in the documentation? Only list succinct bullet points.", - "Was anything related to 'Llama3' discussed, if so what?", - "Tell me how to use LoRA", - "What about Quantization?", - ] - - # Process each prompt and display the output - for prompt in user_prompts: - cprint(f"User> {prompt}", "green") - response = client.vector_io.query( - vector_db_id=vector_db_id, - query=prompt, - ) - cprint(f"Response> {response}", "blue") - - # # Create agent configuration - # agent_config = AgentConfig( - # model=model_id, - # instructions="You are a helpful assistant", - # enable_session_persistence=False, - # toolgroups=[ - # { - # "name": "builtin::rag", - # "args": { - # "vector_db_ids": [vector_db_id], - # }, - # } - # ], - # ) - # - # # Instantiate the Agent - # agent = Agent(client, agent_config) - # - # - # # Create a session for the agent - # session_id = agent.create_session("test-session") - # - # # Process each prompt and display the output - # for prompt in user_prompts: - # cprint(f"User> {prompt}", "green") - # response = agent.create_turn( - # messages=[ - # { - # "role": "user", - # "content": prompt, - # } - # ], - # session_id=session_id, - # ) - # # Log and print events from the response - # for log in EventLogger().log(response): - # log.print() - - -if __name__ == "__main__": - main() - -