From 5a05553e930ca806de128a6aa80b953ae2875828 Mon Sep 17 00:00:00 2001
From: Francisco Javier Arceo
Date: Tue, 25 Feb 2025 12:45:53 -0500
Subject: [PATCH] feat: adding a Makefile and a test script for sqlite-vec

Signed-off-by: Francisco Javier Arceo
---
 Makefile           |  33 +++++++++++++
 sqlite_vec_test.py | 112 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 145 insertions(+)
 create mode 100644 Makefile
 create mode 100644 sqlite_vec_test.py

diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..b076eb0da
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,33 @@
+ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
+OS := linux
+ifeq ($(shell uname -s), Darwin)
+	OS = osx
+endif
+
+PYTHON_VERSION = ${shell python --version | grep -Eo '[0-9]\.[0-9]+'}
+PYTHON_VERSIONS := 3.10 3.11
+
+build-dev:
+	uv sync --extra dev --extra test
+	uv pip install -e .
+	. .venv/bin/activate
+	uv pip install sqlite-vec chardet datasets sentence_transformers pypdf
+
+build-ollama: fix-line-endings
+	llama stack build --template ollama --image-type venv
+
+fix-line-endings:
+	sed -i '' 's/\r$$//' llama_stack/distribution/common.sh
+	sed -i '' 's/\r$$//' llama_stack/distribution/build_venv.sh
+
+test-sqlite-vec:
+	pytest llama_stack/providers/tests/vector_io/test_sqlite_vec.py \
+	-v -s --tb=short --disable-warnings --asyncio-mode=auto
+
+test-ollama-vector-integration:
+	INFERENCE_MODEL=llama3.2:3b-instruct-fp16 LLAMA_STACK_CONFIG=ollama \
+	pytest -s -v tests/client-sdk/vector_io/test_vector_io.py
+
+
+serve-ollama:
+	ollama run llama3.2:3b-instruct-fp16 --keepalive 24h

diff --git a/sqlite_vec_test.py b/sqlite_vec_test.py
new file mode 100644
index 000000000..7aeff3577
--- /dev/null
+++ b/sqlite_vec_test.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+import uuid
+
+from termcolor import cprint
+
+# Set environment variables
+os.environ["INFERENCE_MODEL"] = "llama3.2:3b-instruct-fp16"
+os.environ["LLAMA_STACK_CONFIG"] = "ollama"
+
+# Import libraries after setting environment variables
+from llama_stack_client.lib.agents.agent import Agent
+from llama_stack_client.lib.agents.event_logger import EventLogger
+from llama_stack_client.types import Document
+from llama_stack_client.types.agent_create_params import AgentConfig
+
+from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
+
+
+def main():
+    # Initialize the client
+    client = LlamaStackAsLibraryClient("ollama")
+    vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"
+
+    _ = client.initialize()
+
+    model_id = "llama3.2:3b-instruct-fp16"
+
+    # Define the list of document URLs and create Document objects
+    urls = [
+        "chat.rst",
+        "llama3.rst",
+        "memory_optimizations.rst",
+        "lora_finetune.rst",
+    ]
+    documents = [
+        Document(
+            document_id=f"num-{i}",
+            content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
+            mime_type="text/plain",
+            metadata={},
+        )
+        for i, url in enumerate(urls)
+    ]
+    # These documents are inserted into the vector DB below
+
+    client.vector_dbs.register(
+        provider_id="sqlite_vec",
+        vector_db_id=vector_db_id,
+        embedding_model="all-MiniLM-L6-v2",
+        embedding_dimension=384,
+    )
+
+    client.tool_runtime.rag_tool.insert(
+        documents=documents,
+        vector_db_id=vector_db_id,
+        chunk_size_in_tokens=512,
+    )
+    # Create agent configuration
+    agent_config = AgentConfig(
+        model=model_id,
+        instructions="You are a helpful assistant",
+        enable_session_persistence=False,
+        toolgroups=[
+            {
+                "name": "builtin::rag",
+                "args": {
+                    "vector_db_ids": [vector_db_id],
+                },
+            }
+        ],
+    )
+
+    # Instantiate the Agent
+    agent = Agent(client, agent_config)
+
+    # List of user prompts
+    user_prompts = [
+        "What are the top 5 topics that were explained in the documentation? Only list succinct bullet points.",
+        "Was anything related to 'Llama3' discussed, if so what?",
+        "Tell me how to use LoRA",
+        "What about Quantization?",
+    ]
+
+    # Create a session for the agent
+    session_id = agent.create_session("test-session")
+
+    # Process each prompt and display the output
+    for prompt in user_prompts:
+        cprint(f"User> {prompt}", "green")
+        response = agent.create_turn(
+            messages=[
+                {
+                    "role": "user",
+                    "content": prompt,
+                }
+            ],
+            session_id=session_id,
+        )
+        # Log and print events from the response
+        for log in EventLogger().log(response):
+            log.print()
+
+
+if __name__ == "__main__":
+    main()
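
A quick way to sanity-check the sqlite-vec wiring outside the full agent loop is to register a vector DB and call the RAG tool's query endpoint directly. The sketch below is illustrative only, not part of the patch: it assumes "make build-dev" has completed, Ollama is serving llama3.2:3b-instruct-fp16 (e.g. via "make serve-ollama"), and "sanity-check-db" is a hypothetical placeholder id.

#!/usr/bin/env python3
# Illustrative sketch only; not part of this patch.
import os

# Same environment setup as sqlite_vec_test.py, done before importing
# llama_stack modules.
os.environ["INFERENCE_MODEL"] = "llama3.2:3b-instruct-fp16"
os.environ["LLAMA_STACK_CONFIG"] = "ollama"

from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("ollama")
client.initialize()

# Hypothetical DB id; any unused id works for a wiring check.
client.vector_dbs.register(
    provider_id="sqlite_vec",
    vector_db_id="sanity-check-db",
    embedding_model="all-MiniLM-L6-v2",
    embedding_dimension=384,
)

# Query the RAG tool directly; even an empty result shows that the
# sqlite_vec provider registered and the query path round-trips.
result = client.tool_runtime.rag_tool.query(
    vector_db_ids=["sanity-check-db"],
    content="How do I use LoRA?",
)
print(result)

Going through rag_tool.query rather than the agent keeps the check independent of agent configuration, so a failure here points at the vector_io provider rather than at tool-calling.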