updated copy and cleaned up files

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
2025-12-30 23:13:53 +00:00 · 2025-03-27 21:58:57 -04:00 · 2025-03-27 21:58:57 -04:00 · 1ac05d3a2a
commit 1ac05d3a2a
parent 1d9e74e373
3 changed files with 10 additions and 161 deletions
--- a/33
+++ b/33
@ -1,33 +0,0 @@
-ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
-OS := linux
-ifeq ($(shell uname -s), Darwin)
-	OS = osx
-endif
-
-PYTHON_VERSION = ${shell python --version | grep -Eo '[0-9]\.[0-9]+'}
-PYTHON_VERSIONS := 3.10 3.11
-
-build-dev:
-	uv sync --extra dev --extra test
-	uv pip install -e .
-	. .venv/bin/activate
-	uv pip install sqlite-vec chardet datasets sentence_transformers pypdf
-
-build-ollama: fix-line-endings
-	llama stack build --template ollama --image-type venv
-
-fix-line-endings:
-	sed -i '' 's/\r$$//' llama_stack/distribution/common.sh
-	sed -i '' 's/\r$$//' llama_stack/distribution/build_venv.sh
-
-test-sqlite-vec:
-	pytest tests/unit/providers/vector_io/test_sqlite_vec.py \
-	-v -s --tb=short --disable-warnings --asyncio-mode=auto
-
-test-ollama-vector-integration:
-	INFERENCE_MODEL=llama3.2:3b-instruct-fp16 LLAMA_STACK_CONFIG=ollama \
-	pytest -s -v tests/client-sdk/vector_io/test_vector_io.py
-
-
-make serve-ollama:
-	ollama run llama3.2:3b-instruct-fp16 --keepalive 24h
--- a/docs/source/providers/vector_io/sqlite-vec.md
+++ b/docs/source/providers/vector_io/sqlite-vec.md
@ -13,32 +13,33 @@ That means you're not limited to storing vectors in memory or in a separate serv
 - Fully integrated with Llama Stacks
 - Uses disk-based storage for persistence, allowing for larger vector storage

-### Comparison to faiss 
+### Comparison to Faiss

 SQLite-Vec is a lightweight alternative to Faiss, which is a popular vector database provider.
-While faiss is a powerful, fast, and lightweight in line provider, faiss reindexes the 
-entire database when a new vector is added. SQLite-Vec is a disk-based storage provider 
+Although Faiss is a fast, lightweight, and powerful inline provider, it reindexes the
+entire database whenever a new vector is added. SQLite-Vec is a disk-based storage provider
 that allows for larger vector storage and handles incremental writes more efficiently.

-sqlite-vec is a great alternative to faiss when you need to execute several writes to the 
+SQLite-Vec is a great alternative to Faiss when you need to execute many writes to the
 database.

-Consider the histogram below in which 10,000 randomly generated strings were inserted 
-in batches of 100 into both `faiss` and `sqlite-vec` using `client.tool_runtime.rag_tool.insert()`.
+Consider the histogram below, in which 10,000 randomly generated strings were inserted
+in batches of 100 into both Faiss and SQLite-Vec using `client.tool_runtime.rag_tool.insert()`.

 ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
 :alt: Comparison of SQLite-Vec and Faiss write times
 :width: 400px
 ```

-You will notice that the average write time for `sqlite-vec` was 788ms, compared to 
-47,640ms for faiss. While the number is jarring, if you look at the distribution, you'll notice that it is rather uniformly spread across the [1500, 100000] interval.
+You will notice that the average write time for `sqlite-vec` was 788ms, compared to
+47,640ms for Faiss. While that number is jarring, the distribution shows that the Faiss write times are
+rather uniformly spread across the [1500, 100000] interval.

 ```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png
 :alt: Comparison of SQLite-Vec and Faiss write times
 :width: 400px
 ```
-For more information about this discussion see [the GitHub Issue](https://github.com/meta-llama/llama-stack/issues/1165) 
+For more information about this topic, see [the GitHub Issue](https://github.com/meta-llama/llama-stack/issues/1165)
 where this was discussed.

 ## Usage
--- a/sqlite_vec_test.py
+++ b/sqlite_vec_test.py
@ -1,119 +0,0 @@
-
-import os
-import os
-import uuid
-
-from termcolor import cprint
-
-# Set environment variables
-os.environ["INFERENCE_MODEL"] = "llama3.2:3b-instruct-fp16"
-os.environ["LLAMA_STACK_CONFIG"] = "ollama"
-
-# Import libraries after setting environment variables
-from llama_stack_client.lib.agents.agent import Agent
-from llama_stack_client.lib.agents.event_logger import EventLogger
-from llama_stack_client.types import Document
-from llama_stack_client.types.agent_create_params import AgentConfig
-
-from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
-
-
-def main():
-    # Initialize the client
-    client = LlamaStackAsLibraryClient("ollama")
-    vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"
-
-    _ = client.initialize()
-
-    model_id = "llama3.2:3b-instruct-fp16"
-
-    # Define the list of document URLs and create Document objects
-    urls = [
-        "chat.rst",
-        "llama3.rst",
-        "memory_optimizations.rst",
-        "lora_finetune.rst",
-    ]
-    documents = [
-        Document(
-            document_id=f"num-{i}",
-            content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
-            mime_type="text/plain",
-            metadata={},
-        )
-        for i, url in enumerate(urls)
-    ]
-    # (Optional) Use the documents as needed with your client here
-
-    client.vector_dbs.register(
-        provider_id="sqlite-vec",
-        vector_db_id=vector_db_id,
-        embedding_model="all-MiniLM-L6-v2",
-        embedding_dimension=384,
-    )
-
-    client.tool_runtime.rag_tool.insert(
-        documents=documents,
-        vector_db_id=vector_db_id,
-        chunk_size_in_tokens=512,
-    )
-    # List of user prompts
-    user_prompts = [
-        "What are the top 5 topics that were explained in the documentation? Only list succinct bullet points.",
-        "Was anything related to 'Llama3' discussed, if so what?",
-        "Tell me how to use LoRA",
-        "What about Quantization?",
-    ]
-
-    # Process each prompt and display the output
-    for prompt in user_prompts:
-        cprint(f"User> {prompt}", "green")
-        response = client.vector_io.query(
-            vector_db_id=vector_db_id,
-            query=prompt,
-        )
-        cprint(f"Response> {response}", "blue")
-
-    # # Create agent configuration
-    # agent_config = AgentConfig(
-    #     model=model_id,
-    #     instructions="You are a helpful assistant",
-    #     enable_session_persistence=False,
-    #     toolgroups=[
-    #         {
-    #             "name": "builtin::rag",
-    #             "args": {
-    #                 "vector_db_ids": [vector_db_id],
-    #             },
-    #         }
-    #     ],
-    # )
-    #
-    # # Instantiate the Agent
-    # agent = Agent(client, agent_config)
-    #
-    #
-    # # Create a session for the agent
-    # session_id = agent.create_session("test-session")
-    #
-    # # Process each prompt and display the output
-    # for prompt in user_prompts:
-    #     cprint(f"User> {prompt}", "green")
-    #     response = agent.create_turn(
-    #         messages=[
-    #             {
-    #                 "role": "user",
-    #                 "content": prompt,
-    #             }
-    #         ],
-    #         session_id=session_id,
-    #     )
-    #     # Log and print events from the response
-    #     for log in EventLogger().log(response):
-    #         log.print()
-
-
-if __name__ == "__main__":
-    main()
-
-