updated copy and cleaned up files

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
Francisco Javier Arceo 2025-03-27 21:58:57 -04:00
parent 1d9e74e373
commit 1ac05d3a2a
3 changed files with 10 additions and 161 deletions


@@ -1,33 +0,0 @@
ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
OS := linux
ifeq ($(shell uname -s), Darwin)
  OS = osx
endif

PYTHON_VERSION = $(shell python --version | grep -Eo '[0-9]\.[0-9]+')
PYTHON_VERSIONS := 3.10 3.11

# Set up the dev environment and install the package in editable mode
build-dev:
	uv sync --extra dev --extra test
	uv pip install -e .
	. .venv/bin/activate
	uv pip install sqlite-vec chardet datasets sentence_transformers pypdf

build-ollama: fix-line-endings
	llama stack build --template ollama --image-type venv

# Strip Windows-style carriage returns from the distribution scripts
fix-line-endings:
	sed -i '' 's/\r$$//' llama_stack/distribution/common.sh
	sed -i '' 's/\r$$//' llama_stack/distribution/build_venv.sh

test-sqlite-vec:
	pytest tests/unit/providers/vector_io/test_sqlite_vec.py \
		-v -s --tb=short --disable-warnings --asyncio-mode=auto

test-ollama-vector-integration:
	INFERENCE_MODEL=llama3.2:3b-instruct-fp16 LLAMA_STACK_CONFIG=ollama \
		pytest -s -v tests/client-sdk/vector_io/test_vector_io.py

# Keep the inference model loaded in ollama for 24 hours
serve-ollama:
	ollama run llama3.2:3b-instruct-fp16 --keepalive 24h


@@ -13,32 +13,33 @@ That means you're not limited to storing vectors in memory or in a separate service.
 - Fully integrated with Llama Stacks
 - Uses disk-based storage for persistence, allowing for larger vector storage
-### Comparison to faiss
+### Comparison to Faiss
 SQLite-Vec is a lightweight alternative to Faiss, which is a popular vector database provider.
-While faiss is a powerful, fast, and lightweight in line provider, faiss reindexes the
+While Faiss is a fast, lightweight and powerful inline provider, Faiss reindexes the
 entire database when a new vector is added. SQLite-Vec is a disk-based storage provider
 that allows for larger vector storage and handles incremental writes more efficiently.
-sqlite-vec is a great alternative to faiss when you need to execute several writes to the
+SQLite-vec is a great alternative to Faiss when you need to execute several writes to the
 database.
 Consider the histogram below in which 10,000 randomly generated strings were inserted
-in batches of 100 into both `faiss` and `sqlite-vec` using `client.tool_runtime.rag_tool.insert()`.
+in batches of 100 into both Faiss and SQLite-vec using `client.tool_runtime.rag_tool.insert()`.
 ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
 :alt: Comparison of SQLite-Vec and Faiss write times
 :width: 400px
 ```
 You will notice that the average write time for `sqlite-vec` was 788ms, compared to
-47,640ms for faiss. While the number is jarring, if you look at the distribution, you'll notice that it is rather uniformly spread across the [1500, 100000] interval.
+47,640ms for Faiss. While the number is jarring, if you look at the distribution, you can see that it is rather
+uniformly spread across the [1500, 100000] interval.
 ```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png
 :alt: Comparison of SQLite-Vec and Faiss write times
 :width: 400px
 ```
-For more information about this discussion see [the GitHub Issue](https://github.com/meta-llama/llama-stack/issues/1165)
+For more information about this topic see [the GitHub Issue](https://github.com/meta-llama/llama-stack/issues/1165)
 where this was discussed.
 ## Usage
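
To make the write-path difference concrete, here is a minimal sketch of the kind of batch-insert timing loop described in this hunk. It assumes an already-initialized Llama Stack `client` and reuses the `vector_dbs.register()` / `rag_tool.insert()` calls that appear in the example script in this commit; the `benchmark_writes` helper, the random 64-character payloads, and the provider IDs passed in are illustrative assumptions, not code from this repository:

```python
import random
import string
import time
import uuid

from llama_stack_client.types import Document


def benchmark_writes(client, provider_id: str, n_docs: int = 10_000, batch_size: int = 100):
    """Time rag_tool.insert() over batches of random documents (illustrative sketch)."""
    # Register a fresh vector DB backed by the provider under test
    # (e.g. "faiss" or "sqlite-vec").
    vector_db_id = f"bench-{provider_id}-{uuid.uuid4().hex}"
    client.vector_dbs.register(
        provider_id=provider_id,
        vector_db_id=vector_db_id,
        embedding_model="all-MiniLM-L6-v2",
        embedding_dimension=384,
    )
    batch_times_ms = []
    for start in range(0, n_docs, batch_size):
        # Build a batch of randomly generated strings, mirroring the setup above.
        batch = [
            Document(
                document_id=f"doc-{start + i}",
                content="".join(random.choices(string.ascii_lowercase, k=64)),
                mime_type="text/plain",
                metadata={},
            )
            for i in range(batch_size)
        ]
        t0 = time.perf_counter()
        client.tool_runtime.rag_tool.insert(
            documents=batch,
            vector_db_id=vector_db_id,
            chunk_size_in_tokens=512,
        )
        batch_times_ms.append((time.perf_counter() - t0) * 1000.0)  # ms per batch
    return batch_times_ms
```

Running this once with `provider_id="faiss"` and once with `provider_id="sqlite-vec"`, then plotting the returned per-batch timings, should roughly reproduce the shape of the histograms referenced above.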


@@ -1,119 +0,0 @@
import os
import uuid
from termcolor import cprint
# Set environment variables
os.environ["INFERENCE_MODEL"] = "llama3.2:3b-instruct-fp16"
os.environ["LLAMA_STACK_CONFIG"] = "ollama"
# Import libraries after setting environment variables
# NOTE: Agent, EventLogger, and AgentConfig are only exercised by the
# commented-out agent example at the bottom of main().
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types import Document
from llama_stack_client.types.agent_create_params import AgentConfig
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
def main():
    # Initialize the client
    client = LlamaStackAsLibraryClient("ollama")
    vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"
    _ = client.initialize()

    # model_id is consumed by the commented-out agent example below.
    model_id = "llama3.2:3b-instruct-fp16"

    # Define the list of document URLs and create Document objects
    urls = [
        "chat.rst",
        "llama3.rst",
        "memory_optimizations.rst",
        "lora_finetune.rst",
    ]
    documents = [
        Document(
            document_id=f"num-{i}",
            content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
            mime_type="text/plain",
            metadata={},
        )
        for i, url in enumerate(urls)
    ]

    # Register a sqlite-vec vector DB and insert the documents into it
    client.vector_dbs.register(
        provider_id="sqlite-vec",
        vector_db_id=vector_db_id,
        embedding_model="all-MiniLM-L6-v2",
        embedding_dimension=384,
    )
    client.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=vector_db_id,
        chunk_size_in_tokens=512,
    )

    # List of user prompts
    user_prompts = [
        "What are the top 5 topics that were explained in the documentation? Only list succinct bullet points.",
        "Was anything related to 'Llama3' discussed, if so what?",
        "Tell me how to use LoRA",
        "What about Quantization?",
    ]

    # Process each prompt and display the output
    for prompt in user_prompts:
        cprint(f"User> {prompt}", "green")
        response = client.vector_io.query(
            vector_db_id=vector_db_id,
            query=prompt,
        )
        cprint(f"Response> {response}", "blue")

    # # Create agent configuration
    # agent_config = AgentConfig(
    #     model=model_id,
    #     instructions="You are a helpful assistant",
    #     enable_session_persistence=False,
    #     toolgroups=[
    #         {
    #             "name": "builtin::rag",
    #             "args": {
    #                 "vector_db_ids": [vector_db_id],
    #             },
    #         }
    #     ],
    # )
    #
    # # Instantiate the Agent
    # agent = Agent(client, agent_config)
    #
    # # Create a session for the agent
    # session_id = agent.create_session("test-session")
    #
    # # Process each prompt and display the output
    # for prompt in user_prompts:
    #     cprint(f"User> {prompt}", "green")
    #     response = agent.create_turn(
    #         messages=[
    #             {
    #                 "role": "user",
    #                 "content": prompt,
    #             }
    #         ],
    #         session_id=session_id,
    #     )
    #     # Log and print events from the response
    #     for log in EventLogger().log(response):
    #         log.print()


if __name__ == "__main__":
    main()