mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-05 10:13:05 +00:00
updated copy and cleaned up files
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
parent
1d9e74e373
commit
1ac05d3a2a
3 changed files with 10 additions and 161 deletions
33
Makefile
33
Makefile
|
@ -1,33 +0,0 @@
|
|||
# Developer convenience targets for building Llama Stack locally and for
# exercising the sqlite-vec vector_io provider against Ollama.

ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))

# Host OS; used below to select the correct in-place flag for sed.
OS := linux
ifeq ($(shell uname -s), Darwin)
OS := osx
endif

PYTHON_VERSION = ${shell python --version | grep -Eo '[0-9]\.[0-9]+'}
PYTHON_VERSIONS := 3.10 3.11

# BSD sed (macOS) needs an explicit, possibly empty, backup suffix after -i.
# GNU sed would treat a separate '' argument as the script, so omit it there.
ifeq ($(OS),osx)
SED_INPLACE := sed -i ''
else
SED_INPLACE := sed -i
endif

# None of these targets produce a file with the target's name.
.PHONY: build-dev build-ollama fix-line-endings test-sqlite-vec test-ollama-vector-integration serve-ollama

# Every recipe line runs in its own shell, so the virtualenv activation has to
# share a line with the command that depends on it.
build-dev:
	uv sync --extra dev --extra test
	uv pip install -e .
	. .venv/bin/activate && \
		uv pip install sqlite-vec chardet datasets sentence_transformers pypdf

build-ollama: fix-line-endings
	llama stack build --template ollama --image-type venv

# Strip carriage returns from the build scripts before they are executed.
fix-line-endings:
	$(SED_INPLACE) 's/\r$$//' llama_stack/distribution/common.sh
	$(SED_INPLACE) 's/\r$$//' llama_stack/distribution/build_venv.sh

test-sqlite-vec:
	pytest tests/unit/providers/vector_io/test_sqlite_vec.py \
		-v -s --tb=short --disable-warnings --asyncio-mode=auto

test-ollama-vector-integration:
	INFERENCE_MODEL=llama3.2:3b-instruct-fp16 LLAMA_STACK_CONFIG=ollama \
		pytest -s -v tests/client-sdk/vector_io/test_vector_io.py

# Previously written as "make serve-ollama:", which declared two targets
# ("make" and "serve-ollama") sharing this recipe.
serve-ollama:
	ollama run llama3.2:3b-instruct-fp16 --keepalive 24h
|
|
@ -13,32 +13,33 @@ That means you're not limited to storing vectors in memory or in a separate serv
|
|||
- Fully integrated with Llama Stack
|
||||
- Uses disk-based storage for persistence, allowing for larger vector storage
|
||||
|
||||
### Comparison to faiss
|
||||
### Comparison to Faiss
|
||||
|
||||
SQLite-Vec is a lightweight alternative to Faiss, which is a popular vector database provider.
|
||||
While faiss is a powerful, fast, and lightweight in line provider, faiss reindexes the
|
||||
entire database when a new vector is added. SQLite-Vec is a disk-based storage provider
|
||||
While Faiss is a fast, lightweight and powerful inline provider, Faiss reindexes the
|
||||
entire database when a new vector is added. SQLite-Vec is a disk-based storage provider
|
||||
that allows for larger vector storage and handles incremental writes more efficiently.
|
||||
|
||||
sqlite-vec is a great alternative to faiss when you need to execute several writes to the
|
||||
SQLite-Vec is a great alternative to Faiss when you need to execute several writes to the
|
||||
database.
|
||||
|
||||
Consider the histogram below in which 10,000 randomly generated strings were inserted
|
||||
in batches of 100 into both `faiss` and `sqlite-vec` using `client.tool_runtime.rag_tool.insert()`.
|
||||
Consider the histogram below in which 10,000 randomly generated strings were inserted
|
||||
in batches of 100 into both Faiss and SQLite-Vec using `client.tool_runtime.rag_tool.insert()`.
|
||||
|
||||
```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png
|
||||
:alt: Comparison of SQLite-Vec and Faiss write times
|
||||
:width: 400px
|
||||
```
|
||||
|
||||
You will notice that the average write time for `sqlite-vec` was 788ms, compared to
|
||||
47,640ms for faiss. While the number is jarring, if you look at the distribution, you'll notice that it is rather uniformly spread across the [1500, 100000] interval.
|
||||
You will notice that the average write time for `sqlite-vec` was 788ms, compared to
|
||||
47,640ms for Faiss. While the number is jarring, if you look at the distribution, you can see that it is rather
|
||||
uniformly spread across the [1500, 100000] interval.
|
||||
|
||||
```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png
|
||||
:alt: Comparison of SQLite-Vec and Faiss write times
|
||||
:width: 400px
|
||||
```
|
||||
For more information about this discussion see [the GitHub Issue](https://github.com/meta-llama/llama-stack/issues/1165)
|
||||
For more information about this topic see [the GitHub Issue](https://github.com/meta-llama/llama-stack/issues/1165)
|
||||
where this was discussed.
|
||||
|
||||
## Usage
|
||||
|
|
|
@ -1,119 +0,0 @@
|
|||
|
||||
import os
|
||||
import os
|
||||
import uuid
|
||||
|
||||
from termcolor import cprint
|
||||
|
||||
# Set environment variables
|
||||
os.environ["INFERENCE_MODEL"] = "llama3.2:3b-instruct-fp16"
|
||||
os.environ["LLAMA_STACK_CONFIG"] = "ollama"
|
||||
|
||||
# Import libraries after setting environment variables
|
||||
from llama_stack_client.lib.agents.agent import Agent
|
||||
from llama_stack_client.lib.agents.event_logger import EventLogger
|
||||
from llama_stack_client.types import Document
|
||||
from llama_stack_client.types.agent_create_params import AgentConfig
|
||||
|
||||
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
|
||||
|
||||
|
||||
def main():
    """Register a sqlite-vec vector DB, insert documents, and query it.

    Steps, in order:

    1. Create a ``LlamaStackAsLibraryClient`` for the ``"ollama"``
       configuration and initialize it.
    2. Register a uniquely named vector DB with the ``sqlite-vec`` provider
       (``all-MiniLM-L6-v2`` embeddings, dimension 384).
    3. Insert one ``Document`` per torchtune tutorial file through
       ``client.tool_runtime.rag_tool.insert``; each document's content is the
       raw GitHub URL of that tutorial.
    4. Run ``client.vector_io.query`` for each sample prompt and print the
       prompt (green) and the raw response (blue).
    """
    client = LlamaStackAsLibraryClient("ollama")
    # A random suffix keeps repeated runs from reusing the same vector DB id.
    vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"

    _ = client.initialize()

    # Tutorial file names; each is appended to the base URL below.
    tutorials_base_url = (
        "https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials"
    )
    tutorial_files = [
        "chat.rst",
        "llama3.rst",
        "memory_optimizations.rst",
        "lora_finetune.rst",
    ]
    documents = [
        Document(
            document_id=f"num-{index}",
            content=f"{tutorials_base_url}/{file_name}",
            mime_type="text/plain",
            metadata={},
        )
        for index, file_name in enumerate(tutorial_files)
    ]

    client.vector_dbs.register(
        provider_id="sqlite-vec",
        vector_db_id=vector_db_id,
        embedding_model="all-MiniLM-L6-v2",
        embedding_dimension=384,
    )

    client.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=vector_db_id,
        chunk_size_in_tokens=512,
    )

    user_prompts = [
        "What are the top 5 topics that were explained in the documentation? Only list succinct bullet points.",
        "Was anything related to 'Llama3' discussed, if so what?",
        "Tell me how to use LoRA",
        "What about Quantization?",
    ]

    # Query the vector DB directly for each prompt and show the raw result.
    for prompt in user_prompts:
        cprint(f"User> {prompt}", "green")
        response = client.vector_io.query(
            vector_db_id=vector_db_id,
            query=prompt,
        )
        cprint(f"Response> {response}", "blue")


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue