forked from phoenix-oss/llama-stack-mirror
# What does this PR do? This PR adds `sqlite_vec` as an additional inline vectordb. Tested with `ollama` by adding the `vector_io` object in `./llama_stack/templates/ollama/run.yaml` : ```yaml vector_io: - provider_id: sqlite_vec provider_type: inline::sqlite_vec config: kvstore: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db ``` I also updated the `./tests/client-sdk/vector_io/test_vector_io.py` test file with: ```python INLINE_VECTOR_DB_PROVIDERS = ["faiss", "sqlite_vec"] ``` And parameterized the relevant tests. [//]: # (If resolving an issue, uncomment and update the line below) # Closes https://github.com/meta-llama/llama-stack/issues/1005 ## Test Plan I ran the tests with: ```bash INFERENCE_MODEL=llama3.2:3b-instruct-fp16 LLAMA_STACK_CONFIG=ollama pytest -s -v tests/client-sdk/vector_io/test_vector_io.py ``` Which outputs: ```python ... PASSED tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_retrieve[all-MiniLM-L6-v2-sqlite_vec] PASSED tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_list PASSED tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_register[all-MiniLM-L6-v2-faiss] PASSED tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_register[all-MiniLM-L6-v2-sqlite_vec] PASSED tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_unregister[faiss] PASSED tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_unregister[sqlite_vec] PASSED ``` In addition, I ran the `rag_with_vector_db.py` [example](https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/rag_with_vector_db.py) using the script below with `uv run rag_example.py`. <details> <summary>CLICK TO SHOW SCRIPT 👋 </summary> ```python #!/usr/bin/env python3 import os import uuid from termcolor import cprint # Set environment variables os.environ['INFERENCE_MODEL'] = 'llama3.2:3b-instruct-fp16' os.environ['LLAMA_STACK_CONFIG'] = 'ollama' # Import libraries after setting environment variables from llama_stack.distribution.library_client import LlamaStackAsLibraryClient from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.lib.agents.event_logger import EventLogger from llama_stack_client.types.agent_create_params import AgentConfig from llama_stack_client.types import Document def main(): # Initialize the client client = LlamaStackAsLibraryClient("ollama") vector_db_id = f"test-vector-db-{uuid.uuid4().hex}" _ = client.initialize() model_id = 'llama3.2:3b-instruct-fp16' # Define the list of document URLs and create Document objects urls = [ "chat.rst", "llama3.rst", "memory_optimizations.rst", "lora_finetune.rst", ] documents = [ Document( document_id=f"num-{i}", content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}", mime_type="text/plain", metadata={}, ) for i, url in enumerate(urls) ] # (Optional) Use the documents as needed with your client here client.vector_dbs.register( provider_id='sqlite_vec', vector_db_id=vector_db_id, embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, ) client.tool_runtime.rag_tool.insert( documents=documents, vector_db_id=vector_db_id, chunk_size_in_tokens=512, ) # Create agent configuration agent_config = AgentConfig( model=model_id, instructions="You are a helpful assistant", enable_session_persistence=False, toolgroups=[ { "name": "builtin::rag", "args": { "vector_db_ids": [vector_db_id], } } ], ) # Instantiate the Agent agent = Agent(client, agent_config) # List of user prompts user_prompts = [ "What are the top 5 topics that were explained in the documentation? Only list succinct bullet points.", "Was anything related to 'Llama3' discussed, if so what?", "Tell me how to use LoRA", "What about Quantization?", ] # Create a session for the agent session_id = agent.create_session("test-session") # Process each prompt and display the output for prompt in user_prompts: cprint(f"User> {prompt}", "green") response = agent.create_turn( messages=[ { "role": "user", "content": prompt, } ], session_id=session_id, ) # Log and print events from the response for log in EventLogger().log(response): log.print() if __name__ == "__main__": main() ``` </details> Which outputs a large summary of RAG generation. # Documentation Will handle documentation updates in follow-up PR. # (- [ ] Added a Changelog entry if the change is significant) --------- Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
124 lines
4.9 KiB
Python
124 lines
4.9 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
from typing import List
|
|
|
|
from llama_stack.providers.datatypes import (
|
|
AdapterSpec,
|
|
Api,
|
|
InlineProviderSpec,
|
|
ProviderSpec,
|
|
remote_provider_spec,
|
|
)
|
|
|
|
EMBEDDING_DEPS = [
|
|
"blobfile",
|
|
"chardet",
|
|
"pypdf",
|
|
"tqdm",
|
|
"numpy",
|
|
"scikit-learn",
|
|
"scipy",
|
|
"nltk",
|
|
"sentencepiece",
|
|
"transformers",
|
|
# this happens to work because special dependencies are always installed last
|
|
# so if there was a regular torch installed first, this would be ignored
|
|
# we need a better way to do this to identify potential conflicts, etc.
|
|
# for now, this lets us significantly reduce the size of the container which
|
|
# does not have any "local" inference code (and hence does not need GPU-enabled torch)
|
|
"torch torchvision --index-url https://download.pytorch.org/whl/cpu",
|
|
"sentence-transformers --no-deps",
|
|
]
|
|
|
|
|
|
def available_providers() -> List[ProviderSpec]:
|
|
return [
|
|
InlineProviderSpec(
|
|
api=Api.vector_io,
|
|
provider_type="inline::meta-reference",
|
|
pip_packages=EMBEDDING_DEPS + ["faiss-cpu"],
|
|
module="llama_stack.providers.inline.vector_io.faiss",
|
|
config_class="llama_stack.providers.inline.vector_io.faiss.FaissImplConfig",
|
|
deprecation_warning="Please use the `inline::faiss` provider instead.",
|
|
api_dependencies=[Api.inference],
|
|
),
|
|
InlineProviderSpec(
|
|
api=Api.vector_io,
|
|
provider_type="inline::faiss",
|
|
pip_packages=EMBEDDING_DEPS + ["faiss-cpu"],
|
|
module="llama_stack.providers.inline.vector_io.faiss",
|
|
config_class="llama_stack.providers.inline.vector_io.faiss.FaissImplConfig",
|
|
api_dependencies=[Api.inference],
|
|
),
|
|
InlineProviderSpec(
|
|
api=Api.vector_io,
|
|
provider_type="inline::sqlite_vec",
|
|
pip_packages=EMBEDDING_DEPS + ["sqlite-vec"],
|
|
module="llama_stack.providers.inline.vector_io.sqlite_vec",
|
|
config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
|
|
api_dependencies=[Api.inference],
|
|
),
|
|
remote_provider_spec(
|
|
Api.vector_io,
|
|
AdapterSpec(
|
|
adapter_type="chromadb",
|
|
pip_packages=EMBEDDING_DEPS + ["chromadb-client"],
|
|
module="llama_stack.providers.remote.vector_io.chroma",
|
|
config_class="llama_stack.providers.remote.vector_io.chroma.ChromaRemoteImplConfig",
|
|
),
|
|
api_dependencies=[Api.inference],
|
|
),
|
|
InlineProviderSpec(
|
|
api=Api.vector_io,
|
|
provider_type="inline::chromadb",
|
|
pip_packages=EMBEDDING_DEPS + ["chromadb"],
|
|
module="llama_stack.providers.inline.vector_io.chroma",
|
|
config_class="llama_stack.providers.inline.vector_io.chroma.ChromaInlineImplConfig",
|
|
api_dependencies=[Api.inference],
|
|
),
|
|
remote_provider_spec(
|
|
Api.vector_io,
|
|
AdapterSpec(
|
|
adapter_type="pgvector",
|
|
pip_packages=EMBEDDING_DEPS + ["psycopg2-binary"],
|
|
module="llama_stack.providers.remote.vector_io.pgvector",
|
|
config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorConfig",
|
|
),
|
|
api_dependencies=[Api.inference],
|
|
),
|
|
remote_provider_spec(
|
|
Api.vector_io,
|
|
AdapterSpec(
|
|
adapter_type="weaviate",
|
|
pip_packages=EMBEDDING_DEPS + ["weaviate-client"],
|
|
module="llama_stack.providers.remote.vector_io.weaviate",
|
|
config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateConfig",
|
|
provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData",
|
|
),
|
|
api_dependencies=[Api.inference],
|
|
),
|
|
remote_provider_spec(
|
|
api=Api.vector_io,
|
|
adapter=AdapterSpec(
|
|
adapter_type="sample",
|
|
pip_packages=[],
|
|
module="llama_stack.providers.remote.vector_io.sample",
|
|
config_class="llama_stack.providers.remote.vector_io.sample.SampleConfig",
|
|
),
|
|
api_dependencies=[],
|
|
),
|
|
remote_provider_spec(
|
|
Api.vector_io,
|
|
AdapterSpec(
|
|
adapter_type="qdrant",
|
|
pip_packages=EMBEDDING_DEPS + ["qdrant-client"],
|
|
module="llama_stack.providers.remote.vector_io.qdrant",
|
|
config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantConfig",
|
|
),
|
|
api_dependencies=[Api.inference],
|
|
),
|
|
]
|