forked from phoenix-oss/llama-stack-mirror
# What does this PR do? This PR adds `sqlite_vec` as an additional inline vectordb. Tested with `ollama` by adding the `vector_io` object in `./llama_stack/templates/ollama/run.yaml` : ```yaml vector_io: - provider_id: sqlite_vec provider_type: inline::sqlite_vec config: kvstore: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db ``` I also updated the `./tests/client-sdk/vector_io/test_vector_io.py` test file with: ```python INLINE_VECTOR_DB_PROVIDERS = ["faiss", "sqlite_vec"] ``` And parameterized the relevant tests. [//]: # (If resolving an issue, uncomment and update the line below) # Closes https://github.com/meta-llama/llama-stack/issues/1005 ## Test Plan I ran the tests with: ```bash INFERENCE_MODEL=llama3.2:3b-instruct-fp16 LLAMA_STACK_CONFIG=ollama pytest -s -v tests/client-sdk/vector_io/test_vector_io.py ``` Which outputs: ```python ... PASSED tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_retrieve[all-MiniLM-L6-v2-sqlite_vec] PASSED tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_list PASSED tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_register[all-MiniLM-L6-v2-faiss] PASSED tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_register[all-MiniLM-L6-v2-sqlite_vec] PASSED tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_unregister[faiss] PASSED tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_unregister[sqlite_vec] PASSED ``` In addition, I ran the `rag_with_vector_db.py` [example](https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/rag_with_vector_db.py) using the script below with `uv run rag_example.py`. 
<details> <summary>CLICK TO SHOW SCRIPT 👋 </summary> ```python #!/usr/bin/env python3 import os import uuid from termcolor import cprint # Set environment variables os.environ['INFERENCE_MODEL'] = 'llama3.2:3b-instruct-fp16' os.environ['LLAMA_STACK_CONFIG'] = 'ollama' # Import libraries after setting environment variables from llama_stack.distribution.library_client import LlamaStackAsLibraryClient from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.lib.agents.event_logger import EventLogger from llama_stack_client.types.agent_create_params import AgentConfig from llama_stack_client.types import Document def main(): # Initialize the client client = LlamaStackAsLibraryClient("ollama") vector_db_id = f"test-vector-db-{uuid.uuid4().hex}" _ = client.initialize() model_id = 'llama3.2:3b-instruct-fp16' # Define the list of document URLs and create Document objects urls = [ "chat.rst", "llama3.rst", "memory_optimizations.rst", "lora_finetune.rst", ] documents = [ Document( document_id=f"num-{i}", content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}", mime_type="text/plain", metadata={}, ) for i, url in enumerate(urls) ] # (Optional) Use the documents as needed with your client here client.vector_dbs.register( provider_id='sqlite_vec', vector_db_id=vector_db_id, embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, ) client.tool_runtime.rag_tool.insert( documents=documents, vector_db_id=vector_db_id, chunk_size_in_tokens=512, ) # Create agent configuration agent_config = AgentConfig( model=model_id, instructions="You are a helpful assistant", enable_session_persistence=False, toolgroups=[ { "name": "builtin::rag", "args": { "vector_db_ids": [vector_db_id], } } ], ) # Instantiate the Agent agent = Agent(client, agent_config) # List of user prompts user_prompts = [ "What are the top 5 topics that were explained in the documentation? 
Only list succinct bullet points.", "Was anything related to 'Llama3' discussed, if so what?", "Tell me how to use LoRA", "What about Quantization?", ] # Create a session for the agent session_id = agent.create_session("test-session") # Process each prompt and display the output for prompt in user_prompts: cprint(f"User> {prompt}", "green") response = agent.create_turn( messages=[ { "role": "user", "content": prompt, } ], session_id=session_id, ) # Log and print events from the response for log in EventLogger().log(response): log.print() if __name__ == "__main__": main() ``` </details> Which outputs a large summary of RAG generation. # Documentation Will handle documentation updates in follow-up PR. # (- [ ] Added a Changelog entry if the change is significant) --------- Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
110 lines
3 KiB
Python
110 lines
3 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
import pytest
|
|
|
|
from ..conftest import (
|
|
get_provider_fixture_overrides,
|
|
get_provider_fixture_overrides_from_test_config,
|
|
get_test_config_for_api,
|
|
)
|
|
|
|
from ..inference.fixtures import INFERENCE_FIXTURES
|
|
from .fixtures import VECTOR_IO_FIXTURES
|
|
|
|
|
|
# Default provider pairings for the `vector_io_stack` fixture; they are used
# by `pytest_generate_tests` when no override yields a non-empty result.
# Each row below is: (inference provider, vector_io provider, param id, mark name).
# NOTE(review): the "sqlite_vec" row carries the `ollama` mark rather than a
# `sqlite_vec` mark -- confirm that this is intentional.
DEFAULT_PROVIDER_COMBINATIONS = [
    pytest.param(
        {"inference": inference, "vector_io": vector_io},
        id=param_id,
        marks=getattr(pytest.mark, mark_name),
    )
    for inference, vector_io, param_id, mark_name in (
        ("sentence_transformers", "faiss", "sentence_transformers", "sentence_transformers"),
        ("ollama", "pgvector", "pgvector", "pgvector"),
        ("ollama", "faiss", "ollama", "ollama"),
        ("ollama", "sqlite_vec", "sqlite_vec", "ollama"),
        ("sentence_transformers", "chroma", "chroma", "chroma"),
        ("bedrock", "qdrant", "qdrant", "qdrant"),
        ("fireworks", "weaviate", "weaviate", "weaviate"),
    )
]
|
|
|
|
|
|
def pytest_configure(config):
    """Register one pytest marker for every name in ``VECTOR_IO_FIXTURES``.

    Each marker is added to the ``markers`` ini setting of ``config`` with a
    description of the form ``"<name>: marks tests as <name> specific"``.
    """
    for name in VECTOR_IO_FIXTURES:
        marker_line = f"{name}: marks tests as {name} specific"
        config.addinivalue_line("markers", marker_line)
|
|
|
|
|
|
def pytest_generate_tests(metafunc):
    """Parametrize the ``embedding_model`` and ``vector_io_stack`` fixtures.

    ``embedding_model`` is resolved from, in order: the ``embedding_model``
    attribute of the vector_io test config, the ``--embedding-model`` command
    line option, and finally the literal ``"all-minilm:l6-v2"``.

    ``vector_io_stack`` is resolved from, in order: overrides derived from the
    test config, overrides derived from the pytest config and the available
    fixtures, and finally ``DEFAULT_PROVIDER_COMBINATIONS``.

    Both fixtures are parametrized with ``indirect=True``.
    """
    pytest_config = metafunc.config
    test_config = get_test_config_for_api(pytest_config, "vector_io")
    requested = metafunc.fixturenames

    if "embedding_model" in requested:
        chosen_model = getattr(test_config, "embedding_model", None)
        if not chosen_model:
            # Fall back to the command line when the config file names no model.
            chosen_model = pytest_config.getoption("--embedding-model")
        if not chosen_model:
            chosen_model = "all-minilm:l6-v2"

        metafunc.parametrize(
            "embedding_model",
            [pytest.param(chosen_model, id="")],
            indirect=True,
        )

    if "vector_io_stack" in requested:
        # Each fallback is only evaluated when the previous source was empty.
        stack_params = get_provider_fixture_overrides_from_test_config(
            pytest_config, "vector_io", DEFAULT_PROVIDER_COMBINATIONS
        )
        if not stack_params:
            fixtures_by_api = {
                "inference": INFERENCE_FIXTURES,
                "vector_io": VECTOR_IO_FIXTURES,
            }
            stack_params = get_provider_fixture_overrides(pytest_config, fixtures_by_api)
        if not stack_params:
            stack_params = DEFAULT_PROVIDER_COMBINATIONS

        metafunc.parametrize("vector_io_stack", stack_params, indirect=True)
|