mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-23 04:53:14 +00:00
# What does this PR do?

This PR adds `sqlite_vec` as an additional inline vectordb.

Tested with `ollama` by adding the `vector_io` object in `./llama_stack/templates/ollama/run.yaml`:

```yaml
  vector_io:
  - provider_id: sqlite_vec
    provider_type: inline::sqlite_vec
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
```

I also updated the `./tests/client-sdk/vector_io/test_vector_io.py` test file with:

```python
INLINE_VECTOR_DB_PROVIDERS = ["faiss", "sqlite_vec"]
```

And parameterized the relevant tests.

[//]: # (If resolving an issue, uncomment and update the line below)
# Closes https://github.com/meta-llama/llama-stack/issues/1005

## Test Plan

I ran the tests with:

```bash
INFERENCE_MODEL=llama3.2:3b-instruct-fp16 LLAMA_STACK_CONFIG=ollama pytest -s -v tests/client-sdk/vector_io/test_vector_io.py
```

Which outputs:

```python
... PASSED
tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_retrieve[all-MiniLM-L6-v2-sqlite_vec] PASSED
tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_list PASSED
tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_register[all-MiniLM-L6-v2-faiss] PASSED
tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_register[all-MiniLM-L6-v2-sqlite_vec] PASSED
tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_unregister[faiss] PASSED
tests/client-sdk/vector_io/test_vector_io.py::test_vector_db_unregister[sqlite_vec] PASSED
```

In addition, I ran the `rag_with_vector_db.py` [example](https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/rag_with_vector_db.py) using the script below with `uv run rag_example.py`.
<details>
<summary>CLICK TO SHOW SCRIPT 👋 </summary>

```python
#!/usr/bin/env python3
import os
import uuid
from termcolor import cprint

# Set environment variables

os.environ['INFERENCE_MODEL'] = 'llama3.2:3b-instruct-fp16'
os.environ['LLAMA_STACK_CONFIG'] = 'ollama'

# Import libraries after setting environment variables

from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types.agent_create_params import AgentConfig
from llama_stack_client.types import Document


def main():
    # Initialize the client
    client = LlamaStackAsLibraryClient("ollama")
    vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"

    _ = client.initialize()

    model_id = 'llama3.2:3b-instruct-fp16'

    # Define the list of document URLs and create Document objects
    urls = [
        "chat.rst",
        "llama3.rst",
        "memory_optimizations.rst",
        "lora_finetune.rst",
    ]
    documents = [
        Document(
            document_id=f"num-{i}",
            content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
            mime_type="text/plain",
            metadata={},
        )
        for i, url in enumerate(urls)
    ]
    # (Optional) Use the documents as needed with your client here

    client.vector_dbs.register(
        provider_id='sqlite_vec',
        vector_db_id=vector_db_id,
        embedding_model="all-MiniLM-L6-v2",
        embedding_dimension=384,
    )

    client.tool_runtime.rag_tool.insert(
        documents=documents,
        vector_db_id=vector_db_id,
        chunk_size_in_tokens=512,
    )
    # Create agent configuration
    agent_config = AgentConfig(
        model=model_id,
        instructions="You are a helpful assistant",
        enable_session_persistence=False,
        toolgroups=[
            {
                "name": "builtin::rag",
                "args": {
                    "vector_db_ids": [vector_db_id],
                }
            }
        ],
    )

    # Instantiate the Agent
    agent = Agent(client, agent_config)

    # List of user prompts
    user_prompts = [
        "What are the top 5 topics that were explained in the documentation? Only list succinct bullet points.",
        "Was anything related to 'Llama3' discussed, if so what?",
        "Tell me how to use LoRA",
        "What about Quantization?",
    ]

    # Create a session for the agent
    session_id = agent.create_session("test-session")

    # Process each prompt and display the output
    for prompt in user_prompts:
        cprint(f"User> {prompt}", "green")
        response = agent.create_turn(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            session_id=session_id,
        )
        # Log and print events from the response
        for log in EventLogger().log(response):
            log.print()


if __name__ == "__main__":
    main()
```

</details>

Which outputs a large summary of RAG generation.

# Documentation

Will handle documentation updates in a follow-up PR.

# (- [ ] Added a Changelog entry if the change is significant)

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
122 lines
3.2 KiB
YAML
122 lines
3.2 KiB
YAML
# Run configuration for the `ollama` image.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened; verify it against the provider config
# schemas before relying on it.
# NOTE(review): values written as ${env.NAME:default} look like environment
# substitutions with a fallback after the colon -- confirm with the loader.
version: '2'
image_name: ollama

# APIs covered by this configuration; each one has a matching section
# under `providers`.
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io

providers:
  inference:
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:http://localhost:11434}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
  - provider_id: sqlite_vec
    provider_type: inline::sqlite_vec
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
      # NOTE(review): this second db_path is placed as a sibling of `kvstore`
      # because two identical keys inside one mapping would be invalid YAML;
      # confirm this is the level the sqlite_vec provider expects.
      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config: {}
  - provider_id: localfs
    provider_type: inline::localfs
    config: {}
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
    config: {}
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}

metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db

# Two models are declared: an LLM whose id comes from INFERENCE_MODEL, and an
# embedding model served by the sentence-transformers provider.
models:
- metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: ollama
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  model_type: embedding

shields: []
vector_dbs: []
datasets: []
scoring_fns: []
eval_tasks: []

# Each tool group refers to a provider_id defined under providers.tool_runtime.
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter

server:
  port: 8321