diff --git a/distributions/dependencies.json b/distributions/dependencies.json index 59b0c9e62..97aecc719 100644 --- a/distributions/dependencies.json +++ b/distributions/dependencies.json @@ -427,6 +427,7 @@ "chardet", "chromadb-client", "datasets", + "faiss-cpu", "fastapi", "fire", "httpx", @@ -448,7 +449,6 @@ "scikit-learn", "scipy", "sentencepiece", - "sqlite-vec", "tqdm", "transformers", "uvicorn" diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md index a6390de34..9bfa4211c 100644 --- a/docs/source/distributions/self_hosted_distro/ollama.md +++ b/docs/source/distributions/self_hosted_distro/ollama.md @@ -23,7 +23,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | -| vector_io | `inline::sqlite-vec`, `remote::chromadb`, `remote::pgvector` | +| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration. diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index b15b71622..8471748d8 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -34,6 +34,8 @@ def available_providers() -> List[ProviderSpec]: config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig", api_dependencies=[Api.inference], ), + # NOTE: sqlite-vec cannot be bundled into the container image because it does not have a + # source distribution and the wheels are not available for all platforms. InlineProviderSpec( api=Api.vector_io, provider_type="inline::sqlite-vec", diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index 58bd8e854..37b72fc1f 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -5,7 +5,7 @@ distribution_spec: inference: - remote::ollama vector_io: - - inline::sqlite-vec + - inline::faiss - remote::chromadb - remote::pgvector safety: diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 16d8a259f..2d753d3e4 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -13,7 +13,7 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig +from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.ollama import OllamaImplConfig from llama_stack.templates.template import DistributionTemplate, RunConfigSettings @@ -21,7 +21,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin def get_distribution_template() -> DistributionTemplate: providers = { "inference": ["remote::ollama"], - "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"], + "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], @@ -43,10 +43,10 @@ def get_distribution_template() -> DistributionTemplate: provider_type="remote::ollama", config=OllamaImplConfig.sample_run_config(), ) - vector_io_provider_sqlite = Provider( - provider_id="sqlite-vec", - provider_type="inline::sqlite-vec", - config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + vector_io_provider_faiss = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ) inference_model = ModelInput( @@ -96,7 +96,7 @@ def get_distribution_template() -> DistributionTemplate: "run.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], - "vector_io": [vector_io_provider_sqlite], + "vector_io": [vector_io_provider_faiss], }, default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, @@ -104,7 +104,7 @@ def get_distribution_template() -> DistributionTemplate: "run-with-safety.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], - "vector_io": [vector_io_provider_sqlite], + "vector_io": [vector_io_provider_faiss], "safety": [ Provider( provider_id="llama-guard", diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index c8d5a22a4..a96031272 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -17,10 +17,13 @@ providers: config: url: ${env.OLLAMA_URL:http://localhost:11434} vector_io: - - provider_id: sqlite-vec - provider_type: inline::sqlite-vec + - provider_id: faiss + provider_type: inline::faiss config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index fa21170d2..661d880a7 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -17,10 +17,13 @@ providers: config: url: ${env.OLLAMA_URL:http://localhost:11434} vector_io: - - provider_id: sqlite-vec - provider_type: inline::sqlite-vec + - provider_id: faiss + provider_type: inline::faiss config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard