fix: revert to using faiss for ollama distro

commit bf44381044 (parent bc8daf7fea)
Author: Ashwin Bharambe
Date:   2025-03-10 15:04:50 -07:00

7 changed files with 26 additions and 18 deletions

File 1 of 7

@@ -427,6 +427,7 @@
     "chardet",
     "chromadb-client",
     "datasets",
+    "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
@@ -448,7 +449,6 @@
     "scikit-learn",
     "scipy",
     "sentencepiece",
-    "sqlite-vec",
     "tqdm",
     "transformers",
     "uvicorn"

File 2 of 7

@@ -23,7 +23,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` |
-| vector_io | `inline::sqlite-vec`, `remote::chromadb`, `remote::pgvector` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 
 You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.

File 3 of 7

@@ -34,6 +34,8 @@ def available_providers() -> List[ProviderSpec]:
         config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
         api_dependencies=[Api.inference],
     ),
+    # NOTE: sqlite-vec cannot be bundled into the container image because it does not have a
+    # source distribution and the wheels are not available for all platforms.
     InlineProviderSpec(
         api=Api.vector_io,
         provider_type="inline::sqlite-vec",
@@ -45,7 +47,7 @@ def available_providers() -> List[ProviderSpec]:
     InlineProviderSpec(
         api=Api.vector_io,
         provider_type="inline::sqlite_vec",
-        pip_packages=["sqlite-vec"],
+        pip_packages=["sqlite-vec --no-binary=sqlite-vec"],
         module="llama_stack.providers.inline.vector_io.sqlite_vec",
         config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
         deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.",
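The deprecated inline::sqlite_vec alias keeps sqlite-vec but pins the install to a source build via pip's --no-binary flag. A hedged sketch of how a pip_packages string like this could be fed to pip (the plumbing here is illustrative, not llama_stack's actual installer):

    import shlex
    import subprocess
    import sys

    def pip_install(spec: str) -> None:
        # "sqlite-vec --no-binary=sqlite-vec" -> ["sqlite-vec", "--no-binary=sqlite-vec"];
        # --no-binary tells pip to build the named package from source rather
        # than use a wheel.
        subprocess.check_call([sys.executable, "-m", "pip", "install", *shlex.split(spec)])

    pip_install("sqlite-vec --no-binary=sqlite-vec")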

File 4 of 7

@@ -5,7 +5,7 @@ distribution_spec:
   inference:
   - remote::ollama
   vector_io:
-  - inline::sqlite-vec
+  - inline::faiss
   - remote::chromadb
   - remote::pgvector
   safety:
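The same provider list now appears in the registry, this build spec, and the generated templates, so a small consistency check can help. A hedged sketch, assuming this YAML is saved as build.yaml with the distribution_spec layout shown above:

    import yaml  # pyyaml, assumed available

    with open("build.yaml") as f:  # hypothetical local path for this check
        spec = yaml.safe_load(f)["distribution_spec"]

    # After the revert, faiss must be the inline vector_io provider.
    assert spec["vector_io"] == ["inline::faiss", "remote::chromadb", "remote::pgvector"]
    assert spec["inference"] == ["remote::ollama"]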

File 5 of 7

@@ -13,7 +13,7 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
+from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -21,7 +21,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::ollama"],
-        "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
@@ -43,10 +43,10 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="remote::ollama",
         config=OllamaImplConfig.sample_run_config(),
     )
-    vector_io_provider_sqlite = Provider(
-        provider_id="sqlite-vec",
-        provider_type="inline::sqlite-vec",
-        config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+    vector_io_provider_faiss = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
     )
 
     inference_model = ModelInput(
@@ -96,7 +96,7 @@ def get_distribution_template() -> DistributionTemplate:
         "run.yaml": RunConfigSettings(
             provider_overrides={
                 "inference": [inference_provider],
-                "vector_io": [vector_io_provider_sqlite],
+                "vector_io": [vector_io_provider_faiss],
             },
             default_models=[inference_model, embedding_model],
             default_tool_groups=default_tool_groups,
@@ -104,7 +104,7 @@ def get_distribution_template() -> DistributionTemplate:
         "run-with-safety.yaml": RunConfigSettings(
             provider_overrides={
                 "inference": [inference_provider],
-                "vector_io": [vector_io_provider_faiss],
                 "safety": [
                     Provider(
                         provider_id="llama-guard",

File 6 of 7

@@ -17,10 +17,13 @@ providers:
     config:
       url: ${env.OLLAMA_URL:http://localhost:11434}
   vector_io:
-  - provider_id: sqlite-vec
-    provider_type: inline::sqlite-vec
+  - provider_id: faiss
+    provider_type: inline::faiss
     config:
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
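Both run files lean on the ${env.VAR:default} substitution syntax, e.g. for SQLITE_STORE_DIR above. A hedged re-implementation of that idea, for illustration only (this is not llama_stack's actual resolver):

    import os
    import re

    _ENV_REF = re.compile(r"\$\{env\.([A-Za-z0-9_]+):([^}]*)\}")

    def resolve(value: str) -> str:
        # Replace each ${env.NAME:default} with $NAME from the environment,
        # falling back to the default given after the colon.
        return _ENV_REF.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)

    print(resolve("${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db"))
    # -> ~/.llama/distributions/ollama/faiss_store.db unless SQLITE_STORE_DIR is set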

File 7 of 7

@@ -17,10 +17,13 @@ providers:
     config:
       url: ${env.OLLAMA_URL:http://localhost:11434}
   vector_io:
-  - provider_id: sqlite-vec
-    provider_type: inline::sqlite-vec
+  - provider_id: faiss
+    provider_type: inline::faiss
     config:
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard