chore: move embedding deps to RAG tool where they are needed (#1210)

`EMBEDDING_DEPS` were wrongly associated with `vector_io` providers. They are needed by
https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/utils/memory/vector_store.py#L142
and related code, which is used by the RAG tool, so they should only be required by the
`inline::rag-runtime` provider.
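
For context on why this shrinks container images: a distribution's Python dependency list is effectively the union of the `pip_packages` declared by the providers it selects, so the heavyweight torch / sentence-transformers requirements now come along only when the RAG tool (or the `inline::sentence-transformers` inference provider) is actually part of the build. A minimal sketch of that idea follows; the `PROVIDER_PIP_PACKAGES` dict and `collect_pip_packages` helper are illustrative only (they mirror the registry entries touched by this commit, not the real build code):

```python
from typing import Dict, List

# Hypothetical provider -> declared pip packages mapping, modeled on the
# registry entries changed in this commit (not the actual registry module).
PROVIDER_PIP_PACKAGES: Dict[str, List[str]] = {
    "vector_io::inline::faiss": ["faiss-cpu"],
    "vector_io::inline::sqlite-vec": ["sqlite-vec"],
    "tool_runtime::inline::rag-runtime": [
        "blobfile", "chardet", "pypdf", "tqdm", "numpy",
        "scikit-learn", "scipy", "nltk", "sentencepiece", "transformers",
    ],
    "inference::inline::sentence-transformers": [
        "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
        "sentence-transformers --no-deps",
    ],
}


def collect_pip_packages(selected_providers: List[str]) -> List[str]:
    """Union of the pip packages declared by the selected providers (order-preserving)."""
    deps: List[str] = []
    for provider in selected_providers:
        for pkg in PROVIDER_PIP_PACKAGES.get(provider, []):
            if pkg not in deps:
                deps.append(pkg)
    return deps


# A build that only needs faiss for vector_io no longer drags in torch or
# sentence-transformers; those arrive only via the RAG tool / embedding providers.
print(collect_pip_packages(["vector_io::inline::faiss"]))
print(collect_pip_packages(["vector_io::inline::faiss", "tool_runtime::inline::rag-runtime"]))
```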
Ashwin Bharambe 2025-02-21 11:33:41 -08:00 committed by GitHub
parent 11697f85c5
commit 992f865b2e
34 changed files with 85 additions and 132 deletions

View file

@@ -30,9 +30,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "cerebras": [
     "aiosqlite",
@@ -170,9 +168,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "hf-serverless": [
     "aiohttp",
@@ -247,9 +243,7 @@
     "tqdm",
     "transformers",
     "uvicorn",
-    "zmq",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "zmq"
   ],
   "meta-reference-quantized-gpu": [
     "accelerate",
@@ -290,9 +284,7 @@
     "tqdm",
     "transformers",
     "uvicorn",
-    "zmq",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "zmq"
   ],
   "nvidia": [
     "aiosqlite",
@@ -323,9 +315,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "ollama": [
     "aiohttp",
@@ -335,7 +325,6 @@
     "chardet",
     "chromadb-client",
     "datasets",
-    "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
@@ -359,9 +348,7 @@
     "sqlite-vec",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "remote-vllm": [
     "aiosqlite",
@@ -424,9 +411,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "tgi": [
     "aiohttp",

View file

@@ -8,7 +8,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::cerebras` |
+| inference | `remote::cerebras`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |

View file

@@ -19,7 +19,7 @@ The `llamastack/distribution-dell` distribution consists of the following provid
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::tgi` |
+| inference | `remote::tgi`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |

View file

@@ -18,7 +18,7 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::fireworks` |
+| inference | `remote::fireworks`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |

View file

@@ -23,7 +23,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
-| vector_io | `inline::faiss`, `inline::sqlite_vec`, `remote::chromadb`, `remote::pgvector` |
+| vector_io | `inline::sqlite-vec`, `remote::chromadb`, `remote::pgvector` |
 
 You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.

View file

@@ -17,7 +17,7 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::vllm` |
+| inference | `remote::vllm`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |

View file

@@ -19,7 +19,7 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::tgi` |
+| inference | `remote::tgi`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |

View file

@@ -18,7 +18,7 @@ The `llamastack/distribution-together` distribution consists of the following pr
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::together` |
+| inference | `remote::together`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |

View file

@@ -178,6 +178,12 @@ class StackRun(Subcommand):
         # else must be venv since that is the only valid option left.
         current_venv = os.environ.get("VIRTUAL_ENV")
         venv = args.image_name or current_venv
+        if not venv:
+            cprint(
+                "No current virtual environment detected, please specify a virtual environment name with --image-name",
+                color="red",
+            )
+            return
         script = importlib.resources.files("llama_stack") / "distribution/start_venv.sh"
         run_args = [
             script,
@@ -206,5 +212,4 @@ class StackRun(Subcommand):
         if args.tls_keyfile and args.tls_certfile:
             run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
         run_with_pty(run_args)
-
View file

@@ -44,7 +44,6 @@ class SentenceTransformersInferenceImpl(
         pass
 
     async def register_model(self, model: Model) -> None:
-        _ = self._load_sentence_transformer_model(model.provider_resource_id)
         return model
 
     async def unregister_model(self, model_id: str) -> None:

View file

@@ -61,7 +61,10 @@ def available_providers() -> List[ProviderSpec]:
         InlineProviderSpec(
             api=Api.inference,
             provider_type="inline::sentence-transformers",
-            pip_packages=["sentence-transformers"],
+            pip_packages=[
+                "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
+                "sentence-transformers --no-deps",
+            ],
             module="llama_stack.providers.inline.inference.sentence_transformers",
             config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
         ),

View file

@@ -20,7 +20,18 @@ def available_providers() -> List[ProviderSpec]:
         InlineProviderSpec(
             api=Api.tool_runtime,
             provider_type="inline::rag-runtime",
-            pip_packages=[],
+            pip_packages=[
+                "blobfile",
+                "chardet",
+                "pypdf",
+                "tqdm",
+                "numpy",
+                "scikit-learn",
+                "scipy",
+                "nltk",
+                "sentencepiece",
+                "transformers",
+            ],
             module="llama_stack.providers.inline.tool_runtime.rag",
             config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig",
             api_dependencies=[Api.vector_io, Api.inference],

View file

@@ -14,33 +14,13 @@ from llama_stack.providers.datatypes import (
     remote_provider_spec,
 )
 
-EMBEDDING_DEPS = [
-    "blobfile",
-    "chardet",
-    "pypdf",
-    "tqdm",
-    "numpy",
-    "scikit-learn",
-    "scipy",
-    "nltk",
-    "sentencepiece",
-    "transformers",
-    # this happens to work because special dependencies are always installed last
-    # so if there was a regular torch installed first, this would be ignored
-    # we need a better way to do this to identify potential conflicts, etc.
-    # for now, this lets us significantly reduce the size of the container which
-    # does not have any "local" inference code (and hence does not need GPU-enabled torch)
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
-    "sentence-transformers --no-deps",
-]
-
 
 def available_providers() -> List[ProviderSpec]:
     return [
         InlineProviderSpec(
             api=Api.vector_io,
             provider_type="inline::meta-reference",
-            pip_packages=EMBEDDING_DEPS + ["faiss-cpu"],
+            pip_packages=["faiss-cpu"],
             module="llama_stack.providers.inline.vector_io.faiss",
             config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
             deprecation_warning="Please use the `inline::faiss` provider instead.",
@@ -49,24 +29,33 @@ def available_providers() -> List[ProviderSpec]:
         InlineProviderSpec(
             api=Api.vector_io,
             provider_type="inline::faiss",
-            pip_packages=EMBEDDING_DEPS + ["faiss-cpu"],
+            pip_packages=["faiss-cpu"],
             module="llama_stack.providers.inline.vector_io.faiss",
             config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
             api_dependencies=[Api.inference],
         ),
         InlineProviderSpec(
             api=Api.vector_io,
-            provider_type="inline::sqlite_vec",
-            pip_packages=EMBEDDING_DEPS + ["sqlite-vec"],
+            provider_type="inline::sqlite-vec",
+            pip_packages=["sqlite-vec"],
             module="llama_stack.providers.inline.vector_io.sqlite_vec",
             config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
             api_dependencies=[Api.inference],
         ),
+        InlineProviderSpec(
+            api=Api.vector_io,
+            provider_type="inline::sqlite_vec",
+            pip_packages=["sqlite-vec"],
+            module="llama_stack.providers.inline.vector_io.sqlite_vec",
+            config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
+            deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.",
+            api_dependencies=[Api.inference],
+        ),
         remote_provider_spec(
             Api.vector_io,
             AdapterSpec(
                 adapter_type="chromadb",
-                pip_packages=EMBEDDING_DEPS + ["chromadb-client"],
+                pip_packages=["chromadb-client"],
                 module="llama_stack.providers.remote.vector_io.chroma",
                 config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig",
             ),
@@ -75,7 +64,7 @@ def available_providers() -> List[ProviderSpec]:
         InlineProviderSpec(
             api=Api.vector_io,
             provider_type="inline::chromadb",
-            pip_packages=EMBEDDING_DEPS + ["chromadb"],
+            pip_packages=["chromadb"],
             module="llama_stack.providers.inline.vector_io.chroma",
             config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig",
             api_dependencies=[Api.inference],
@@ -84,7 +73,7 @@ def available_providers() -> List[ProviderSpec]:
             Api.vector_io,
             AdapterSpec(
                 adapter_type="pgvector",
-                pip_packages=EMBEDDING_DEPS + ["psycopg2-binary"],
+                pip_packages=["psycopg2-binary"],
                 module="llama_stack.providers.remote.vector_io.pgvector",
                 config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig",
             ),
@@ -94,7 +83,7 @@ def available_providers() -> List[ProviderSpec]:
             Api.vector_io,
             AdapterSpec(
                 adapter_type="weaviate",
-                pip_packages=EMBEDDING_DEPS + ["weaviate-client"],
+                pip_packages=["weaviate-client"],
                 module="llama_stack.providers.remote.vector_io.weaviate",
                 config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig",
                 provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData",
@@ -115,7 +104,7 @@ def available_providers() -> List[ProviderSpec]:
             Api.vector_io,
             AdapterSpec(
                 adapter_type="qdrant",
-                pip_packages=EMBEDDING_DEPS + ["qdrant-client"],
+                pip_packages=["qdrant-client"],
                 module="llama_stack.providers.remote.vector_io.qdrant",
                 config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig",
             ),

View file

@@ -61,7 +61,7 @@ def vector_io_sqlite_vec() -> ProviderFixture:
         providers=[
             Provider(
                 provider_id="sqlite_vec",
-                provider_type="inline::sqlite_vec",
+                provider_type="inline::sqlite-vec",
                 config=SQLiteVectorIOConfig(
                     kvstore=SqliteKVStoreConfig(db_path=temp_file.name).model_dump(),
                 ).model_dump(),

View file

@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::cerebras
+    - inline::sentence-transformers
     safety:
     - inline::llama-guard
     vector_io:

View file

@@ -20,7 +20,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::cerebras"],
+        "inference": ["remote::cerebras", "inline::sentence-transformers"],
         "safety": ["inline::llama-guard"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "agents": ["inline::meta-reference"],

View file

@@ -5,6 +5,7 @@ distribution_spec:
     inference:
     - remote::tgi
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb

View file

@@ -20,7 +20,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::tgi"],
+        "inference": ["remote::tgi", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],

View file

@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::fireworks
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb

View file

@@ -25,7 +25,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::fireworks"],
+        "inference": ["remote::fireworks", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],

View file

@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::hf::serverless
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb

View file

@@ -21,7 +21,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::hf::serverless"],
+        "inference": ["remote::hf::serverless", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],

View file

@@ -5,8 +5,7 @@ distribution_spec:
     inference:
     - remote::ollama
     vector_io:
-    - inline::faiss
-    - inline::sqlite_vec
+    - inline::sqlite-vec
     - remote::chromadb
     - remote::pgvector
     safety:

View file

@@ -13,10 +13,6 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.providers.inline.inference.sentence_transformers import (
-    SentenceTransformersInferenceConfig,
-)
-from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -25,7 +21,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::ollama"],
-        "vector_io": ["inline::faiss", "inline::sqlite_vec", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
@@ -45,19 +41,9 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="remote::ollama",
         config=OllamaImplConfig.sample_run_config(),
     )
-    embedding_provider = Provider(
-        provider_id="sentence-transformers",
-        provider_type="inline::sentence-transformers",
-        config=SentenceTransformersInferenceConfig.sample_run_config(),
-    )
-    vector_io_provider_faiss = Provider(
-        provider_id="faiss",
-        provider_type="inline::faiss",
-        config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
-    )
     vector_io_provider_sqlite = Provider(
-        provider_id="sqlite_vec",
-        provider_type="inline::sqlite_vec",
+        provider_id="sqlite-vec",
+        provider_type="inline::sqlite-vec",
         config=SQLiteVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )
@@ -104,19 +90,16 @@ def get_distribution_template() -> DistributionTemplate:
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
-                    "inference": [inference_provider, embedding_provider],
-                    "vector_io": [vector_io_provider_faiss, vector_io_provider_sqlite],
+                    "inference": [inference_provider],
+                    "vector_io": [vector_io_provider_sqlite],
                 },
-                default_models=[inference_model, embedding_model],
+                default_models=[inference_model],
                 default_tool_groups=default_tool_groups,
             ),
             "run-with-safety.yaml": RunConfigSettings(
                 provider_overrides={
-                    "inference": [
-                        inference_provider,
-                        embedding_provider,
-                    ],
-                    "vector_io": [vector_io_provider_faiss, vector_io_provider_faiss],
+                    "inference": [inference_provider],
+                    "vector_io": [vector_io_provider_sqlite],
                     "safety": [
                         Provider(
                             provider_id="llama-guard",

View file

@@ -16,24 +16,11 @@ providers:
     provider_type: remote::ollama
     config:
       url: ${env.OLLAMA_URL:http://localhost:11434}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
     config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
+      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

View file

@@ -16,19 +16,9 @@ providers:
     provider_type: remote::ollama
     config:
       url: ${env.OLLAMA_URL:http://localhost:11434}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
-  - provider_id: sqlite_vec
-    provider_type: inline::sqlite_vec
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
     config:
       db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
   safety:
@@ -97,12 +87,6 @@ models:
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ollama
   model_type: llm
-- metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
-  provider_id: ollama
-  provider_model_id: all-minilm:latest
-  model_type: embedding
 shields: []
 vector_dbs: []
 datasets: []

View file

@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::vllm
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb

View file

@@ -23,7 +23,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::vllm"],
+        "inference": ["remote::vllm", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],

View file

@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::tgi
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb

View file

@@ -23,7 +23,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::tgi"],
+        "inference": ["remote::tgi", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],

View file

@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::together
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb

View file

@@ -25,7 +25,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::together"],
+        "inference": ["remote::together", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],

View file

@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - inline::vllm
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb

View file

@@ -20,7 +20,7 @@ from llama_stack.templates.template import (
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["inline::vllm"],
+        "inference": ["inline::vllm", "inline::sentence-transformers"],
        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],