diff --git a/distributions/dependencies.json b/distributions/dependencies.json
index df63c0773..9e468f08d 100644
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@@ -30,9 +30,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "cerebras": [
     "aiosqlite",
@@ -170,9 +168,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "hf-serverless": [
     "aiohttp",
@@ -247,9 +243,7 @@
     "tqdm",
     "transformers",
     "uvicorn",
-    "zmq",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "zmq"
   ],
   "meta-reference-quantized-gpu": [
     "accelerate",
@@ -290,9 +284,7 @@
     "tqdm",
     "transformers",
     "uvicorn",
-    "zmq",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "zmq"
   ],
   "nvidia": [
     "aiosqlite",
@@ -323,9 +315,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "ollama": [
     "aiohttp",
@@ -335,7 +325,6 @@
     "chardet",
     "chromadb-client",
     "datasets",
-    "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
@@ -359,9 +348,7 @@
     "sqlite-vec",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "remote-vllm": [
     "aiosqlite",
@@ -424,9 +411,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "tgi": [
     "aiohttp",
diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md
index a0c9eb263..6e2af14fd 100644
--- a/docs/source/distributions/self_hosted_distro/cerebras.md
+++ b/docs/source/distributions/self_hosted_distro/cerebras.md
@@ -8,7 +8,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::cerebras` |
+| inference | `remote::cerebras`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
diff --git a/docs/source/distributions/self_hosted_distro/dell.md b/docs/source/distributions/self_hosted_distro/dell.md
index aef3ecf58..f49b332a9 100644
--- a/docs/source/distributions/self_hosted_distro/dell.md
+++ b/docs/source/distributions/self_hosted_distro/dell.md
@@ -19,7 +19,7 @@ The `llamastack/distribution-dell` distribution consists of the following provid
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::tgi` |
+| inference | `remote::tgi`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md
index 7951e148e..f69e6d963 100644
--- a/docs/source/distributions/self_hosted_distro/fireworks.md
+++ b/docs/source/distributions/self_hosted_distro/fireworks.md
@@ -18,7 +18,7 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::fireworks` |
+| inference | `remote::fireworks`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index b800b4a43..a487109c8 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -23,7 +23,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
-| vector_io | `inline::faiss`, `inline::sqlite_vec`, `remote::chromadb`, `remote::pgvector` |
+| vector_io | `inline::sqlite-vec`, `remote::chromadb`, `remote::pgvector` |

 You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.

diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index 6c3bbd1d0..01f38807b 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -17,7 +17,7 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::vllm` |
+| inference | `remote::vllm`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md
index f4eecf2cd..80baf9c81 100644
--- a/docs/source/distributions/self_hosted_distro/tgi.md
+++ b/docs/source/distributions/self_hosted_distro/tgi.md
@@ -19,7 +19,7 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::tgi` |
+| inference | `remote::tgi`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index 936ae58f5..7af0dcf4d 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -18,7 +18,7 @@ The `llamastack/distribution-together` distribution consists of the following pr
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::together` |
+| inference | `remote::together`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
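Every distribution table above gains `inline::sentence-transformers` next to its remote inference provider, so embeddings can be computed in-process instead of through the remote endpoint. A minimal sketch of exercising that, assuming a stack already running locally and the usual `all-MiniLM-L6-v2` defaults — the port, model name, and 384-dimension metadata are illustrative, not values taken from this diff:

```python
# Hypothetical client-side check of the inline sentence-transformers provider.
# Assumes `llama-stack-client` is installed and a stack is listening on :5001.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")

# Register an embedding model against the inline provider added above.
client.models.register(
    model_id="all-MiniLM-L6-v2",
    provider_id="sentence-transformers",
    model_type="embedding",
    metadata={"embedding_dimension": 384},
)

# Embeddings are now served locally by sentence-transformers,
# not by the remote inference provider.
response = client.inference.embeddings(
    model_id="all-MiniLM-L6-v2",
    contents=["local embeddings via sentence-transformers"],
)
print(len(response.embeddings[0]))  # expect 384 for all-MiniLM-L6-v2
```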
diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py
index 73536491b..0c9c74518 100644
--- a/llama_stack/cli/stack/run.py
+++ b/llama_stack/cli/stack/run.py
@@ -178,6 +178,12 @@ class StackRun(Subcommand):
             # else must be venv since that is the only valid option left.
             current_venv = os.environ.get("VIRTUAL_ENV")
             venv = args.image_name or current_venv
+            if not venv:
+                cprint(
+                    "No current virtual environment detected, please specify a virtual environment name with --image-name",
+                    color="red",
+                )
+                return
             script = importlib.resources.files("llama_stack") / "distribution/start_venv.sh"
             run_args = [
                 script,
@@ -206,5 +212,4 @@

         if args.tls_keyfile and args.tls_certfile:
             run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
-
         run_with_pty(run_args)
diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
index 6a83836e6..bfb09af53 100644
--- a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
+++ b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
@@ -44,7 +44,6 @@ class SentenceTransformersInferenceImpl(
         pass

     async def register_model(self, model: Model) -> None:
-        _ = self._load_sentence_transformer_model(model.provider_resource_id)
         return model

     async def unregister_model(self, model_id: str) -> None:
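With the eager `_load_sentence_transformer_model` call removed, registering an embedding model no longer downloads or materializes the weights; presumably they are loaded on first use. A sketch of that lazy load-and-cache pattern, using hypothetical names rather than the provider's actual internals:

```python
# Lazy-loading sketch: nothing is loaded at registration time; the first
# embedding request pays the load cost once, later calls hit the cache.
from sentence_transformers import SentenceTransformer

_model_cache: dict[str, SentenceTransformer] = {}


def _load_model(model_id: str) -> SentenceTransformer:
    # First call downloads/loads the weights; subsequent calls reuse them.
    if model_id not in _model_cache:
        _model_cache[model_id] = SentenceTransformer(model_id)
    return _model_cache[model_id]


def embed(model_id: str, texts: list[str]) -> list[list[float]]:
    model = _load_model(model_id)
    return model.encode(texts).tolist()
```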
diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py
index 346a2bd73..b0402f6a5 100644
--- a/llama_stack/providers/registry/inference.py
+++ b/llama_stack/providers/registry/inference.py
@@ -61,7 +61,10 @@ def available_providers() -> List[ProviderSpec]:
         InlineProviderSpec(
             api=Api.inference,
             provider_type="inline::sentence-transformers",
-            pip_packages=["sentence-transformers"],
+            pip_packages=[
+                "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
+                "sentence-transformers --no-deps",
+            ],
             module="llama_stack.providers.inline.inference.sentence_transformers",
             config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
         ),
diff --git a/llama_stack/providers/registry/tool_runtime.py b/llama_stack/providers/registry/tool_runtime.py
index 33d880f30..95ea2dcf9 100644
--- a/llama_stack/providers/registry/tool_runtime.py
+++ b/llama_stack/providers/registry/tool_runtime.py
@@ -20,7 +20,18 @@ def available_providers() -> List[ProviderSpec]:
         InlineProviderSpec(
             api=Api.tool_runtime,
             provider_type="inline::rag-runtime",
-            pip_packages=[],
+            pip_packages=[
+                "blobfile",
+                "chardet",
+                "pypdf",
+                "tqdm",
+                "numpy",
+                "scikit-learn",
+                "scipy",
+                "nltk",
+                "sentencepiece",
+                "transformers",
+            ],
             module="llama_stack.providers.inline.tool_runtime.rag",
             config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig",
             api_dependencies=[Api.vector_io, Api.inference],
diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py
index 88a65397a..ff4f9caf5 100644
--- a/llama_stack/providers/registry/vector_io.py
+++ b/llama_stack/providers/registry/vector_io.py
@@ -14,33 +14,13 @@ from llama_stack.providers.datatypes import (
     remote_provider_spec,
 )

-EMBEDDING_DEPS = [
-    "blobfile",
-    "chardet",
-    "pypdf",
-    "tqdm",
-    "numpy",
-    "scikit-learn",
-    "scipy",
-    "nltk",
-    "sentencepiece",
-    "transformers",
-    # this happens to work because special dependencies are always installed last
-    # so if there was a regular torch installed first, this would be ignored
-    # we need a better way to do this to identify potential conflicts, etc.
-    # for now, this lets us significantly reduce the size of the container which
-    # does not have any "local" inference code (and hence does not need GPU-enabled torch)
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
-    "sentence-transformers --no-deps",
-]
-

 def available_providers() -> List[ProviderSpec]:
     return [
         InlineProviderSpec(
             api=Api.vector_io,
             provider_type="inline::meta-reference",
-            pip_packages=EMBEDDING_DEPS + ["faiss-cpu"],
+            pip_packages=["faiss-cpu"],
             module="llama_stack.providers.inline.vector_io.faiss",
             config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
             deprecation_warning="Please use the `inline::faiss` provider instead.",
@@ -49,24 +29,33 @@
         InlineProviderSpec(
             api=Api.vector_io,
             provider_type="inline::faiss",
-            pip_packages=EMBEDDING_DEPS + ["faiss-cpu"],
+            pip_packages=["faiss-cpu"],
             module="llama_stack.providers.inline.vector_io.faiss",
             config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
             api_dependencies=[Api.inference],
         ),
         InlineProviderSpec(
             api=Api.vector_io,
-            provider_type="inline::sqlite_vec",
-            pip_packages=EMBEDDING_DEPS + ["sqlite-vec"],
+            provider_type="inline::sqlite-vec",
+            pip_packages=["sqlite-vec"],
             module="llama_stack.providers.inline.vector_io.sqlite_vec",
             config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
             api_dependencies=[Api.inference],
         ),
+        InlineProviderSpec(
+            api=Api.vector_io,
+            provider_type="inline::sqlite_vec",
+            pip_packages=["sqlite-vec"],
+            module="llama_stack.providers.inline.vector_io.sqlite_vec",
+            config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
+            deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.",
+            api_dependencies=[Api.inference],
+        ),
         remote_provider_spec(
             Api.vector_io,
             AdapterSpec(
                 adapter_type="chromadb",
-                pip_packages=EMBEDDING_DEPS + ["chromadb-client"],
+                pip_packages=["chromadb-client"],
                 module="llama_stack.providers.remote.vector_io.chroma",
                 config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig",
             ),
@@ -75,7 +64,7 @@
         InlineProviderSpec(
             api=Api.vector_io,
             provider_type="inline::chromadb",
-            pip_packages=EMBEDDING_DEPS + ["chromadb"],
+            pip_packages=["chromadb"],
             module="llama_stack.providers.inline.vector_io.chroma",
             config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig",
             api_dependencies=[Api.inference],
@@ -84,7 +73,7 @@
             Api.vector_io,
             AdapterSpec(
                 adapter_type="pgvector",
-                pip_packages=EMBEDDING_DEPS + ["psycopg2-binary"],
+                pip_packages=["psycopg2-binary"],
                 module="llama_stack.providers.remote.vector_io.pgvector",
                 config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig",
             ),
@@ -94,7 +83,7 @@ def available_providers() -> List[ProviderSpec]:
             Api.vector_io,
             AdapterSpec(
                 adapter_type="weaviate",
-                pip_packages=EMBEDDING_DEPS + ["weaviate-client"],
+                pip_packages=["weaviate-client"],
                 module="llama_stack.providers.remote.vector_io.weaviate",
                 config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig",
                 provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData",
@@ -115,7 +104,7 @@
             Api.vector_io,
             AdapterSpec(
                 adapter_type="qdrant",
-                pip_packages=EMBEDDING_DEPS + ["qdrant-client"],
+                pip_packages=["qdrant-client"],
                 module="llama_stack.providers.remote.vector_io.qdrant",
                 config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig",
             ),
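The registry entries above keep the trick that the deleted `EMBEDDING_DEPS` comment described: entries such as `torch torchvision --index-url https://download.pytorch.org/whl/cpu` and `sentence-transformers --no-deps` are whole `pip install` argument strings, not bare package names, and depend on being installed after the plain packages. A hedged sketch of how a build step can consume such a list — an illustrative helper, not the actual llama-stack build code:

```python
# Sketch: install plain requirements in one batch, then run each
# flag-carrying entry as its own `pip install` so flags like --no-deps
# and --index-url apply only to that entry and run last.
import shlex
import subprocess
import sys


def install_pip_packages(pip_packages: list[str]) -> None:
    plain = [p for p in pip_packages if " " not in p]
    special = [p for p in pip_packages if " " in p]
    if plain:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *plain])
    for entry in special:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *shlex.split(entry)])


# e.g. the new inline::sentence-transformers dependency list:
install_pip_packages(
    [
        "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
        "sentence-transformers --no-deps",
    ]
)
```

Installing the flagged entries one at a time, after the plain ones, preserves the ordering the original comment relied on (a CPU-only torch is not clobbered by a regular torch pulled in earlier).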
diff --git a/llama_stack/providers/tests/vector_io/fixtures.py b/llama_stack/providers/tests/vector_io/fixtures.py
index 1797d47a5..c29717a27 100644
--- a/llama_stack/providers/tests/vector_io/fixtures.py
+++ b/llama_stack/providers/tests/vector_io/fixtures.py
@@ -61,7 +61,7 @@ def vector_io_sqlite_vec() -> ProviderFixture:
         providers=[
             Provider(
                 provider_id="sqlite_vec",
-                provider_type="inline::sqlite_vec",
+                provider_type="inline::sqlite-vec",
                 config=SQLiteVectorIOConfig(
                     kvstore=SqliteKVStoreConfig(db_path=temp_file.name).model_dump(),
                 ).model_dump(),
diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml
index 9d5ab1a52..ef6c43212 100644
--- a/llama_stack/templates/cerebras/build.yaml
+++ b/llama_stack/templates/cerebras/build.yaml
@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::cerebras
+    - inline::sentence-transformers
     safety:
     - inline::llama-guard
     vector_io:
diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py
index c467579ac..544a50c03 100644
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@@ -20,7 +20,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin

 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::cerebras"],
+        "inference": ["remote::cerebras", "inline::sentence-transformers"],
         "safety": ["inline::llama-guard"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "agents": ["inline::meta-reference"],
diff --git a/llama_stack/templates/dell/build.yaml b/llama_stack/templates/dell/build.yaml
index e2edb9386..05b98d56f 100644
--- a/llama_stack/templates/dell/build.yaml
+++ b/llama_stack/templates/dell/build.yaml
@@ -5,6 +5,7 @@ distribution_spec:
   providers:
     inference:
     - remote::tgi
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb
diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py
index 116fbd285..8348beafd 100644
--- a/llama_stack/templates/dell/dell.py
+++ b/llama_stack/templates/dell/dell.py
@@ -20,7 +20,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin

 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::tgi"],
+        "inference": ["remote::tgi", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml
index cdd60ec2a..a9c472c53 100644
--- a/llama_stack/templates/fireworks/build.yaml
+++ b/llama_stack/templates/fireworks/build.yaml
@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::fireworks
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb
diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py
index 06b851551..4457296b0 100644
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@@ -25,7 +25,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin

 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::fireworks"],
+        "inference": ["remote::fireworks", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml
index f9303cfab..c0cc1e2c2 100644
--- a/llama_stack/templates/hf-serverless/build.yaml
+++ b/llama_stack/templates/hf-serverless/build.yaml
@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::hf::serverless
+    - inline::sentence-transformers
     vector_io:
    - inline::faiss
     - remote::chromadb
diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py
index 46efb6f0b..af04e39d4 100644
--- a/llama_stack/templates/hf-serverless/hf_serverless.py
+++ b/llama_stack/templates/hf-serverless/hf_serverless.py
@@ -21,7 +21,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin

 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::hf::serverless"],
+        "inference": ["remote::hf::serverless", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
index 48960c5ba..52a50b38a 100644
--- a/llama_stack/templates/ollama/build.yaml
+++ b/llama_stack/templates/ollama/build.yaml
@@ -5,8 +5,7 @@ distribution_spec:
   providers:
     inference:
     - remote::ollama
     vector_io:
-    - inline::faiss
-    - inline::sqlite_vec
+    - inline::sqlite-vec
     - remote::chromadb
     - remote::pgvector
     safety:
diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index 2b135c008..4f644c270 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -13,10 +13,6 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.providers.inline.inference.sentence_transformers import (
-    SentenceTransformersInferenceConfig,
-)
-from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -25,7 +21,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin

 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::ollama"],
-        "vector_io": ["inline::faiss", "inline::sqlite_vec", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
@@ -45,19 +41,9 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="remote::ollama",
         config=OllamaImplConfig.sample_run_config(),
     )
-    embedding_provider = Provider(
-        provider_id="sentence-transformers",
-        provider_type="inline::sentence-transformers",
-        config=SentenceTransformersInferenceConfig.sample_run_config(),
-    )
-    vector_io_provider_faiss = Provider(
-        provider_id="faiss",
-        provider_type="inline::faiss",
-        config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
-    )
     vector_io_provider_sqlite = Provider(
-        provider_id="sqlite_vec",
-        provider_type="inline::sqlite_vec",
+        provider_id="sqlite-vec",
+        provider_type="inline::sqlite-vec",
         config=SQLiteVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )

@@ -104,19 +90,16 @@ def get_distribution_template() -> DistributionTemplate:
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
-                    "inference": [inference_provider, embedding_provider],
-                    "vector_io": [vector_io_provider_faiss, vector_io_provider_sqlite],
+                    "inference": [inference_provider],
+                    "vector_io": [vector_io_provider_sqlite],
                 },
-                default_models=[inference_model, embedding_model],
+                default_models=[inference_model],
                 default_tool_groups=default_tool_groups,
             ),
             "run-with-safety.yaml": RunConfigSettings(
                 provider_overrides={
-                    "inference": [
-                        inference_provider,
-                        embedding_provider,
-                    ],
-                    "vector_io": [vector_io_provider_faiss, vector_io_provider_faiss],
+                    "inference": [inference_provider],
+                    "vector_io": [vector_io_provider_sqlite],
                     "safety": [
                         Provider(
                             provider_id="llama-guard",
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index 7cf527c04..063840a50 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -16,24 +16,11 @@ providers:
     provider_type: remote::ollama
     config:
       url: ${env.OLLAMA_URL:http://localhost:11434}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
     config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
+      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
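Both Ollama run configs now point `inline::sqlite-vec` at a single `db_path` file instead of faiss's nested `kvstore` block. A quick hedged sanity check that the `sqlite-vec` package named in the registry change installs and loads — this is standard `sqlite-vec` Python usage, with an in-memory database standing in for the configured `db_path`:

```python
import sqlite3

import sqlite_vec  # provided by the `sqlite-vec` pip package

db = sqlite3.connect(":memory:")  # a real run would open the configured db_path
db.enable_load_extension(True)
sqlite_vec.load(db)  # loads the vec0 SQLite extension
db.enable_load_extension(False)

(version,) = db.execute("SELECT vec_version()").fetchone()
print(f"sqlite-vec loaded, version {version}")
```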
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 1f45fc228..d64e07347 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -16,19 +16,9 @@ providers:
     provider_type: remote::ollama
     config:
       url: ${env.OLLAMA_URL:http://localhost:11434}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
-  - provider_id: sqlite_vec
-    provider_type: inline::sqlite_vec
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
     config:
       db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
   safety:
@@ -97,12 +87,6 @@ models:
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ollama
   model_type: llm
-- metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
-  provider_id: ollama
-  provider_model_id: all-minilm:latest
-  model_type: embedding
 shields: []
 vector_dbs: []
 datasets: []
diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml
index 74d9f32d9..ccb328c1c 100644
--- a/llama_stack/templates/remote-vllm/build.yaml
+++ b/llama_stack/templates/remote-vllm/build.yaml
@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::vllm
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index 40a2d541d..10d291456 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -23,7 +23,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin

 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::vllm"],
+        "inference": ["remote::vllm", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml
index 8bc628158..9fe79647c 100644
--- a/llama_stack/templates/tgi/build.yaml
+++ b/llama_stack/templates/tgi/build.yaml
@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::tgi
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb
diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py
index 71718a93d..9b80414f9 100644
--- a/llama_stack/templates/tgi/tgi.py
+++ b/llama_stack/templates/tgi/tgi.py
@@ -23,7 +23,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin

 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::tgi"],
+        "inference": ["remote::tgi", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml
index 90ee5bcee..a8a6de28d 100644
--- a/llama_stack/templates/together/build.yaml
+++ b/llama_stack/templates/together/build.yaml
@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::together
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb
diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py
index d275b7238..8d0e2353c 100644
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@@ -25,7 +25,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin

 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::together"],
+        "inference": ["remote::together", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml
index d24046613..8eb44dc1b 100644
--- a/llama_stack/templates/vllm-gpu/build.yaml
+++ b/llama_stack/templates/vllm-gpu/build.yaml
@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - inline::vllm
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb
diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py
index 31900687b..8cdec589e 100644
--- a/llama_stack/templates/vllm-gpu/vllm.py
+++ b/llama_stack/templates/vllm-gpu/vllm.py
@@ -20,7 +20,7 @@ from llama_stack.templates.template import (

 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["inline::vllm"],
+        "inference": ["inline::vllm", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],