Move embedding deps to RAG tool where they are needed

Ashwin Bharambe 2025-02-21 10:51:39 -08:00
parent 11697f85c5
commit cf744a97f0
32 changed files with 70 additions and 130 deletions
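
In outline, this change dismantles the shared EMBEDDING_DEPS list that every vector_io provider used to inherit: vector stores now declare only their own client package, the document-processing libraries move to the inline::rag-runtime tool provider, and the torch/sentence-transformers pair moves to the inline::sentence-transformers inference provider. A minimal sketch of the before/after shape, using plain dicts rather than the real ProviderSpec types:

```python
# Schematic sketch of the dependency move (plain dicts, not real ProviderSpec objects).

# Before: every vector_io provider carried the full embedding stack.
EMBEDDING_DEPS = [
    "blobfile", "chardet", "pypdf", "tqdm", "numpy", "scikit-learn",
    "scipy", "nltk", "sentencepiece", "transformers",
    "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
    "sentence-transformers --no-deps",
]
before = {
    "inline::faiss": EMBEDDING_DEPS + ["faiss-cpu"],
    "inline::sqlite_vec": EMBEDDING_DEPS + ["sqlite-vec"],
}

# After: each provider lists only what it actually needs.
after = {
    "inline::faiss": ["faiss-cpu"],
    "inline::sqlite-vec": ["sqlite-vec"],
    "inline::rag-runtime": [
        "blobfile", "chardet", "pypdf", "tqdm", "numpy",
        "scikit-learn", "scipy", "nltk", "sentencepiece", "transformers",
    ],
    "inline::sentence-transformers": [
        "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
        "sentence-transformers --no-deps",
    ],
}
```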


@@ -30,9 +30,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "cerebras": [
     "aiosqlite",
@@ -170,9 +168,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "hf-serverless": [
     "aiohttp",
@@ -247,9 +243,7 @@
     "tqdm",
     "transformers",
     "uvicorn",
-    "zmq",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "zmq"
   ],
   "meta-reference-quantized-gpu": [
     "accelerate",
@@ -290,9 +284,7 @@
     "tqdm",
     "transformers",
     "uvicorn",
-    "zmq",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "zmq"
   ],
   "nvidia": [
     "aiosqlite",
@@ -323,9 +315,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "ollama": [
     "aiohttp",
@@ -335,7 +325,6 @@
     "chardet",
     "chromadb-client",
     "datasets",
-    "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
@@ -359,9 +348,7 @@
     "sqlite-vec",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "remote-vllm": [
     "aiosqlite",
@@ -424,9 +411,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+    "uvicorn"
   ],
   "tgi": [
     "aiohttp",


@@ -8,7 +8,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following provider configurations.
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::cerebras` |
+| inference | `remote::cerebras`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |


@@ -19,7 +19,7 @@ The `llamastack/distribution-dell` distribution consists of the following provider configurations.
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::tgi` |
+| inference | `remote::tgi`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |


@@ -18,7 +18,7 @@ The `llamastack/distribution-fireworks` distribution consists of the following provider configurations.
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::fireworks` |
+| inference | `remote::fireworks`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |


@@ -23,7 +23,7 @@ The `llamastack/distribution-ollama` distribution consists of the following provider configurations.
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
-| vector_io | `inline::faiss`, `inline::sqlite_vec`, `remote::chromadb`, `remote::pgvector` |
+| vector_io | `inline::sqlite-vec`, `remote::chromadb`, `remote::pgvector` |
 
 You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.


@@ -17,7 +17,7 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following provider configurations.
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::vllm` |
+| inference | `remote::vllm`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |


@@ -19,7 +19,7 @@ The `llamastack/distribution-tgi` distribution consists of the following provider configurations.
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::tgi` |
+| inference | `remote::tgi`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |


@@ -18,7 +18,7 @@ The `llamastack/distribution-together` distribution consists of the following provider configurations.
 | agents | `inline::meta-reference` |
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
-| inference | `remote::together` |
+| inference | `remote::together`, `inline::sentence-transformers` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |


@@ -61,7 +61,10 @@ def available_providers() -> List[ProviderSpec]:
         InlineProviderSpec(
             api=Api.inference,
             provider_type="inline::sentence-transformers",
-            pip_packages=["sentence-transformers"],
+            pip_packages=[
+                "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
+                "sentence-transformers --no-deps",
+            ],
             module="llama_stack.providers.inline.inference.sentence_transformers",
             config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
         ),


@@ -20,7 +20,18 @@ def available_providers() -> List[ProviderSpec]:
         InlineProviderSpec(
             api=Api.tool_runtime,
             provider_type="inline::rag-runtime",
-            pip_packages=[],
+            pip_packages=[
+                "blobfile",
+                "chardet",
+                "pypdf",
+                "tqdm",
+                "numpy",
+                "scikit-learn",
+                "scipy",
+                "nltk",
+                "sentencepiece",
+                "transformers",
+            ],
             module="llama_stack.providers.inline.tool_runtime.rag",
             config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig",
             api_dependencies=[Api.vector_io, Api.inference],
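
With dependencies declared per provider, a distribution's requirement set becomes the union of pip_packages over the providers it selects. A minimal sketch of that aggregation over a plain mapping; collect_pip_packages is a hypothetical helper for illustration, not llama-stack's actual build code:

```python
from typing import Dict, List

# Hypothetical helper: the real aggregation lives in llama-stack's build tooling.
def collect_pip_packages(registry: Dict[str, List[str]], selected: List[str]) -> List[str]:
    """Union the pip_packages of the providers chosen for a distribution, preserving order."""
    seen, ordered = set(), []
    for provider_type in selected:
        for pkg in registry.get(provider_type, []):
            if pkg not in seen:
                seen.add(pkg)
                ordered.append(pkg)
    return ordered

registry = {
    "inline::rag-runtime": ["blobfile", "chardet", "pypdf", "tqdm", "numpy",
                            "scikit-learn", "scipy", "nltk", "sentencepiece", "transformers"],
    "inline::sqlite-vec": ["sqlite-vec"],
}
print(collect_pip_packages(registry, ["inline::sqlite-vec", "inline::rag-runtime"]))
```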


@@ -14,33 +14,13 @@ from llama_stack.providers.datatypes import (
     remote_provider_spec,
 )
 
-EMBEDDING_DEPS = [
-    "blobfile",
-    "chardet",
-    "pypdf",
-    "tqdm",
-    "numpy",
-    "scikit-learn",
-    "scipy",
-    "nltk",
-    "sentencepiece",
-    "transformers",
-    # this happens to work because special dependencies are always installed last
-    # so if there was a regular torch installed first, this would be ignored
-    # we need a better way to do this to identify potential conflicts, etc.
-    # for now, this lets us significantly reduce the size of the container which
-    # does not have any "local" inference code (and hence does not need GPU-enabled torch)
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
-    "sentence-transformers --no-deps",
-]
-
 
 def available_providers() -> List[ProviderSpec]:
     return [
         InlineProviderSpec(
             api=Api.vector_io,
             provider_type="inline::meta-reference",
-            pip_packages=EMBEDDING_DEPS + ["faiss-cpu"],
+            pip_packages=["faiss-cpu"],
             module="llama_stack.providers.inline.vector_io.faiss",
             config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
             deprecation_warning="Please use the `inline::faiss` provider instead.",
@@ -49,15 +29,15 @@ def available_providers() -> List[ProviderSpec]:
         InlineProviderSpec(
             api=Api.vector_io,
             provider_type="inline::faiss",
-            pip_packages=EMBEDDING_DEPS + ["faiss-cpu"],
+            pip_packages=["faiss-cpu"],
             module="llama_stack.providers.inline.vector_io.faiss",
             config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
             api_dependencies=[Api.inference],
         ),
         InlineProviderSpec(
             api=Api.vector_io,
-            provider_type="inline::sqlite_vec",
-            pip_packages=EMBEDDING_DEPS + ["sqlite-vec"],
+            provider_type="inline::sqlite-vec",
+            pip_packages=["sqlite-vec"],
             module="llama_stack.providers.inline.vector_io.sqlite_vec",
             config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
             api_dependencies=[Api.inference],
@@ -66,7 +46,7 @@ def available_providers() -> List[ProviderSpec]:
             Api.vector_io,
             AdapterSpec(
                 adapter_type="chromadb",
-                pip_packages=EMBEDDING_DEPS + ["chromadb-client"],
+                pip_packages=["chromadb-client"],
                 module="llama_stack.providers.remote.vector_io.chroma",
                 config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig",
             ),
@@ -75,7 +55,7 @@ def available_providers() -> List[ProviderSpec]:
         InlineProviderSpec(
             api=Api.vector_io,
             provider_type="inline::chromadb",
-            pip_packages=EMBEDDING_DEPS + ["chromadb"],
+            pip_packages=["chromadb"],
             module="llama_stack.providers.inline.vector_io.chroma",
             config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig",
             api_dependencies=[Api.inference],
@@ -84,7 +64,7 @@ def available_providers() -> List[ProviderSpec]:
             Api.vector_io,
             AdapterSpec(
                 adapter_type="pgvector",
-                pip_packages=EMBEDDING_DEPS + ["psycopg2-binary"],
+                pip_packages=["psycopg2-binary"],
                 module="llama_stack.providers.remote.vector_io.pgvector",
                 config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig",
             ),
@@ -94,7 +74,7 @@ def available_providers() -> List[ProviderSpec]:
             Api.vector_io,
             AdapterSpec(
                 adapter_type="weaviate",
-                pip_packages=EMBEDDING_DEPS + ["weaviate-client"],
+                pip_packages=["weaviate-client"],
                 module="llama_stack.providers.remote.vector_io.weaviate",
                 config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig",
                 provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData",
@@ -115,7 +95,7 @@ def available_providers() -> List[ProviderSpec]:
             Api.vector_io,
             AdapterSpec(
                 adapter_type="qdrant",
-                pip_packages=EMBEDDING_DEPS + ["qdrant-client"],
+                pip_packages=["qdrant-client"],
                 module="llama_stack.providers.remote.vector_io.qdrant",
                 config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig",
             ),
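
The comment removed above records why specs like "torch torchvision --index-url https://download.pytorch.org/whl/cpu" and "sentence-transformers --no-deps" work at all: entries carrying pip flags are treated as special dependencies and installed last, each in its own pip invocation, so the CPU-only torch wheel wins over any torch pulled in earlier. A hedged sketch of that install order; the splitting heuristic here is illustrative, not the actual llama-stack build script:

```python
import shlex
import subprocess
import sys

# Illustrative only: mirrors the behavior the removed comment relies on,
# not the actual llama-stack build script.
def install(pip_packages: list[str]) -> None:
    plain = [p for p in pip_packages if " " not in p]
    special = [p for p in pip_packages if " " in p]  # specs carrying pip flags
    if plain:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *plain])
    for spec in special:
        # Installed last, one invocation each, so e.g. the CPU-only torch wheel
        # from the --index-url replaces any torch installed above.
        subprocess.check_call([sys.executable, "-m", "pip", "install", *shlex.split(spec)])

install([
    "sentencepiece",
    "transformers",
    "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
    "sentence-transformers --no-deps",
])
```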


@@ -61,7 +61,7 @@ def vector_io_sqlite_vec() -> ProviderFixture:
         providers=[
             Provider(
                 provider_id="sqlite_vec",
-                provider_type="inline::sqlite_vec",
+                provider_type="inline::sqlite-vec",
                 config=SQLiteVectorIOConfig(
                     kvstore=SqliteKVStoreConfig(db_path=temp_file.name).model_dump(),
                 ).model_dump(),


@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::cerebras
+    - inline::sentence-transformers
     safety:
     - inline::llama-guard
     vector_io:


@@ -20,7 +20,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::cerebras"],
+        "inference": ["remote::cerebras", "inline::sentence-transformers"],
         "safety": ["inline::llama-guard"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "agents": ["inline::meta-reference"],


@@ -5,6 +5,7 @@ distribution_spec:
   providers:
     inference:
     - remote::tgi
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb


@@ -20,7 +20,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::tgi"],
+        "inference": ["remote::tgi", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],


@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::fireworks
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb


@@ -25,7 +25,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::fireworks"],
+        "inference": ["remote::fireworks", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],


@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::hf::serverless
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb


@@ -21,7 +21,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::hf::serverless"],
+        "inference": ["remote::hf::serverless", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],


@@ -5,8 +5,7 @@ distribution_spec:
     inference:
     - remote::ollama
     vector_io:
-    - inline::faiss
-    - inline::sqlite_vec
+    - inline::sqlite-vec
    - remote::chromadb
     - remote::pgvector
     safety:


@@ -13,10 +13,6 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.providers.inline.inference.sentence_transformers import (
-    SentenceTransformersInferenceConfig,
-)
-from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -25,7 +21,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::ollama"],
-        "vector_io": ["inline::faiss", "inline::sqlite_vec", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
@@ -45,19 +41,9 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="remote::ollama",
         config=OllamaImplConfig.sample_run_config(),
     )
-    embedding_provider = Provider(
-        provider_id="sentence-transformers",
-        provider_type="inline::sentence-transformers",
-        config=SentenceTransformersInferenceConfig.sample_run_config(),
-    )
-    vector_io_provider_faiss = Provider(
-        provider_id="faiss",
-        provider_type="inline::faiss",
-        config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
-    )
     vector_io_provider_sqlite = Provider(
-        provider_id="sqlite_vec",
-        provider_type="inline::sqlite_vec",
+        provider_id="sqlite-vec",
+        provider_type="inline::sqlite-vec",
         config=SQLiteVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )
 
@@ -104,19 +90,16 @@ def get_distribution_template() -> DistributionTemplate:
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
-                    "inference": [inference_provider, embedding_provider],
-                    "vector_io": [vector_io_provider_faiss, vector_io_provider_sqlite],
+                    "inference": [inference_provider],
+                    "vector_io": [vector_io_provider_sqlite],
                 },
-                default_models=[inference_model, embedding_model],
+                default_models=[inference_model],
                 default_tool_groups=default_tool_groups,
             ),
             "run-with-safety.yaml": RunConfigSettings(
                 provider_overrides={
-                    "inference": [
-                        inference_provider,
-                        embedding_provider,
-                    ],
-                    "vector_io": [vector_io_provider_faiss, vector_io_provider_faiss],
+                    "inference": [inference_provider],
+                    "vector_io": [vector_io_provider_sqlite],
                     "safety": [
                         Provider(
                             provider_id="llama-guard",


@@ -16,24 +16,11 @@ providers:
     provider_type: remote::ollama
     config:
       url: ${env.OLLAMA_URL:http://localhost:11434}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard


@@ -16,19 +16,9 @@ providers:
     provider_type: remote::ollama
     config:
       url: ${env.OLLAMA_URL:http://localhost:11434}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
-  - provider_id: faiss
-    provider_type: inline::faiss
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
-  - provider_id: sqlite_vec
-    provider_type: inline::sqlite_vec
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
     config:
       db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
   safety:
@@ -97,12 +87,6 @@ models:
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ollama
   model_type: llm
-- metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
-  provider_id: ollama
-  provider_model_id: all-minilm:latest
-  model_type: embedding
 shields: []
 vector_dbs: []
 datasets: []


@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::vllm
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb


@@ -23,7 +23,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::vllm"],
+        "inference": ["remote::vllm", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],


@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::tgi
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb


@@ -23,7 +23,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::tgi"],
+        "inference": ["remote::tgi", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],


@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::together
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb


@@ -25,7 +25,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::together"],
+        "inference": ["remote::together", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],


@@ -4,6 +4,7 @@ distribution_spec:
   providers:
     inference:
     - inline::vllm
+    - inline::sentence-transformers
     vector_io:
     - inline::faiss
     - remote::chromadb


@@ -20,7 +20,7 @@ from llama_stack.templates.template import (
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["inline::vllm"],
+        "inference": ["inline::vllm", "inline::sentence-transformers"],
         "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],