refactor: switch to the new default nomic-embed-text-v1.5 embedding model in LS

r3v5 2025-08-18 10:34:46 +01:00
parent b95f095a54
commit 429f1d2405
No known key found for this signature in database
GPG key ID: C7611ACB4FECAD54
51 changed files with 16149 additions and 83 deletions

View file

@@ -87,11 +87,11 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="tgi1",
     )
     embedding_model = ModelInput(
-        model_id="all-MiniLM-L6-v2",
+        model_id="nomic-embed-text-v1.5",
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dimension": 384,
+            "embedding_dimension": 768,
         },
     )
     default_tool_groups = [
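
The dimension change from 384 to 768 follows directly from the model swap: all-MiniLM-L6-v2 emits 384-dimensional vectors, while nomic-embed-text-v1.5 emits 768-dimensional ones. A minimal sketch (not part of the diff; assumes sentence-transformers and einops are installed and the model can be fetched from the Hugging Face Hub) to verify the value locally:

from sentence_transformers import SentenceTransformer

# nomic-embed-text-v1.5 ships custom modeling code in its HF repository,
# so loading it requires opting in to remote code execution.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)

# Expected to print 768, matching the embedding_dimension metadata above.
print(model.get_sentence_embedding_dimension())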

View file

@@ -114,8 +114,8 @@ models:
   provider_id: tgi1
   model_type: llm
 - metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
+    embedding_dimension: 768
+  model_id: nomic-embed-text-v1.5
   provider_id: sentence-transformers
   model_type: embedding
 shields:

View file

@@ -106,8 +106,8 @@ models:
   provider_id: tgi0
   model_type: llm
 - metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
+    embedding_dimension: 768
+  model_id: nomic-embed-text-v1.5
   provider_id: sentence-transformers
   model_type: embedding
 shields: []

View file

@@ -77,11 +77,11 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="meta-reference-inference",
     )
     embedding_model = ModelInput(
-        model_id="all-MiniLM-L6-v2",
+        model_id="nomic-embed-text-v1.5",
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dimension": 384,
+            "embedding_dimension": 768,
         },
     )
     safety_model = ModelInput(

View file

@@ -127,8 +127,8 @@ models:
   provider_id: meta-reference-safety
   model_type: llm
 - metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
+    embedding_dimension: 768
+  model_id: nomic-embed-text-v1.5
   provider_id: sentence-transformers
   model_type: embedding
 shields:

View file

@@ -113,8 +113,8 @@ models:
   provider_id: meta-reference-inference
   model_type: llm
 - metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
+    embedding_dimension: 768
+  model_id: nomic-embed-text-v1.5
   provider_id: sentence-transformers
   model_type: embedding
 shields: []

View file

@@ -85,11 +85,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )
     embedding_model = ModelInput(
-        model_id="all-MiniLM-L6-v2",
+        model_id="nomic-embed-text-v1.5",
         provider_id=embedding_provider.provider_id,
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dimension": 384,
+            "embedding_dimension": 768,
         },
     )
     postgres_config = PostgresSqlStoreConfig.sample_run_config()

View file

@@ -95,8 +95,8 @@ models:
   provider_id: vllm-inference
   model_type: llm
 - metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
+    embedding_dimension: 768
+  model_id: nomic-embed-text-v1.5
   provider_id: sentence-transformers
   model_type: embedding
 shields:

View file

@@ -54,11 +54,11 @@ class SentenceTransformersInferenceImpl(
     async def list_models(self) -> list[Model] | None:
         return [
             Model(
-                identifier="all-MiniLM-L6-v2",
-                provider_resource_id="all-MiniLM-L6-v2",
+                identifier="nomic-ai/nomic-embed-text-v1.5",
+                provider_resource_id="nomic-ai/nomic-embed-text-v1.5",
                 provider_id=self.__provider_id__,
                 metadata={
-                    "embedding_dimension": 384,
+                    "embedding_dimension": 768,
                 },
                 model_type=ModelType.embedding,
             ),

View file

@@ -43,6 +43,12 @@ def available_providers() -> list[ProviderSpec]:
             pip_packages=[
                 "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
                 "sentence-transformers --no-deps",
+                # required by some SentenceTransformers architectures for tensor rearrange/merge ops
+                "einops",
+                # fast HF tokenization backend used by SentenceTransformers models
+                "tokenizers",
+                # safe and fast file format for storing and loading tensors
+                "safetensors",
             ],
             module="llama_stack.providers.inline.inference.sentence_transformers",
             config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
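
The three added pip packages have to be listed explicitly because sentence-transformers is installed with --no-deps: tokenizers and safetensors back Hugging Face model loading and tokenization, and einops is required by the custom nomic architecture's tensor ops. A small sketch (not part of the diff) that fails fast if the environment is missing any of them:

import importlib.util

# Explicit runtime deps because sentence-transformers is installed with --no-deps.
required = ["sentence_transformers", "einops", "tokenizers", "safetensors"]
missing = [name for name in required if importlib.util.find_spec(name) is None]
if missing:
    raise RuntimeError(f"missing embedding dependencies: {', '.join(missing)}")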

View file

@@ -83,7 +83,7 @@ class SentenceTransformerEmbeddingMixin:
         def _load_model():
             from sentence_transformers import SentenceTransformer

-            return SentenceTransformer(model)
+            return SentenceTransformer(model, trust_remote_code=True)

         loaded_model = await asyncio.to_thread(_load_model)
         EMBEDDING_MODELS[model] = loaded_model
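
trust_remote_code=True lets sentence-transformers execute the custom modeling code that nomic-embed-text-v1.5 ships in its Hugging Face repository; without it, loading models that define their own architecture is refused. A standalone sketch (hypothetical script, assuming Hub access) of the same off-thread loading pattern the mixin uses:

import asyncio

from sentence_transformers import SentenceTransformer

async def load_and_embed() -> None:
    # Mirror the mixin: run the blocking model load in a worker thread.
    model = await asyncio.to_thread(
        SentenceTransformer, "nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True
    )
    embeddings = model.encode(["hello world"])
    print(embeddings.shape)  # expected: (1, 768)

asyncio.run(load_and_embed())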

View file

@@ -352,7 +352,7 @@ class OpenAIVectorStoreMixin(ABC):
         extra = params.model_extra or {}
         provider_vector_db_id = extra.get("provider_vector_db_id")
         embedding_model = extra.get("embedding_model")
-        embedding_dimension = extra.get("embedding_dimension", 384)
+        embedding_dimension = extra.get("embedding_dimension", 768)
         provider_id = extra.get("provider_id")

         # Derive the canonical vector_db_id (allow override, else generate)
@@ -364,7 +364,7 @@ class OpenAIVectorStoreMixin(ABC):
         if embedding_model is None:
             raise ValueError("Embedding model is required")

-        # Embedding dimension is required (defaulted to 384 if not provided)
+        # Embedding dimension is required (defaulted to 768 if not provided)
         if embedding_dimension is None:
             raise ValueError("Embedding dimension is required")
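
With the fallback moved from 384 to 768, vector stores created without an explicit embedding_dimension now assume the new default model; callers that still target a 384-dimensional model such as all-MiniLM-L6-v2 should pass the dimension explicitly. A self-contained sketch (hypothetical helper name) mirroring the resolution logic above:

def resolve_embedding_config(extra: dict) -> tuple[str, int]:
    # Mirrors the defaulting and validation above: the embedding model must be
    # named explicitly, while the dimension falls back to the new 768 default.
    embedding_model = extra.get("embedding_model")
    embedding_dimension = extra.get("embedding_dimension", 768)
    if embedding_model is None:
        raise ValueError("Embedding model is required")
    if embedding_dimension is None:
        raise ValueError("Embedding dimension is required")
    return embedding_model, embedding_dimension

print(resolve_embedding_config({"embedding_model": "nomic-embed-text-v1.5"}))
# -> ('nomic-embed-text-v1.5', 768)
print(resolve_embedding_config({"embedding_model": "all-MiniLM-L6-v2", "embedding_dimension": 384}))
# -> ('all-MiniLM-L6-v2', 384)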