mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00
refactor: switch to the new default nomic-embed-text-v1.5 embedding model in LS
This commit is contained in:
parent
f1748e2f92
commit
1d0f0a0d8e
63 changed files with 16170 additions and 186 deletions
|
@ -91,7 +91,7 @@ class VectorDBs(Protocol):
|
|||
self,
|
||||
vector_db_id: str,
|
||||
embedding_model: str,
|
||||
embedding_dimension: int | None = 384,
|
||||
embedding_dimension: int | None = 768,
|
||||
provider_id: str | None = None,
|
||||
vector_db_name: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
|
|
|
@ -521,7 +521,7 @@ class VectorIO(Protocol):
|
|||
chunking_strategy: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
embedding_model: str | None = None,
|
||||
embedding_dimension: int | None = 384,
|
||||
embedding_dimension: int | None = 768,
|
||||
provider_id: str | None = None,
|
||||
) -> VectorStoreObject:
|
||||
"""Creates a vector store.
|
||||
|
@ -532,7 +532,7 @@ class VectorIO(Protocol):
|
|||
:param chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy.
|
||||
:param metadata: Set of 16 key-value pairs that can be attached to an object.
|
||||
:param embedding_model: The embedding model to use for this vector store.
|
||||
:param embedding_dimension: The dimension of the embedding vectors (default: 384).
|
||||
:param embedding_dimension: The dimension of the embedding vectors (default: 768).
|
||||
:param provider_id: The ID of the provider to use for this vector store.
|
||||
:returns: A VectorStoreObject representing the created vector store.
|
||||
"""
|
||||
|
|
|
@ -47,7 +47,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
|
|||
self,
|
||||
vector_db_id: str,
|
||||
embedding_model: str,
|
||||
embedding_dimension: int | None = 384,
|
||||
embedding_dimension: int | None = 768,
|
||||
provider_id: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
vector_db_name: str | None = None,
|
||||
|
|
|
@ -66,8 +66,8 @@ def rag_chat_page():
|
|||
|
||||
llama_stack_api.client.vector_dbs.register(
|
||||
vector_db_id=vector_db_name, # Use the user-provided name
|
||||
embedding_dimension=384,
|
||||
embedding_model="all-MiniLM-L6-v2",
|
||||
embedding_dimension=768,
|
||||
embedding_model="nomic-embed-text-v1.5",
|
||||
provider_id=vector_io_provider,
|
||||
)
|
||||
|
||||
|
|
|
@ -87,11 +87,11 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_id="tgi1",
|
||||
)
|
||||
embedding_model = ModelInput(
|
||||
model_id="all-MiniLM-L6-v2",
|
||||
model_id="nomic-embed-text-v1.5",
|
||||
provider_id="sentence-transformers",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 384,
|
||||
"embedding_dimension": 768,
|
||||
},
|
||||
)
|
||||
default_tool_groups = [
|
||||
|
|
|
@ -111,8 +111,8 @@ models:
|
|||
provider_id: tgi1
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields:
|
||||
|
|
|
@ -103,8 +103,8 @@ models:
|
|||
provider_id: tgi0
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
|
|
|
@ -77,11 +77,11 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_id="meta-reference-inference",
|
||||
)
|
||||
embedding_model = ModelInput(
|
||||
model_id="all-MiniLM-L6-v2",
|
||||
model_id="nomic-embed-text-v1.5",
|
||||
provider_id="sentence-transformers",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 384,
|
||||
"embedding_dimension": 768,
|
||||
},
|
||||
)
|
||||
safety_model = ModelInput(
|
||||
|
|
|
@ -124,8 +124,8 @@ models:
|
|||
provider_id: meta-reference-safety
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields:
|
||||
|
|
|
@ -110,8 +110,8 @@ models:
|
|||
provider_id: meta-reference-inference
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
|
|
|
@ -85,11 +85,11 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
embedding_model = ModelInput(
|
||||
model_id="all-MiniLM-L6-v2",
|
||||
model_id="nomic-embed-text-v1.5",
|
||||
provider_id=embedding_provider.provider_id,
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 384,
|
||||
"embedding_dimension": 768,
|
||||
},
|
||||
)
|
||||
postgres_config = PostgresSqlStoreConfig.sample_run_config()
|
||||
|
|
|
@ -92,8 +92,8 @@ models:
|
|||
provider_id: vllm-inference
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields:
|
||||
|
|
|
@ -201,8 +201,8 @@ models:
|
|||
provider_model_id: meta-llama/llama-guard-3-11b-vision
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
embedding_dimension: 768
|
||||
model_id: nomic-embed-text-v1.5
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
|
|
|
@ -73,11 +73,11 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
|
|||
]
|
||||
|
||||
embedding_model = ModelInput(
|
||||
model_id="all-MiniLM-L6-v2",
|
||||
model_id="nomic-embed-text-v1.5",
|
||||
provider_id="sentence-transformers",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 384,
|
||||
"embedding_dimension": 768,
|
||||
},
|
||||
)
|
||||
|
||||
|
|
|
@ -57,11 +57,11 @@ class SentenceTransformersInferenceImpl(
|
|||
async def list_models(self) -> list[Model] | None:
|
||||
return [
|
||||
Model(
|
||||
identifier="all-MiniLM-L6-v2",
|
||||
provider_resource_id="all-MiniLM-L6-v2",
|
||||
identifier="nomic-ai/nomic-embed-text-v1.5",
|
||||
provider_resource_id="nomic-ai/nomic-embed-text-v1.5",
|
||||
provider_id=self.__provider_id__,
|
||||
metadata={
|
||||
"embedding_dimension": 384,
|
||||
"embedding_dimension": 768,
|
||||
},
|
||||
model_type=ModelType.embedding,
|
||||
),
|
||||
|
|
|
@ -43,6 +43,12 @@ def available_providers() -> list[ProviderSpec]:
|
|||
pip_packages=[
|
||||
"torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
|
||||
"sentence-transformers --no-deps",
|
||||
# required by some SentenceTransformers architectures for tensor rearrange/merge ops
|
||||
"einops",
|
||||
# fast HF tokenization backend used by SentenceTransformers models
|
||||
"tokenizers",
|
||||
# safe and fast file format for storing and loading tensors
|
||||
"safetensors",
|
||||
],
|
||||
module="llama_stack.providers.inline.inference.sentence_transformers",
|
||||
config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
|
||||
|
|
|
@ -86,7 +86,7 @@ class SentenceTransformerEmbeddingMixin:
|
|||
def _load_model():
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
return SentenceTransformer(model)
|
||||
return SentenceTransformer(model, trust_remote_code=True)
|
||||
|
||||
loaded_model = await asyncio.to_thread(_load_model)
|
||||
EMBEDDING_MODELS[model] = loaded_model
|
||||
|
|
|
@ -203,7 +203,7 @@ class OpenAIVectorStoreMixin(ABC):
|
|||
chunking_strategy: dict[str, Any] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
embedding_model: str | None = None,
|
||||
embedding_dimension: int | None = 384,
|
||||
embedding_dimension: int | None = 768,
|
||||
provider_id: str | None = None,
|
||||
provider_vector_db_id: str | None = None,
|
||||
) -> VectorStoreObject:
|
||||
|
@ -218,7 +218,7 @@ class OpenAIVectorStoreMixin(ABC):
|
|||
if embedding_model is None:
|
||||
raise ValueError("Embedding model is required")
|
||||
|
||||
# Embedding dimension is required (defaulted to 384 if not provided)
|
||||
# Embedding dimension is required (defaulted to 768 if not provided)
|
||||
if embedding_dimension is None:
|
||||
raise ValueError("Embedding dimension is required")
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue