forked from phoenix-oss/llama-stack-mirror
add embedding model by default to distribution templates (#617)
# What does this PR do? Adds the sentence transformer provider and the `all-MiniLM-L6-v2` embedding model to the default models to register in the run.yaml for all providers. ## Test Plan llama stack build --template together --image-type conda llama stack run ~/.llama/distributions/llamastack-together/together-run.yaml
This commit is contained in:
parent
e893b22868
commit
516e1a3e59
41 changed files with 473 additions and 64 deletions
|
@ -6,10 +6,15 @@
|
|||
|
||||
from pathlib import Path
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider
|
||||
from llama_stack.providers.inline.inference.meta_reference import (
|
||||
MetaReferenceQuantizedInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
||||
|
@ -34,6 +39,11 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
|
||||
),
|
||||
)
|
||||
embedding_provider = Provider(
|
||||
provider_id="sentence-transformers",
|
||||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
memory_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
|
@ -44,6 +54,14 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
model_id="${env.INFERENCE_MODEL}",
|
||||
provider_id="meta-reference-inference",
|
||||
)
|
||||
embedding_model = ModelInput(
|
||||
model_id="all-MiniLM-L6-v2",
|
||||
provider_id="sentence-transformers",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
distro_type="self_hosted",
|
||||
|
@ -54,10 +72,10 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider],
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
},
|
||||
default_models=[inference_model],
|
||||
default_models=[inference_model, embedding_model],
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue