mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 02:53:30 +00:00
add embedding model by default to distribution templates (#617)
# What does this PR do? Adds the sentence transformer provider and the `all-MiniLM-L6-v2` embedding model to the default models to register in the run.yaml for all providers. ## Test Plan llama stack build --template together --image-type conda llama stack run ~/.llama/distributions/llamastack-together/together-run.yaml
This commit is contained in:
parent
e893b22868
commit
516e1a3e59
41 changed files with 473 additions and 64 deletions
|
@ -4,7 +4,11 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
@ -28,6 +32,11 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="remote::hf::serverless",
|
||||
config=InferenceAPIImplConfig.sample_run_config(),
|
||||
)
|
||||
embedding_provider = Provider(
|
||||
provider_id="sentence-transformers",
|
||||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
memory_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
|
@ -42,6 +51,14 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
model_id="${env.SAFETY_MODEL}",
|
||||
provider_id="hf-serverless-safety",
|
||||
)
|
||||
embedding_model = ModelInput(
|
||||
model_id="all-MiniLM-L6-v2",
|
||||
provider_id="sentence-transformers",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={
|
||||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
|
||||
return DistributionTemplate(
|
||||
name=name,
|
||||
|
@ -54,15 +71,16 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider],
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
},
|
||||
default_models=[inference_model],
|
||||
default_models=[inference_model, embedding_model],
|
||||
),
|
||||
"run-with-safety.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [
|
||||
inference_provider,
|
||||
embedding_provider,
|
||||
Provider(
|
||||
provider_id="hf-serverless-safety",
|
||||
provider_type="remote::hf::serverless",
|
||||
|
@ -76,6 +94,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
default_models=[
|
||||
inference_model,
|
||||
safety_model,
|
||||
embedding_model,
|
||||
],
|
||||
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
|
||||
),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue