Merge branch 'main' into inference_refactor

This commit is contained in:
Botao Chen 2024-12-16 16:47:57 -08:00
commit 6a51e2268d
117 changed files with 12698 additions and 2589 deletions

View file

@ -233,10 +233,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
metadata = {}
if model_type is None:
model_type = ModelType.llm
if (
"embedding_dimension" not in metadata
and model_type == ModelType.embedding_model
):
if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
raise ValueError(
"Embedding model must have an embedding dimension in its metadata"
)
@ -323,8 +320,15 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks):
)
model = await self.get_object_by_identifier("model", params.embedding_model)
if model is None:
raise ValueError(f"Model {params.embedding_model} not found")
if model.model_type != ModelType.embedding_model:
if params.embedding_model == "all-MiniLM-L6-v2":
raise ValueError(
"Embeddings are now served via Inference providers. "
"Please upgrade your run.yaml to include inline::sentence-transformer as an additional inference provider. "
"See https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/together/run.yaml for an example."
)
else:
raise ValueError(f"Model {params.embedding_model} not found")
if model.model_type != ModelType.embedding:
raise ValueError(
f"Model {params.embedding_model} is not an embedding model"
)