Merge branch 'main' into inference_refactor

2025-12-17 20:59:48 +00:00 · 2024-12-16 16:47:57 -08:00 · 2024-12-16 16:47:57 -08:00 · 6a51e2268d
commit 6a51e2268d
parent 35b1a6f2dc c2f7905fa4
117 changed files with 12698 additions and 2589 deletions
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@@ -233,10 +233,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
            metadata = {}
        if model_type is None:
            model_type = ModelType.llm
-        if (
-            "embedding_dimension" not in metadata
-            and model_type == ModelType.embedding_model
-        ):
+        if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
            raise ValueError(
                "Embedding model must have an embedding dimension in its metadata"
            )
@@ -323,8 +320,15 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks):
                )
        model = await self.get_object_by_identifier("model", params.embedding_model)
        if model is None:
-            raise ValueError(f"Model {params.embedding_model} not found")
-        if model.model_type != ModelType.embedding_model:
+            if params.embedding_model == "all-MiniLM-L6-v2":
+                raise ValueError(
+                    "Embeddings are now served via Inference providers. "
+                    "Please upgrade your run.yaml to include inline::sentence-transformer as an additional inference provider. "
+                    "See https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/together/run.yaml for an example."
+                )
+            else:
+                raise ValueError(f"Model {params.embedding_model} not found")
+        if model.model_type != ModelType.embedding:
            raise ValueError(
                f"Model {params.embedding_model} is not an embedding model"
            )