feat: Enable setting a default embedding model in the stack (#3803)

# What does this PR do? Enables automatic embedding model detection for vector stores by using a `default_configured` boolean that can be defined in the `run.yaml`.   ## Test Plan - Unit tests - Integration tests - Simple example below: Spin up the stack: ```bash uv run llama stack build --distro starter --image-type venv --run ``` Then test with OpenAI's client: ```python from openai import OpenAI client = OpenAI(base_url="http://localhost:8321/v1/", api_key="none") vs = client.vector_stores.create() ``` Previously you needed: ```python vs = client.vector_stores.create( extra_body={ "embedding_model": "sentence-transformers/all-MiniLM-L6-v2", "embedding_dimension": 384, } ) ``` The `extra_body` is now unnecessary. --------- Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
2025-12-04 02:03:44 +00:00 · 2025-10-14 21:25:13 -04:00 · 2025-10-14 21:25:13 -04:00 · ef4bc70bbe
commit ef4bc70bbe
parent d875e427bf
29 changed files with 553 additions and 403 deletions
--- a/llama_stack/core/stack.py
+++ b/llama_stack/core/stack.py
@ -98,6 +98,30 @@ REGISTRY_REFRESH_TASK = None
 TEST_RECORDING_CONTEXT = None


+async def validate_default_embedding_model(impls: dict[Api, Any]):
+    """Ensure no more than one embedding model carries ``default_configured=True``.
+
+    Returns immediately when ``Api.models`` is not a key of ``impls``. Otherwise
+    the models are listed through that implementation and every model whose
+    ``model_type`` equals ``"embedding"`` and whose metadata has
+    ``default_configured`` set to exactly ``True`` is collected.
+
+    Raises:
+        ValueError: If more than one embedding model is flagged as the default.
+    """
+    if Api.models not in impls:
+        return
+
+    listing = await impls[Api.models].list_models()
+    # Accept either a response object exposing `.data` or the models themselves.
+    if hasattr(listing, "data"):
+        candidates = listing.data
+    else:
+        candidates = listing
+
+    default_embedding_models = [
+        entry.identifier
+        for entry in candidates
+        if entry.model_type == "embedding" and entry.metadata.get("default_configured") is True
+    ]
+
+    if len(default_embedding_models) > 1:
+        raise ValueError(
+            f"Multiple embedding models marked as default_configured=True: {default_embedding_models}. "
+            "Only one embedding model can be marked as default."
+        )
+
+    # Zero defaults is valid; only announce the default when one exists.
+    if default_embedding_models:
+        logger.info(f"Default embedding model configured: {default_embedding_models[0]}")
+
+
 async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
    for rsrc, api, register_method, list_method in RESOURCES:
        objects = getattr(run_config, rsrc)
@ -128,6 +152,8 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
                f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}",
            )

+    await validate_default_embedding_model(impls)
+

 class EnvVarError(Exception):
    def __init__(self, var_name: str, path: str = ""):