chore: Updating how default embedding model is set in stack

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com> # Conflicts: # .github/workflows/integration-vector-io-tests.yml # llama_stack/distributions/ci-tests/run.yaml # llama_stack/distributions/starter-gpu/run.yaml # llama_stack/distributions/starter/run.yaml # llama_stack/distributions/template.py # llama_stack/providers/utils/memory/openai_vector_store_mixin.py
2025-12-12 12:06:04 +00:00 · 2025-10-15 17:15:43 -04:00 · 2025-10-15 17:15:43 -04:00 · 24a1430c8b
commit 24a1430c8b
parent cd152f4240
32 changed files with 276 additions and 265 deletions
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@ -21,6 +21,7 @@ from llama_stack_client import LlamaStackClient
 from openai import OpenAI

 from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.stack import run_config_from_adhoc_config_spec
 from llama_stack.env import get_env_or_fail

@ -236,6 +237,13 @@ def instantiate_llama_stack_client(session):

    if "=" in config:
        run_config = run_config_from_adhoc_config_spec(config)
+
+        # --stack-config bypasses template so need this to set default embedding model
+        if "vector_io" in config and "inference" in config:
+            run_config.vector_stores = VectorStoresConfig(
+                default_embedding_model_id="inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
+            )
+
        run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
        with open(run_config_file.name, "w") as f:
            yaml.dump(run_config.model_dump(), f)
--- a/tests/unit/core/test_stack_validation.py
+++ b/tests/unit/core/test_stack_validation.py
@ -4,90 +4,44 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-"""
-Unit tests for Stack validation functions.
-"""
+"""Unit tests for Stack validation functions."""

 from unittest.mock import AsyncMock

 import pytest

 from llama_stack.apis.models import Model, ModelType
-from llama_stack.core.stack import validate_default_embedding_model
+from llama_stack.core.datatypes import StackRunConfig, VectorStoresConfig
+from llama_stack.core.stack import validate_vector_stores_config
 from llama_stack.providers.datatypes import Api


-class TestStackValidation:
-    """Test Stack validation functions."""
+class TestVectorStoresValidation:
+    async def test_validate_missing_model(self):
+        """Test validation fails when model not found."""
+        run_config = StackRunConfig(
+            image_name="test", providers={}, vector_stores=VectorStoresConfig(default_embedding_model_id="missing")
+        )
+        mock_models = AsyncMock()
+        mock_models.list_models.return_value = []

-    @pytest.mark.parametrize(
-        "models,should_raise",
-        [
-            ([], False),  # No models
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    )
-                ],
-                False,
-            ),  # Single default
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    ),
-                    Model(
-                        identifier="emb2",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb2",
-                    ),
-                ],
-                True,
-            ),  # Multiple defaults
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    ),
-                    Model(
-                        identifier="llm1",
-                        model_type=ModelType.llm,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="llm1",
-                    ),
-                ],
-                False,
-            ),  # Ignores non-embedding
-        ],
-    )
-    async def test_validate_default_embedding_model(self, models, should_raise):
-        """Test validation with various model configurations."""
-        mock_models_impl = AsyncMock()
-        mock_models_impl.list_models.return_value = models
-        impls = {Api.models: mock_models_impl}
+        with pytest.raises(ValueError, match="not found"):
+            await validate_vector_stores_config(run_config, {Api.models: mock_models})

-        if should_raise:
-            with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"):
-                await validate_default_embedding_model(impls)
-        else:
-            await validate_default_embedding_model(impls)
+    async def test_validate_success(self):
+        """Test validation passes with valid model."""
+        run_config = StackRunConfig(
+            image_name="test", providers={}, vector_stores=VectorStoresConfig(default_embedding_model_id="valid")
+        )
+        mock_models = AsyncMock()
+        mock_models.list_models.return_value = [
+            Model(
+                identifier="valid",
+                model_type=ModelType.embedding,
+                metadata={"embedding_dimension": 768},
+                provider_id="p",
+                provider_resource_id="valid",
+            )
+        ]

-    async def test_validate_default_embedding_model_no_models_api(self):
-        """Test validation when models API is not available."""
-        await validate_default_embedding_model({})
+        await validate_vector_stores_config(run_config, {Api.models: mock_models})
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@ -6,13 +6,12 @@

 import json
 import time
-from unittest.mock import AsyncMock, Mock, patch
+from unittest.mock import AsyncMock, patch

 import numpy as np
 import pytest

 from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
    Chunk,
@ -996,96 +995,6 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
    assert batch.file_counts.in_progress == 8


-async def test_get_default_embedding_model_success(vector_io_adapter):
-    """Test successful default embedding model detection."""
-    # Mock models API with a default model
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="nomic-embed-text-v1.5",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={
-                        "embedding_dimension": 768,
-                        "default_configured": True,
-                    },
-                )
-            ]
-        )
-    )
-
-    vector_io_adapter.models_api = mock_models_api
-    result = await vector_io_adapter._get_default_embedding_model_and_dimension()
-
-    assert result is not None
-    model_id, dimension = result
-    assert model_id == "nomic-embed-text-v1.5"
-    assert dimension == 768
-
-
-async def test_get_default_embedding_model_multiple_defaults_error(vector_io_adapter):
-    """Test error when multiple models are marked as default."""
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="model1",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 768, "default_configured": True},
-                ),
-                Model(
-                    identifier="model2",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 512, "default_configured": True},
-                ),
-            ]
-        )
-    )
-
-    vector_io_adapter.models_api = mock_models_api
-
-    with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"):
-        await vector_io_adapter._get_default_embedding_model_and_dimension()
-
-
-async def test_openai_create_vector_store_uses_default_model(vector_io_adapter):
-    """Test that vector store creation uses default embedding model when none specified."""
-    # Mock models API and dependencies
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="default-model",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 512, "default_configured": True},
-                )
-            ]
-        )
-    )
-
-    vector_io_adapter.models_api = mock_models_api
-    vector_io_adapter.register_vector_db = AsyncMock()
-    vector_io_adapter.__provider_id__ = "test-provider"
-
-    # Create vector store without specifying embedding model
-    params = OpenAICreateVectorStoreRequestWithExtraBody(name="test-store")
-    result = await vector_io_adapter.openai_create_vector_store(params)
-
-    # Verify the vector store was created with default model
-    assert result.name == "test-store"
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
-    assert call_args.embedding_model == "default-model"
-    assert call_args.embedding_dimension == 512
-
-
 async def test_embedding_config_from_metadata(vector_io_adapter):
    """Test that embedding configuration is correctly extracted from metadata."""