mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-12 12:06:04 +00:00
chore: Updating how default embedding model is set in stack
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com> # Conflicts: # .github/workflows/integration-vector-io-tests.yml # llama_stack/distributions/ci-tests/run.yaml # llama_stack/distributions/starter-gpu/run.yaml # llama_stack/distributions/starter/run.yaml # llama_stack/distributions/template.py # llama_stack/providers/utils/memory/openai_vector_store_mixin.py
This commit is contained in:
parent
cd152f4240
commit
24a1430c8b
32 changed files with 276 additions and 265 deletions
|
|
@ -21,6 +21,7 @@ from llama_stack_client import LlamaStackClient
|
|||
from openai import OpenAI
|
||||
|
||||
from llama_stack import LlamaStackAsLibraryClient
|
||||
from llama_stack.core.datatypes import VectorStoresConfig
|
||||
from llama_stack.core.stack import run_config_from_adhoc_config_spec
|
||||
from llama_stack.env import get_env_or_fail
|
||||
|
||||
|
|
@ -236,6 +237,13 @@ def instantiate_llama_stack_client(session):
|
|||
|
||||
if "=" in config:
|
||||
run_config = run_config_from_adhoc_config_spec(config)
|
||||
|
||||
# --stack-config bypasses template so need this to set default embedding model
|
||||
if "vector_io" in config and "inference" in config:
|
||||
run_config.vector_stores = VectorStoresConfig(
|
||||
default_embedding_model_id="inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
|
||||
)
|
||||
|
||||
run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
|
||||
with open(run_config_file.name, "w") as f:
|
||||
yaml.dump(run_config.model_dump(), f)
|
||||
|
|
|
|||
|
|
@ -4,90 +4,44 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""
|
||||
Unit tests for Stack validation functions.
|
||||
"""
|
||||
"""Unit tests for Stack validation functions."""
|
||||
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from llama_stack.apis.models import Model, ModelType
|
||||
from llama_stack.core.stack import validate_default_embedding_model
|
||||
from llama_stack.core.datatypes import StackRunConfig, VectorStoresConfig
|
||||
from llama_stack.core.stack import validate_vector_stores_config
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
|
||||
class TestStackValidation:
|
||||
"""Test Stack validation functions."""
|
||||
class TestVectorStoresValidation:
|
||||
async def test_validate_missing_model(self):
|
||||
"""Test validation fails when model not found."""
|
||||
run_config = StackRunConfig(
|
||||
image_name="test", providers={}, vector_stores=VectorStoresConfig(default_embedding_model_id="missing")
|
||||
)
|
||||
mock_models = AsyncMock()
|
||||
mock_models.list_models.return_value = []
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"models,should_raise",
|
||||
[
|
||||
([], False), # No models
|
||||
(
|
||||
[
|
||||
Model(
|
||||
identifier="emb1",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={"default_configured": True},
|
||||
provider_id="p",
|
||||
provider_resource_id="emb1",
|
||||
)
|
||||
],
|
||||
False,
|
||||
), # Single default
|
||||
(
|
||||
[
|
||||
Model(
|
||||
identifier="emb1",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={"default_configured": True},
|
||||
provider_id="p",
|
||||
provider_resource_id="emb1",
|
||||
),
|
||||
Model(
|
||||
identifier="emb2",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={"default_configured": True},
|
||||
provider_id="p",
|
||||
provider_resource_id="emb2",
|
||||
),
|
||||
],
|
||||
True,
|
||||
), # Multiple defaults
|
||||
(
|
||||
[
|
||||
Model(
|
||||
identifier="emb1",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={"default_configured": True},
|
||||
provider_id="p",
|
||||
provider_resource_id="emb1",
|
||||
),
|
||||
Model(
|
||||
identifier="llm1",
|
||||
model_type=ModelType.llm,
|
||||
metadata={"default_configured": True},
|
||||
provider_id="p",
|
||||
provider_resource_id="llm1",
|
||||
),
|
||||
],
|
||||
False,
|
||||
), # Ignores non-embedding
|
||||
],
|
||||
)
|
||||
async def test_validate_default_embedding_model(self, models, should_raise):
|
||||
"""Test validation with various model configurations."""
|
||||
mock_models_impl = AsyncMock()
|
||||
mock_models_impl.list_models.return_value = models
|
||||
impls = {Api.models: mock_models_impl}
|
||||
with pytest.raises(ValueError, match="not found"):
|
||||
await validate_vector_stores_config(run_config, {Api.models: mock_models})
|
||||
|
||||
if should_raise:
|
||||
with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"):
|
||||
await validate_default_embedding_model(impls)
|
||||
else:
|
||||
await validate_default_embedding_model(impls)
|
||||
async def test_validate_success(self):
|
||||
"""Test validation passes with valid model."""
|
||||
run_config = StackRunConfig(
|
||||
image_name="test", providers={}, vector_stores=VectorStoresConfig(default_embedding_model_id="valid")
|
||||
)
|
||||
mock_models = AsyncMock()
|
||||
mock_models.list_models.return_value = [
|
||||
Model(
|
||||
identifier="valid",
|
||||
model_type=ModelType.embedding,
|
||||
metadata={"embedding_dimension": 768},
|
||||
provider_id="p",
|
||||
provider_resource_id="valid",
|
||||
)
|
||||
]
|
||||
|
||||
async def test_validate_default_embedding_model_no_models_api(self):
|
||||
"""Test validation when models API is not available."""
|
||||
await validate_default_embedding_model({})
|
||||
await validate_vector_stores_config(run_config, {Api.models: mock_models})
|
||||
|
|
|
|||
|
|
@ -6,13 +6,12 @@
|
|||
|
||||
import json
|
||||
import time
|
||||
from unittest.mock import AsyncMock, Mock, patch
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from llama_stack.apis.common.errors import VectorStoreNotFoundError
|
||||
from llama_stack.apis.models import Model, ModelType
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import (
|
||||
Chunk,
|
||||
|
|
@ -996,96 +995,6 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
|
|||
assert batch.file_counts.in_progress == 8
|
||||
|
||||
|
||||
async def test_get_default_embedding_model_success(vector_io_adapter):
|
||||
"""Test successful default embedding model detection."""
|
||||
# Mock models API with a default model
|
||||
mock_models_api = Mock()
|
||||
mock_models_api.list_models = AsyncMock(
|
||||
return_value=Mock(
|
||||
data=[
|
||||
Model(
|
||||
identifier="nomic-embed-text-v1.5",
|
||||
model_type=ModelType.embedding,
|
||||
provider_id="test-provider",
|
||||
metadata={
|
||||
"embedding_dimension": 768,
|
||||
"default_configured": True,
|
||||
},
|
||||
)
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
vector_io_adapter.models_api = mock_models_api
|
||||
result = await vector_io_adapter._get_default_embedding_model_and_dimension()
|
||||
|
||||
assert result is not None
|
||||
model_id, dimension = result
|
||||
assert model_id == "nomic-embed-text-v1.5"
|
||||
assert dimension == 768
|
||||
|
||||
|
||||
async def test_get_default_embedding_model_multiple_defaults_error(vector_io_adapter):
|
||||
"""Test error when multiple models are marked as default."""
|
||||
mock_models_api = Mock()
|
||||
mock_models_api.list_models = AsyncMock(
|
||||
return_value=Mock(
|
||||
data=[
|
||||
Model(
|
||||
identifier="model1",
|
||||
model_type=ModelType.embedding,
|
||||
provider_id="test-provider",
|
||||
metadata={"embedding_dimension": 768, "default_configured": True},
|
||||
),
|
||||
Model(
|
||||
identifier="model2",
|
||||
model_type=ModelType.embedding,
|
||||
provider_id="test-provider",
|
||||
metadata={"embedding_dimension": 512, "default_configured": True},
|
||||
),
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
vector_io_adapter.models_api = mock_models_api
|
||||
|
||||
with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"):
|
||||
await vector_io_adapter._get_default_embedding_model_and_dimension()
|
||||
|
||||
|
||||
async def test_openai_create_vector_store_uses_default_model(vector_io_adapter):
|
||||
"""Test that vector store creation uses default embedding model when none specified."""
|
||||
# Mock models API and dependencies
|
||||
mock_models_api = Mock()
|
||||
mock_models_api.list_models = AsyncMock(
|
||||
return_value=Mock(
|
||||
data=[
|
||||
Model(
|
||||
identifier="default-model",
|
||||
model_type=ModelType.embedding,
|
||||
provider_id="test-provider",
|
||||
metadata={"embedding_dimension": 512, "default_configured": True},
|
||||
)
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
vector_io_adapter.models_api = mock_models_api
|
||||
vector_io_adapter.register_vector_db = AsyncMock()
|
||||
vector_io_adapter.__provider_id__ = "test-provider"
|
||||
|
||||
# Create vector store without specifying embedding model
|
||||
params = OpenAICreateVectorStoreRequestWithExtraBody(name="test-store")
|
||||
result = await vector_io_adapter.openai_create_vector_store(params)
|
||||
|
||||
# Verify the vector store was created with default model
|
||||
assert result.name == "test-store"
|
||||
vector_io_adapter.register_vector_db.assert_called_once()
|
||||
call_args = vector_io_adapter.register_vector_db.call_args[0][0]
|
||||
assert call_args.embedding_model == "default-model"
|
||||
assert call_args.embedding_dimension == 512
|
||||
|
||||
|
||||
async def test_embedding_config_from_metadata(vector_io_adapter):
|
||||
"""Test that embedding configuration is correctly extracted from metadata."""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue