diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py index acfe20399..719624e86 100644 --- a/src/llama_stack/core/routers/inference.py +++ b/src/llama_stack/core/routers/inference.py @@ -417,7 +417,7 @@ class InferenceRouter(Inference): prompt_tokens=chunk.usage.prompt_tokens, completion_tokens=chunk.usage.completion_tokens, total_tokens=chunk.usage.total_tokens, - model_id=fully_qualified_model_id, + fully_qualified_model_id=fully_qualified_model_id, provider_id=provider_id, ) for metric in metrics: diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index 02e56ed7e..5256dda44 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -17,7 +17,9 @@ from llama_stack_api import ( HealthResponse, HealthStatus, InterleavedContent, + ModelNotFoundError, ModelType, + ModelTypeError, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, @@ -124,6 +126,14 @@ class VectorIORouter(VectorIO): if embedding_model is not None and embedding_dimension is None: embedding_dimension = await self._get_embedding_model_dimension(embedding_model) + # Validate that embedding model exists and is of the correct type + if embedding_model is not None: + model = await self.routing_table.get_object_by_identifier("model", embedding_model) + if model is None: + raise ModelNotFoundError(embedding_model) + if model.model_type != ModelType.embedding: + raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding) + # Auto-select provider if not specified if provider_id is None: num_providers = len(self.routing_table.impls_by_provider_id) diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index 202e2da1b..beb7eee9a 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ b/tests/unit/core/routers/test_vector_io.py @@ -7,6 +7,12 @@ 
from unittest.mock import AsyncMock, Mock import pytest +from llama_stack_api import ( + ModelNotFoundError, + ModelType, + ModelTypeError, + OpenAICreateVectorStoreRequestWithExtraBody, +) from llama_stack.core.routers.vector_io import VectorIORouter from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody @@ -21,6 +27,7 @@ async def test_single_provider_auto_selection(): Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384}) ] ) + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=Mock(model_type=ModelType.embedding)) mock_routing_table.register_vector_store = AsyncMock( return_value=Mock(identifier="vs_123", provider_id="inline::faiss", provider_resource_id="vs_123") ) @@ -48,6 +55,7 @@ async def test_create_vector_stores_multiple_providers_missing_provider_id_error Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384}) ] ) + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=Mock(model_type=ModelType.embedding)) router = VectorIORouter(mock_routing_table) request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate( {"name": "test_store", "embedding_model": "all-MiniLM-L6-v2"} ) @@ -117,3 +125,32 @@ async def test_update_vector_store_same_provider_id_succeeds(): provider.openai_update_vector_store.assert_called_once_with( vector_store_id="vs_123", name="updated_name", expires_after=None, metadata={"provider_id": "inline::faiss"} ) + + + +async def test_create_vector_store_with_unknown_embedding_model_raises_error(): + """Test that creating a vector store with an unknown embedding model raises + ModelNotFoundError.""" + mock_routing_table = Mock(impls_by_provider_id={"provider": "mock"}) + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=None) + + router = VectorIORouter(mock_routing_table) + request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate( + {"embedding_model": "unknown-model",
"embedding_dimension": 384} + ) + + with pytest.raises(ModelNotFoundError, match="Model 'unknown-model' not found"): + await router.openai_create_vector_store(request) + + +async def test_create_vector_store_with_wrong_model_type_raises_error(): + """Test that creating a vector store with a non-embedding model raises ModelTypeError.""" + mock_routing_table = Mock(impls_by_provider_id={"provider": "mock"}) + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=Mock(model_type=ModelType.llm)) + + router = VectorIORouter(mock_routing_table) + request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate( + {"embedding_model": "text-model", "embedding_dimension": 384} + ) + + with pytest.raises(ModelTypeError, match="Model 'text-model' is of type"): + await router.openai_create_vector_store(request)