From 7ffd20d112c300fd8859f5ec3cd1a64d92348e47 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Mon, 20 Oct 2025 14:50:57 -0400 Subject: [PATCH] fix test Signed-off-by: Francisco Javier Arceo updating structure of default Signed-off-by: Francisco Javier Arceo fix model id creation Signed-off-by: Francisco Javier Arceo --- docs/docs/building_applications/rag.mdx | 28 ++++---- llama_stack/core/datatypes.py | 23 +++++-- llama_stack/core/routers/vector_io.py | 10 ++- llama_stack/core/stack.py | 65 +++++++++++-------- llama_stack/distributions/ci-tests/run.yaml | 5 +- .../distributions/starter-gpu/run.yaml | 5 +- llama_stack/distributions/starter/run.yaml | 5 +- llama_stack/distributions/starter/starter.py | 7 +- .../vector_io/test_openai_vector_stores.py | 9 ++- tests/unit/core/test_stack_validation.py | 24 +++++-- 10 files changed, 119 insertions(+), 62 deletions(-) diff --git a/docs/docs/building_applications/rag.mdx b/docs/docs/building_applications/rag.mdx index 3d758e3e3..312ad29c2 100644 --- a/docs/docs/building_applications/rag.mdx +++ b/docs/docs/building_applications/rag.mdx @@ -87,20 +87,19 @@ Llama Stack provides OpenAI-compatible RAG capabilities through: To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your run.yaml like so: ```yaml -models: - - model_id: nomic-ai/nomic-embed-text-v1.5 - provider_id: inline::sentence-transformers - metadata: - embedding_dimension: 768 - vector_stores: - default_embedding_model_id: nomic-ai/nomic-embed-text-v1.5 + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 ``` With this configuration: -- `client.vector_stores.create()` works without requiring embedding model parameters -- The system automatically uses the default model and its embedding dimension for any newly created vector store -- The `vector_stores` section explicitly configures which embedding model to use as default +- `client.vector_stores.create()` works without requiring embedding model or provider parameters +- The system automatically uses the default vector store provider (`faiss`) when multiple providers are available +- The system automatically uses the default embedding model (`sentence-transformers/nomic-ai/nomic-embed-text-v1.5`) for any newly created vector store +- The `default_provider_id` specifies which vector storage backend to use +- The `default_embedding_model` specifies both the inference provider and model for embeddings ## Vector Store Operations @@ -109,14 +108,15 @@ With this configuration: You can create vector stores with automatic or explicit embedding model selection: ```python -# Automatic - uses default configured embedding model +# Automatic - uses default configured embedding model and vector store provider vs = client.vector_stores.create() -# Explicit - specify embedding model when you need a specific one +# Explicit - specify embedding model and/or provider when you need specific ones vs = client.vector_stores.create( extra_body={ - "embedding_model": "nomic-ai/nomic-embed-text-v1.5", - "embedding_dimension": 768 + "provider_id": "faiss", # Optional: specify vector store provider + "embedding_model": "sentence-transformers/nomic-ai/nomic-embed-text-v1.5", + "embedding_dimension": 768 # Optional: will be auto-detected if not provided } ) ``` diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index 49035114c..a83decde0 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -351,17 +351,30 @@ class AuthenticationRequiredError(Exception): pass +class DefaultEmbeddingModel(BaseModel): + """Configuration for default embedding model.""" + + provider_id: str = Field( + ..., + description="ID of the inference provider that serves the embedding model (e.g., 'sentence-transformers').", + ) + model_id: str = Field( + ..., + description="ID of the embedding model (e.g., 'nomic-ai/nomic-embed-text-v1.5').", + ) + + class VectorStoresConfig(BaseModel): """Configuration for vector stores in the stack.""" - embedding_model_id: str = Field( - ..., - description="ID of the embedding model to use as default for vector stores when none is specified. Must reference a model defined in the 'models' section.", - ) - provider_id: str | None = Field( + default_provider_id: str | None = Field( default=None, description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.", ) + default_embedding_model: DefaultEmbeddingModel | None = Field( + default=None, + description="Default embedding model configuration for vector stores.", + ) class QuotaPeriod(StrEnum): diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index fd9ec387e..e06d1d45c 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -126,7 +126,11 @@ class VectorIORouter(VectorIO): # Use default embedding model if not specified if embedding_model is None and self.vector_stores_config is not None: - embedding_model = self.vector_stores_config.embedding_model_id + if self.vector_stores_config.default_embedding_model is not None: + # Construct the full model ID with provider prefix + embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id + model_id = self.vector_stores_config.default_embedding_model.model_id + embedding_model = f"{embedding_provider_id}/{model_id}" if embedding_model is not None and embedding_dimension is None: embedding_dimension = await self._get_embedding_model_dimension(embedding_model) @@ -139,8 +143,8 @@ class VectorIORouter(VectorIO): if num_providers > 1: available_providers = list(self.routing_table.impls_by_provider_id.keys()) # Use default configured provider - if self.vector_stores_config and self.vector_stores_config.provider_id: - default_provider = self.vector_stores_config.provider_id + if self.vector_stores_config and self.vector_stores_config.default_provider_id: + default_provider = self.vector_stores_config.default_provider_id if default_provider in available_providers: provider_id = default_provider logger.debug(f"Using configured default vector store provider: {provider_id}") diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index 6a1015881..8c9b1a376 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -135,41 +135,52 @@ async def validate_vector_stores_config(run_config: StackRunConfig, impls: dict[ return vector_stores_config = run_config.vector_stores - default_model_id = vector_stores_config.embedding_model_id - if Api.models not in impls: - raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'") + # Validate default embedding model if configured + if vector_stores_config.default_embedding_model: + default_embedding_model = vector_stores_config.default_embedding_model + provider_id = default_embedding_model.provider_id + model_id = default_embedding_model.model_id + # Construct the full model identifier + default_model_id = f"{provider_id}/{model_id}" - models_impl = impls[Api.models] - response = await models_impl.list_models() - models_list = response.data if hasattr(response, "data") else response + if Api.models not in impls: + raise ValueError( + f"Models API is not available but vector_stores config requires model '{default_model_id}'" + ) - # find default embedding model - default_model = None - for model in models_list: - if model.identifier == default_model_id: - default_model = model - break + models_impl = impls[Api.models] + response = await models_impl.list_models() + models_list = response.data if hasattr(response, "data") else response - if not default_model: - available_models = [m.identifier for m in models_list if m.model_type == "embedding"] - raise ValueError( - f"Embedding model '{default_model_id}' not found. Available embedding models: {available_models}" - ) + # find default embedding model + default_model = None + for model in models_list: + if model.identifier == default_model_id: + default_model = model + break - if default_model.model_type != "embedding": - raise ValueError(f"Model '{default_model_id}' is type '{default_model.model_type}', not 'embedding'") + if not default_model: + available_models = [m.identifier for m in models_list if m.model_type == "embedding"] + raise ValueError( + f"Embedding model '{default_model_id}' not found. Available embedding models: {available_models}" + ) - embedding_dimension = default_model.metadata.get("embedding_dimension") - if embedding_dimension is None: - raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") + if default_model.model_type != "embedding": + raise ValueError(f"Model '{default_model_id}' is type '{default_model.model_type}', not 'embedding'") - try: - int(embedding_dimension) - except ValueError as err: - raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err + embedding_dimension = default_model.metadata.get("embedding_dimension") + if embedding_dimension is None: + raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") - logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") + try: + int(embedding_dimension) + except ValueError as err: + raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err + + logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") + + # If no default embedding model is configured, that's fine - validation passes class EnvVarError(Exception): diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index 3ae049cbc..299402915 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -255,4 +255,7 @@ server: telemetry: enabled: true vector_stores: - embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5 + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml index 6803e8a64..273d806a1 100644 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ b/llama_stack/distributions/starter-gpu/run.yaml @@ -258,4 +258,7 @@ server: telemetry: enabled: true vector_stores: - embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5 + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml index ca18baf09..d055b33bb 100644 --- a/llama_stack/distributions/starter/run.yaml +++ b/llama_stack/distributions/starter/run.yaml @@ -255,4 +255,7 @@ server: telemetry: enabled: true vector_stores: - embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5 + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py index 897f75bf9..6a3f5e3ac 100644 --- a/llama_stack/distributions/starter/starter.py +++ b/llama_stack/distributions/starter/starter.py @@ -9,6 +9,7 @@ from typing import Any from llama_stack.core.datatypes import ( BuildProvider, + DefaultEmbeddingModel, Provider, ProviderSpec, ShieldInput, @@ -249,7 +250,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: default_tool_groups=default_tool_groups, default_shields=default_shields, vector_stores_config=VectorStoresConfig( - embedding_model_id="sentence-transformers/nomic-ai/nomic-embed-text-v1.5" + default_provider_id="faiss", + default_embedding_model=DefaultEmbeddingModel( + provider_id="sentence-transformers", + model_id="nomic-ai/nomic-embed-text-v1.5", + ), ), ), }, diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 9b28adc90..626faf42d 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -8,9 +8,8 @@ import time from io import BytesIO import pytest -from llama_stack_client import BadRequestError, NotFoundError +from llama_stack_client import BadRequestError from openai import BadRequestError as OpenAIBadRequestError -from openai import NotFoundError as OpenAINotFoundError from llama_stack.apis.vector_io import Chunk from llama_stack.core.library_client import LlamaStackAsLibraryClient @@ -839,7 +838,7 @@ def test_openai_vector_store_list_files_invalid_vector_store( if isinstance(compat_client, LlamaStackAsLibraryClient): errors = ValueError else: - errors = (NotFoundError, OpenAINotFoundError) + errors = (BadRequestError, OpenAIBadRequestError) with pytest.raises(errors): compat_client.vector_stores.files.list(vector_store_id="abc123") @@ -1528,11 +1527,11 @@ def test_openai_vector_store_file_batch_error_handling( batch_id="non_existent_batch_id", ) - # Test operations on non-existent vector store (returns NotFoundError) + # Test operations on non-existent vector store (returns BadRequestError) if isinstance(compat_client, LlamaStackAsLibraryClient): vector_store_errors = ValueError else: - vector_store_errors = (NotFoundError, OpenAINotFoundError) + vector_store_errors = (BadRequestError, OpenAIBadRequestError) with pytest.raises(vector_store_errors): # Should raise an error for non-existent vector store compat_client.vector_stores.file_batches.create( diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py index b9fe29f23..74782fae9 100644 --- a/tests/unit/core/test_stack_validation.py +++ b/tests/unit/core/test_stack_validation.py @@ -11,7 +11,7 @@ from unittest.mock import AsyncMock import pytest from llama_stack.apis.models import Model, ModelType -from llama_stack.core.datatypes import StackRunConfig, VectorStoresConfig +from llama_stack.core.datatypes import DefaultEmbeddingModel, StackRunConfig, VectorStoresConfig from llama_stack.core.stack import validate_vector_stores_config from llama_stack.providers.datatypes import Api @@ -20,7 +20,15 @@ class TestVectorStoresValidation: async def test_validate_missing_model(self): """Test validation fails when model not found.""" run_config = StackRunConfig( - image_name="test", providers={}, vector_stores=VectorStoresConfig(embedding_model_id="missing") + image_name="test", + providers={}, + vector_stores=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=DefaultEmbeddingModel( + provider_id="p", + model_id="missing", + ), + ), ) mock_models = AsyncMock() mock_models.list_models.return_value = [] @@ -31,12 +39,20 @@ class TestVectorStoresValidation: async def test_validate_success(self): """Test validation passes with valid model.""" run_config = StackRunConfig( - image_name="test", providers={}, vector_stores=VectorStoresConfig(embedding_model_id="valid") + image_name="test", + providers={}, + vector_stores=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=DefaultEmbeddingModel( + provider_id="p", + model_id="valid", + ), + ), ) mock_models = AsyncMock() mock_models.list_models.return_value = [ Model( - identifier="valid", + identifier="p/valid", # Must match provider_id/model_id format model_type=ModelType.embedding, metadata={"embedding_dimension": 768}, provider_id="p",