Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

updating structure of default

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

fix model id creation

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
Francisco Javier Arceo 2025-10-20 14:50:57 -04:00
parent b3addc94d1
commit 7ffd20d112
10 changed files with 119 additions and 62 deletions

View file

@@ -351,17 +351,30 @@ class AuthenticationRequiredError(Exception):
pass
class DefaultEmbeddingModel(BaseModel):
    """Configuration for default embedding model."""

    # Provider serving the model; the router joins this with model_id as
    # "<provider_id>/<model_id>" to build the full model identifier.
    provider_id: str = Field(
        ...,
        description="ID of the inference provider that serves the embedding model (e.g., 'sentence-transformers').",
    )
    # Bare model name, without the provider prefix.
    model_id: str = Field(
        ...,
        description="ID of the embedding model (e.g., 'nomic-ai/nomic-embed-text-v1.5').",
    )
class VectorStoresConfig(BaseModel):
    """Configuration for vector stores in the stack.

    Both fields are optional; when a field is unset the router falls back to
    its own selection logic for that aspect.
    """

    # NOTE(review): this span in the rendered diff interleaved removed lines
    # (the old `embedding_model_id` field and the old `provider_id` field
    # header) with the added ones, producing an unclosed Field(...) call.
    # Reconstructed here as the post-change class: the removed fields are
    # superseded by default_provider_id and default_embedding_model.

    # Vector_io provider chosen when several providers are registered and the
    # request does not name one explicitly.
    default_provider_id: str | None = Field(
        default=None,
        description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.",
    )
    # Embedding model applied when a vector store is created without an
    # explicit embedding model.
    default_embedding_model: DefaultEmbeddingModel | None = Field(
        default=None,
        description="Default embedding model configuration for vector stores.",
    )
class QuotaPeriod(StrEnum):

View file

@@ -126,7 +126,11 @@ class VectorIORouter(VectorIO):
# Use default embedding model if not specified
if embedding_model is None and self.vector_stores_config is not None:
embedding_model = self.vector_stores_config.embedding_model_id
if self.vector_stores_config.default_embedding_model is not None:
# Construct the full model ID with provider prefix
embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id
model_id = self.vector_stores_config.default_embedding_model.model_id
embedding_model = f"{embedding_provider_id}/{model_id}"
if embedding_model is not None and embedding_dimension is None:
embedding_dimension = await self._get_embedding_model_dimension(embedding_model)
@@ -139,8 +143,8 @@ class VectorIORouter(VectorIO):
if num_providers > 1:
available_providers = list(self.routing_table.impls_by_provider_id.keys())
# Use default configured provider
if self.vector_stores_config and self.vector_stores_config.provider_id:
default_provider = self.vector_stores_config.provider_id
if self.vector_stores_config and self.vector_stores_config.default_provider_id:
default_provider = self.vector_stores_config.default_provider_id
if default_provider in available_providers:
provider_id = default_provider
logger.debug(f"Using configured default vector store provider: {provider_id}")

View file

@@ -135,41 +135,52 @@ async def validate_vector_stores_config(run_config: StackRunConfig, impls: dict[
return
vector_stores_config = run_config.vector_stores
default_model_id = vector_stores_config.embedding_model_id
if Api.models not in impls:
raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'")
# Validate default embedding model if configured
if vector_stores_config.default_embedding_model:
default_embedding_model = vector_stores_config.default_embedding_model
provider_id = default_embedding_model.provider_id
model_id = default_embedding_model.model_id
# Construct the full model identifier
default_model_id = f"{provider_id}/{model_id}"
models_impl = impls[Api.models]
response = await models_impl.list_models()
models_list = response.data if hasattr(response, "data") else response
if Api.models not in impls:
raise ValueError(
f"Models API is not available but vector_stores config requires model '{default_model_id}'"
)
# find default embedding model
default_model = None
for model in models_list:
if model.identifier == default_model_id:
default_model = model
break
models_impl = impls[Api.models]
response = await models_impl.list_models()
models_list = response.data if hasattr(response, "data") else response
if not default_model:
available_models = [m.identifier for m in models_list if m.model_type == "embedding"]
raise ValueError(
f"Embedding model '{default_model_id}' not found. Available embedding models: {available_models}"
)
# find default embedding model
default_model = None
for model in models_list:
if model.identifier == default_model_id:
default_model = model
break
if default_model.model_type != "embedding":
raise ValueError(f"Model '{default_model_id}' is type '{default_model.model_type}', not 'embedding'")
if not default_model:
available_models = [m.identifier for m in models_list if m.model_type == "embedding"]
raise ValueError(
f"Embedding model '{default_model_id}' not found. Available embedding models: {available_models}"
)
embedding_dimension = default_model.metadata.get("embedding_dimension")
if embedding_dimension is None:
raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
if default_model.model_type != "embedding":
raise ValueError(f"Model '{default_model_id}' is type '{default_model.model_type}', not 'embedding'")
try:
int(embedding_dimension)
except ValueError as err:
raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
embedding_dimension = default_model.metadata.get("embedding_dimension")
if embedding_dimension is None:
raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")
try:
int(embedding_dimension)
except ValueError as err:
raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")
# If no default embedding model is configured, that's fine - validation passes
class EnvVarError(Exception):

View file

@@ -255,4 +255,7 @@ server:
telemetry:
enabled: true
vector_stores:
embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5

View file

@@ -258,4 +258,7 @@ server:
telemetry:
enabled: true
vector_stores:
embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5

View file

@@ -255,4 +255,7 @@ server:
telemetry:
enabled: true
vector_stores:
embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5

View file

@@ -9,6 +9,7 @@ from typing import Any
from llama_stack.core.datatypes import (
BuildProvider,
DefaultEmbeddingModel,
Provider,
ProviderSpec,
ShieldInput,
@@ -249,7 +250,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
default_tool_groups=default_tool_groups,
default_shields=default_shields,
vector_stores_config=VectorStoresConfig(
embedding_model_id="sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
default_provider_id="faiss",
default_embedding_model=DefaultEmbeddingModel(
provider_id="sentence-transformers",
model_id="nomic-ai/nomic-embed-text-v1.5",
),
),
),
},