feat(vector-io): configurable embedding models for all providers (v2)\n\nAdds embedding_model and embedding_dimension fields to all VectorIOConfig classes.\nRouter respects provider defaults with fallback.\nIntroduces embedding_utils helper.\nComprehensive docs & samples.\nResolves #2729

This commit is contained in:
skamenan7 2025-07-17 11:51:40 -04:00
parent c8f274347d
commit d55dd3e9a0
24 changed files with 482 additions and 14 deletions

View file

@ -6,12 +6,25 @@
from typing import Any
from pydantic import BaseModel
from pydantic import BaseModel, Field
class ChromaVectorIOConfig(BaseModel):
db_path: str
embedding_model: str | None = Field(
default=None,
description="Optional default embedding model for this provider. If not specified, will use system default.",
)
embedding_dimension: int | None = Field(
default=None,
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
)
@classmethod
def sample_run_config(cls, db_path: str = "${env.CHROMADB_PATH}", **kwargs: Any) -> dict[str, Any]:
return {"db_path": db_path}
return {
"db_path": db_path,
# Optional: Configure default embedding model for this provider
# "embedding_model": "all-MiniLM-L6-v2",
# "embedding_dimension": 384, # Only needed for variable-dimension models
}