mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-25 21:11:59 +00:00
Add configurable embedding models for vector IO providers
This change lets users configure default embedding models at the provider level instead of always relying on system defaults. Each vector store provider can now specify an embedding_model and an optional embedding_dimension in its config.

Key features:
- Auto-dimension lookup for standard models from the registry
- Support for Matryoshka embeddings with custom dimensions
- Three-tier priority: explicit params > provider config > system fallback
- Full backward compatibility: existing setups work unchanged
- Comprehensive test coverage with 20 test cases

Updated all vector IO providers (FAISS, Chroma, Milvus, Qdrant, etc.) with the new config fields and added detailed documentation with examples.

Fixes #2729
This commit is contained in:
parent
2298d2473c
commit
474b50b422
28 changed files with 1160 additions and 24 deletions
|
|
@ -6,12 +6,25 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ChromaVectorIOConfig(BaseModel):
|
||||
url: str | None
|
||||
embedding_model: str | None = Field(
|
||||
default=None,
|
||||
description="Optional default embedding model for this provider. If not specified, will use system default.",
|
||||
)
|
||||
embedding_dimension: int | None = Field(
|
||||
default=None,
|
||||
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]:
|
||||
return {"url": url}
|
||||
return {
|
||||
"url": url,
|
||||
# Optional: Configure default embedding model for this provider
|
||||
# "embedding_model": "all-MiniLM-L6-v2",
|
||||
# "embedding_dimension": 384, # Only needed for variable-dimension models
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,6 +18,14 @@ class MilvusVectorIOConfig(BaseModel):
|
|||
token: str | None = Field(description="The token of the Milvus server")
|
||||
consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong")
|
||||
kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None)
|
||||
embedding_model: str | None = Field(
|
||||
default=None,
|
||||
description="Optional default embedding model for this provider. If not specified, will use system default.",
|
||||
)
|
||||
embedding_dimension: int | None = Field(
|
||||
default=None,
|
||||
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
|
||||
)
|
||||
|
||||
# This configuration allows additional fields to be passed through to the underlying Milvus client.
|
||||
# See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general.
|
||||
|
|
@ -25,4 +33,10 @@ class MilvusVectorIOConfig(BaseModel):
|
|||
|
||||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
|
||||
return {"uri": "${env.MILVUS_ENDPOINT}", "token": "${env.MILVUS_TOKEN}"}
|
||||
return {
|
||||
"uri": "${env.MILVUS_ENDPOINT}",
|
||||
"token": "${env.MILVUS_TOKEN}",
|
||||
# Optional: Configure default embedding model for this provider
|
||||
# "embedding_model": "all-MiniLM-L6-v2",
|
||||
# "embedding_dimension": 384, # Only needed for variable-dimension models
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,15 +18,32 @@ class PGVectorVectorIOConfig(BaseModel):
|
|||
db: str | None = Field(default="postgres")
|
||||
user: str | None = Field(default="postgres")
|
||||
password: str | None = Field(default="mysecretpassword")
|
||||
embedding_model: str | None = Field(
|
||||
default=None,
|
||||
description="Optional default embedding model for this provider. If not specified, will use system default.",
|
||||
)
|
||||
embedding_dimension: int | None = Field(
|
||||
default=None,
|
||||
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(
|
||||
cls,
|
||||
host: str = "${env.PGVECTOR_HOST:=localhost}",
|
||||
port: int = "${env.PGVECTOR_PORT:=5432}",
|
||||
port: int | str = "${env.PGVECTOR_PORT:=5432}",
|
||||
db: str = "${env.PGVECTOR_DB}",
|
||||
user: str = "${env.PGVECTOR_USER}",
|
||||
password: str = "${env.PGVECTOR_PASSWORD}",
|
||||
**kwargs: Any,
|
||||
) -> dict[str, Any]:
|
||||
return {"host": host, "port": port, "db": db, "user": user, "password": password}
|
||||
return {
|
||||
"host": host,
|
||||
"port": port,
|
||||
"db": db,
|
||||
"user": user,
|
||||
"password": password,
|
||||
# Optional: Configure default embedding model for this provider
|
||||
# "embedding_model": "all-MiniLM-L6-v2",
|
||||
# "embedding_dimension": 384, # Only needed for variable-dimension models
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
|
@ -23,9 +23,20 @@ class QdrantVectorIOConfig(BaseModel):
|
|||
prefix: str | None = None
|
||||
timeout: int | None = None
|
||||
host: str | None = None
|
||||
embedding_model: str | None = Field(
|
||||
default=None,
|
||||
description="Optional default embedding model for this provider. If not specified, will use system default.",
|
||||
)
|
||||
embedding_dimension: int | None = Field(
|
||||
default=None,
|
||||
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"api_key": "${env.QDRANT_API_KEY}",
|
||||
# Optional: Configure default embedding model for this provider
|
||||
# "embedding_model": "all-MiniLM-L6-v2",
|
||||
# "embedding_dimension": 384, # Only needed for variable-dimension models
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class WeaviateRequestProviderData(BaseModel):
|
||||
|
|
@ -15,6 +15,19 @@ class WeaviateRequestProviderData(BaseModel):
|
|||
|
||||
|
||||
class WeaviateVectorIOConfig(BaseModel):
|
||||
embedding_model: str | None = Field(
|
||||
default=None,
|
||||
description="Optional default embedding model for this provider. If not specified, will use system default.",
|
||||
)
|
||||
embedding_dimension: int | None = Field(
|
||||
default=None,
|
||||
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
|
||||
return {}
|
||||
return {
|
||||
# Optional: Configure default embedding model for this provider
|
||||
# "embedding_model": "all-MiniLM-L6-v2",
|
||||
# "embedding_dimension": 384, # Only needed for variable-dimension models
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue