Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-12 20:12:33 +00:00)

chore: Updating how default embedding model is set in stack

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

# Conflicts:
#	.github/workflows/integration-vector-io-tests.yml
#	llama_stack/distributions/ci-tests/run.yaml
#	llama_stack/distributions/starter-gpu/run.yaml
#	llama_stack/distributions/starter/run.yaml
#	llama_stack/distributions/template.py
#	llama_stack/providers/utils/memory/openai_vector_store_mixin.py

Parent: cd152f4240
Commit: 24a1430c8b

32 changed files with 276 additions and 265 deletions
@@ -169,9 +169,7 @@ jobs:
        run: |
          uv run --no-sync \
            pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
-            tests/integration/vector_io \
-            --embedding-model inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
-            --embedding-dimension 768
+            tests/integration/vector_io

      - name: Check Storage and Memory Available After Tests
        if: ${{ always() }}
@@ -92,13 +92,15 @@ models:
     provider_id: inline::sentence-transformers
     metadata:
       embedding_dimension: 768
-      default_configured: true
+
+vector_stores:
+  default_embedding_model_id: nomic-ai/nomic-embed-text-v1.5
 ```

 With this configuration:
 - `client.vector_stores.create()` works without requiring embedding model parameters
 - The system automatically uses the default model and its embedding dimension for any newly created vector store
-- Only one model can be marked as `default_configured: true`
+- The `vector_stores` section explicitly configures which embedding model to use as default

 ## Vector Store Operations

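For illustration, a minimal client-side sketch of the behavior described above (assumes a locally running stack on the default port and the `llama_stack_client` package; the store name is arbitrary):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# No embedding model or dimension arguments needed: the stack falls back to
# vector_stores.default_embedding_model_id from the run config shown above.
vector_store = client.vector_stores.create(name="my-documents")
print(vector_store.id)
```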
@@ -351,6 +351,15 @@ class AuthenticationRequiredError(Exception):
     pass


+class VectorStoresConfig(BaseModel):
+    """Configuration for vector stores in the stack."""
+
+    default_embedding_model_id: str = Field(
+        ...,
+        description="ID of the embedding model to use as default for vector stores when none is specified. Must reference a model defined in the 'models' section.",
+    )
+
+
 class QuotaPeriod(StrEnum):
     DAY = "day"

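To make the new model's behavior concrete, a standalone sketch mirroring the field added above (the example model ID is illustrative):

```python
from pydantic import BaseModel, Field, ValidationError

class VectorStoresConfig(BaseModel):
    """Configuration for vector stores in the stack."""

    default_embedding_model_id: str = Field(
        ...,
        description="ID of the embedding model to use as default for vector stores when none is specified.",
    )

cfg = VectorStoresConfig(default_embedding_model_id="nomic-ai/nomic-embed-text-v1.5")
print(cfg.model_dump())  # {'default_embedding_model_id': 'nomic-ai/nomic-embed-text-v1.5'}

try:
    VectorStoresConfig()  # the field is required, so this raises
except ValidationError as err:
    print(err.errors()[0]["loc"])  # ('default_embedding_model_id',)
```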
@@ -526,6 +535,11 @@ If not specified, a default SQLite store will be used.""",
         description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
     )

+    vector_stores: VectorStoresConfig | None = Field(
+        default=None,
+        description="Configuration for vector stores, including default embedding model",
+    )
+
     @field_validator("external_providers_dir")
     @classmethod
     def validate_external_providers_dir(cls, v):

@@ -409,6 +409,10 @@ async def instantiate_provider(
     if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry:
         args.append(run_config.telemetry.enabled)

+    # vector_io providers need access to run_config.vector_stores
+    if provider_spec.api == Api.vector_io and "run_config" in inspect.signature(getattr(module, method)).parameters:
+        args.append(run_config)
+
     fn = getattr(module, method)
     impl = await fn(*args)
     impl.__provider_id__ = provider.provider_id

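The guard added above keys off the entry point's signature; a small self-contained sketch of the same check (the provider function here is a placeholder, not a real provider):

```python
import inspect

async def get_provider_impl(config, deps, run_config=None):
    ...

# The resolver only appends run_config when the provider entry point declares
# a "run_config" parameter, so providers that have not been updated keep
# receiving their old two-argument call.
wants_run_config = "run_config" in inspect.signature(get_provider_impl).parameters
print(wants_run_config)  # True
```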
@@ -98,30 +98,6 @@ REGISTRY_REFRESH_TASK = None
 TEST_RECORDING_CONTEXT = None


-async def validate_default_embedding_model(impls: dict[Api, Any]):
-    """Validate that at most one embedding model is marked as default."""
-    if Api.models not in impls:
-        return
-
-    models_impl = impls[Api.models]
-    response = await models_impl.list_models()
-    models_list = response.data if hasattr(response, "data") else response
-
-    default_embedding_models = []
-    for model in models_list:
-        if model.model_type == "embedding" and model.metadata.get("default_configured") is True:
-            default_embedding_models.append(model.identifier)
-
-    if len(default_embedding_models) > 1:
-        raise ValueError(
-            f"Multiple embedding models marked as default_configured=True: {default_embedding_models}. "
-            "Only one embedding model can be marked as default."
-        )
-
-    if default_embedding_models:
-        logger.info(f"Default embedding model configured: {default_embedding_models[0]}")
-
-
 async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
     for rsrc, api, register_method, list_method in RESOURCES:
         objects = getattr(run_config, rsrc)

@@ -152,7 +128,48 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
             f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}",
         )

-    await validate_default_embedding_model(impls)
+
+async def validate_vector_stores_config(run_config: StackRunConfig, impls: dict[Api, Any]):
+    """Validate vector stores configuration."""
+    if not run_config.vector_stores:
+        return
+
+    vector_stores_config = run_config.vector_stores
+    default_model_id = vector_stores_config.default_embedding_model_id
+
+    if Api.models not in impls:
+        raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'")
+
+    models_impl = impls[Api.models]
+    response = await models_impl.list_models()
+    models_list = response.data if hasattr(response, "data") else response
+
+    # find default embedding model
+    default_model = None
+    for model in models_list:
+        if model.identifier == default_model_id:
+            default_model = model
+            break
+
+    if not default_model:
+        available_models = [m.identifier for m in models_list if m.model_type == "embedding"]
+        raise ValueError(
+            f"Embedding model '{default_model_id}' not found. Available embedding models: {available_models}"
+        )
+
+    if default_model.model_type != "embedding":
+        raise ValueError(f"Model '{default_model_id}' is type '{default_model.model_type}', not 'embedding'")
+
+    embedding_dimension = default_model.metadata.get("embedding_dimension")
+    if embedding_dimension is None:
+        raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
+
+    try:
+        int(embedding_dimension)
+    except ValueError as err:
+        raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
+
+    logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")


 class EnvVarError(Exception):

@@ -367,8 +384,8 @@ class Stack:
         await impls[Api.conversations].initialize()

         await register_resources(self.run_config, impls)

         await refresh_registry_once(impls)
+        await validate_vector_stores_config(self.run_config, impls)
         self.impls = impls

     def create_registry_refresh_task(self):

@@ -239,3 +239,5 @@ server:
   port: 8321
 telemetry:
   enabled: true
+vector_stores:
+  default_embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5

@@ -240,5 +240,7 @@ tool_groups:
     provider_id: rag-runtime
 server:
   port: 8321
+vector_stores:
+  default_embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5
 telemetry:
   enabled: true

@@ -239,3 +239,5 @@ server:
   port: 8321
 telemetry:
   enabled: true
+vector_stores:
+  default_embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5

@@ -13,6 +13,7 @@ from llama_stack.core.datatypes import (
     ProviderSpec,
     ShieldInput,
     ToolGroupInput,
+    VectorStoresConfig,
 )
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings

@@ -227,6 +228,9 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
             default_models=[],
             default_tool_groups=default_tool_groups,
             default_shields=default_shields,
+            vector_stores_config=VectorStoresConfig(
+                default_embedding_model_id="sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
+            ),
         ),
     },
     run_config_env_vars={

@@ -27,6 +27,7 @@ from llama_stack.core.datatypes import (
     ShieldInput,
     TelemetryConfig,
     ToolGroupInput,
+    VectorStoresConfig,
 )
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.utils.dynamic import instantiate_class_type

@@ -183,6 +184,7 @@ class RunConfigSettings(BaseModel):
     metadata_store: dict | None = None
     inference_store: dict | None = None
     conversations_store: dict | None = None
+    vector_stores_config: VectorStoresConfig | None = None
     telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))

     def run_config(

@@ -227,7 +229,7 @@ class RunConfigSettings(BaseModel):
         apis = sorted(providers.keys())

         # Return a dict that matches StackRunConfig structure
-        return {
+        config = {
             "version": LLAMA_STACK_RUN_CONFIG_VERSION,
             "image_name": name,
             "container_image": container_image,

@@ -261,6 +263,11 @@ class RunConfigSettings(BaseModel):
             "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
         }

+        if self.vector_stores_config:
+            config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True)
+
+        return config
+

 class DistributionTemplate(BaseModel):
     """

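A short sketch of what this branch contributes to the generated run config dict (assumes this branch's `llama_stack` package is importable; the model ID mirrors the starter template above):

```python
from llama_stack.core.datatypes import VectorStoresConfig

vector_stores_config = VectorStoresConfig(
    default_embedding_model_id="sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
)

config: dict = {}
if vector_stores_config:
    # Same serialization step as in run_config() above
    config["vector_stores"] = vector_stores_config.model_dump(exclude_none=True)

print(config)
# {'vector_stores': {'default_embedding_model_id': 'sentence-transformers/nomic-ai/nomic-embed-text-v1.5'}}
```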
@@ -59,7 +59,6 @@ class SentenceTransformersInferenceImpl(
                 provider_id=self.__provider_id__,
                 metadata={
                     "embedding_dimension": 768,
-                    "default_configured": True,
                 },
                 model_type=ModelType.embedding,
             ),

@@ -6,21 +6,29 @@

 from typing import Any

+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api

 from .config import ChromaVectorIOConfig


-async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: ChromaVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
+):
     from llama_stack.providers.remote.vector_io.chroma.chroma import (
         ChromaVectorIOAdapter,
     )

+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     impl = ChromaVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

@@ -6,21 +6,29 @@

 from typing import Any

+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api

 from .config import FaissVectorIOConfig


-async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: FaissVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
+):
     from .faiss import FaissVectorIOAdapter

     assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"

+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     impl = FaissVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

@@ -24,6 +24,7 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     VectorIO,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
     HealthResponse,

@@ -206,11 +207,13 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
         inference_api: Inference,
         models_api: Models,
         files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
         self.models_api = models_api
+        self.vector_stores_config = vector_stores_config
         self.cache: dict[str, VectorDBWithIndex] = {}

     async def initialize(self) -> None:

@@ -6,19 +6,27 @@

 from typing import Any

+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api

 from .config import MilvusVectorIOConfig


-async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: MilvusVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
+):
     from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter

+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     impl = MilvusVectorIOAdapter(
         config,
         deps[Api.inference],
-        deps[Api.models],
+        deps.get(Api.models),
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

@@ -6,20 +6,28 @@

 from typing import Any

+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api

 from .config import QdrantVectorIOConfig


-async def get_provider_impl(config: QdrantVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: QdrantVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
+):
     from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter

+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     assert isinstance(config, QdrantVectorIOConfig), f"Unexpected config type: {type(config)}"
     impl = QdrantVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

@@ -6,20 +6,28 @@

 from typing import Any

+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api

 from .config import SQLiteVectorIOConfig


-async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: SQLiteVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
+):
     from .sqlite_vec import SQLiteVecVectorIOAdapter

+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}"
     impl = SQLiteVecVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

@@ -24,6 +24,7 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     VectorIO,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
 from llama_stack.providers.utils.kvstore import kvstore_impl

@@ -416,11 +417,13 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
         inference_api: Inference,
         models_api: Models,
         files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
         self.models_api = models_api
+        self.vector_stores_config = vector_stores_config
         self.cache: dict[str, VectorDBWithIndex] = {}
         self.vector_db_store = None

@@ -4,19 +4,27 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api, ProviderSpec

 from .config import ChromaVectorIOConfig


-async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]):
+async def get_adapter_impl(
+    config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
+):
     from .chroma import ChromaVectorIOAdapter

+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     impl = ChromaVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

@@ -12,15 +12,17 @@ import chromadb
 from numpy.typing import NDArray

 from llama_stack.apis.files import Files
-from llama_stack.apis.inference import InterleavedContent
+from llama_stack.apis.inference import Inference, InterleavedContent
+from llama_stack.apis.models import Models
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
     QueryChunksResponse,
     VectorIO,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
+from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore

@@ -137,15 +139,17 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
     def __init__(
         self,
         config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig,
-        inference_api: Api.inference,
-        models_apis: Api.models,
+        inference_api: Inference,
+        models_apis: Models,
         files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         log.info(f"Initializing ChromaVectorIOAdapter with url: {config}")
         self.config = config
         self.inference_api = inference_api
         self.models_api = models_apis
+        self.vector_stores_config = vector_stores_config
         self.client = None
         self.cache = {}
         self.vector_db_store = None

@@ -4,21 +4,28 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api, ProviderSpec

 from .config import MilvusVectorIOConfig


-async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec]):
+async def get_adapter_impl(
+    config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
+):
     from .milvus import MilvusVectorIOAdapter

-    assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}"
+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
+    assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}"
     impl = MilvusVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

@@ -21,6 +21,7 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     VectorIO,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig

@@ -308,8 +309,9 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
         self,
         config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig,
         inference_api: Inference,
-        models_api: Models,
+        models_api: Models | None,
         files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config

@@ -317,6 +319,7 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
         self.client = None
         self.inference_api = inference_api
         self.models_api = models_api
+        self.vector_stores_config = vector_stores_config
         self.vector_db_store = None
         self.metadata_collection_name = "openai_vector_stores_metadata"

@@ -4,14 +4,26 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api, ProviderSpec

 from .config import PGVectorVectorIOConfig


-async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]):
+async def get_adapter_impl(
+    config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
+):
     from .pgvector import PGVectorVectorIOAdapter

-    impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps[Api.models], deps.get(Api.files, None))
+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+    impl = PGVectorVectorIOAdapter(
+        config,
+        deps[Api.inference],
+        deps[Api.models],
+        deps.get(Api.files),
+        vector_stores_config,
+    )
     await impl.initialize()
     return impl

@@ -23,6 +23,7 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     VectorIO,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
 from llama_stack.providers.utils.inference.prompt_adapter import (

@@ -346,11 +347,13 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
         inference_api: Inference,
         models_api: Models,
         files_api: Files | None = None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
         self.models_api = models_api
+        self.vector_stores_config = vector_stores_config
         self.conn = None
         self.cache = {}
         self.vector_db_store = None

@@ -4,19 +4,27 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api, ProviderSpec

 from .config import QdrantVectorIOConfig


-async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]):
+async def get_adapter_impl(
+    config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
+):
     from .qdrant import QdrantVectorIOAdapter

+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     impl = QdrantVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

@@ -25,6 +25,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreChunkingStrategy,
     VectorStoreFileObject,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig

@@ -163,6 +164,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
         inference_api: Inference,
         models_api: Models,
         files_api: Files | None = None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config

@@ -170,6 +172,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
         self.cache = {}
         self.inference_api = inference_api
         self.models_api = models_api
+        self.vector_stores_config = vector_stores_config
         self.vector_db_store = None
         self._qdrant_lock = asyncio.Lock()

@@ -4,19 +4,27 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api, ProviderSpec

 from .config import WeaviateVectorIOConfig


-async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]):
+async def get_adapter_impl(
+    config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
+):
     from .weaviate import WeaviateVectorIOAdapter

+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     impl = WeaviateVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

@@ -19,6 +19,7 @@ from llama_stack.apis.inference import Inference
 from llama_stack.apis.models import Models
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate

@@ -286,11 +287,13 @@ class WeaviateVectorIOAdapter(
         inference_api: Inference,
         models_api: Models,
         files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
         self.models_api = models_api
+        self.vector_stores_config = vector_stores_config
         self.client_cache = {}
         self.cache = {}
         self.vector_db_store = None

@@ -44,6 +44,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreSearchResponse,
     VectorStoreSearchResponsePage,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.id_generation import generate_object_id
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.kvstore.api import KVStore

@@ -81,13 +82,17 @@ class OpenAIVectorStoreMixin(ABC):
     # Implementing classes should call super().__init__() in their __init__ method
     # to properly initialize the mixin attributes.
     def __init__(
-        self, files_api: Files | None = None, kvstore: KVStore | None = None, models_api: Models | None = None
+        self,
+        files_api: Files | None = None,
+        kvstore: KVStore | None = None,
     ):
         self.openai_vector_stores: dict[str, dict[str, Any]] = {}
         self.openai_file_batches: dict[str, dict[str, Any]] = {}
         self.files_api = files_api
         self.kvstore = kvstore
-        self.models_api = models_api
+        # These will be set by implementing classes
+        self.models_api: Models | None = None
+        self.vector_stores_config: VectorStoresConfig | None = None
         self._last_file_batch_cleanup_time = 0
         self._file_batch_tasks: dict[str, asyncio.Task[None]] = {}

@@ -474,24 +479,6 @@ class OpenAIVectorStoreMixin(ABC):
         store_info = self.openai_vector_stores[vector_db_id]
         return VectorStoreObject.model_validate(store_info)

-    async def _get_embedding_models(self) -> list[Model]:
-        """Get list of embedding models from the models API."""
-        if not self.models_api:
-            return []
-
-        models_response = await self.models_api.list_models()
-        models_list = models_response.data if hasattr(models_response, "data") else models_response
-
-        embedding_models = []
-        for model in models_list:
-            if not isinstance(model, Model):
-                logger.warning(f"Non-Model object found in models list: {type(model)} - {model}")
-                continue
-            if model.model_type == "embedding":
-                embedding_models.append(model)
-
-        return embedding_models
-
     async def _get_embedding_dimension_for_model(self, model_id: str) -> int | None:
         """Get embedding dimension for a specific model by looking it up in the models API.

@@ -501,9 +488,18 @@ class OpenAIVectorStoreMixin(ABC):
         Returns:
             The embedding dimension for the model, or None if not found
         """
-        embedding_models = await self._get_embedding_models()
+        if not self.models_api:
+            return None
+
+        models_response = await self.models_api.list_models()
+        models_list = models_response.data if hasattr(models_response, "data") else models_response

-        for model in embedding_models:
+        for model in models_list:
+            if not isinstance(model, Model):
+                continue
+            if model.model_type != "embedding":
+                continue
+
             # Check for exact match first
             if model.identifier == model_id:
                 embedding_dimension = model.metadata.get("embedding_dimension")

@@ -523,35 +519,23 @@ class OpenAIVectorStoreMixin(ABC):
         return None

     async def _get_default_embedding_model_and_dimension(self) -> tuple[str, int] | None:
-        """Get default embedding model from the models API.
+        """Get default embedding model from vector stores config.

-        Looks for embedding models marked with default_configured=True in metadata.
-        Returns None if no default embedding model is found.
-        Raises ValueError if multiple defaults are found.
+        Returns None if no vector stores config is provided.
         """
-        embedding_models = await self._get_embedding_models()
-
-        default_models = []
-        for model in embedding_models:
-            if model.metadata.get("default_configured") is True:
-                default_models.append(model.identifier)
-
-        if len(default_models) > 1:
-            raise ValueError(
-                f"Multiple embedding models marked as default_configured=True: {default_models}. "
-                "Only one embedding model can be marked as default."
-            )
-
-        if default_models:
-            model_id = default_models[0]
-            embedding_dimension = await self._get_embedding_dimension_for_model(model_id)
-            if embedding_dimension is None:
-                raise ValueError(f"Embedding model '{model_id}' has no embedding_dimension in metadata")
-            logger.info(f"Using default embedding model: {model_id} with dimension {embedding_dimension}")
-            return model_id, embedding_dimension
-
-        logger.debug("No default embedding models found")
-        return None
+        if not self.vector_stores_config:
+            logger.info("No vector stores config provided")
+            return None
+
+        model_id = self.vector_stores_config.default_embedding_model_id
+        embedding_dimension = await self._get_embedding_dimension_for_model(model_id)
+        if embedding_dimension is None:
+            raise ValueError(f"Embedding model '{model_id}' not found or has no embedding_dimension in metadata")
+
+        logger.debug(
+            f"Using default embedding model from vector stores config: {model_id} with dimension {embedding_dimension}"
+        )
+        return model_id, embedding_dimension

     async def openai_list_vector_stores(
         self,

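A standalone sketch of the fallback flow the mixin now follows, with the config object and the models-API lookup stubbed out (names here are illustrative, not the adapter's actual wiring):

```python
import asyncio

class FakeVectorStoresConfig:
    default_embedding_model_id = "nomic-ai/nomic-embed-text-v1.5"

async def lookup_dimension(model_id: str) -> int | None:
    return 768  # stand-in for _get_embedding_dimension_for_model

async def resolve_default(vector_stores_config, lookup) -> tuple[str, int] | None:
    if not vector_stores_config:
        return None  # no config, so no default: callers must pass a model explicitly
    model_id = vector_stores_config.default_embedding_model_id
    dimension = await lookup(model_id)
    if dimension is None:
        raise ValueError(f"Embedding model '{model_id}' not found or has no embedding_dimension in metadata")
    return model_id, dimension

print(asyncio.run(resolve_default(FakeVectorStoresConfig(), lookup_dimension)))
# ('nomic-ai/nomic-embed-text-v1.5', 768)
print(asyncio.run(resolve_default(None, lookup_dimension)))  # None
```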
@@ -21,6 +21,7 @@ from llama_stack_client import LlamaStackClient
 from openai import OpenAI

 from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.stack import run_config_from_adhoc_config_spec
 from llama_stack.env import get_env_or_fail

@@ -236,6 +237,13 @@ def instantiate_llama_stack_client(session):

     if "=" in config:
         run_config = run_config_from_adhoc_config_spec(config)
+
+        # --stack-config bypasses template so need this to set default embedding model
+        if "vector_io" in config and "inference" in config:
+            run_config.vector_stores = VectorStoresConfig(
+                default_embedding_model_id="inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
+            )
+
         run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
         with open(run_config_file.name, "w") as f:
             yaml.dump(run_config.model_dump(), f)

@@ -4,90 +4,44 @@
 # the root directory of this source tree.

-"""
-Unit tests for Stack validation functions.
-"""
+"""Unit tests for Stack validation functions."""

 from unittest.mock import AsyncMock

 import pytest

 from llama_stack.apis.models import Model, ModelType
-from llama_stack.core.stack import validate_default_embedding_model
+from llama_stack.core.datatypes import StackRunConfig, VectorStoresConfig
+from llama_stack.core.stack import validate_vector_stores_config
 from llama_stack.providers.datatypes import Api


-class TestStackValidation:
-    """Test Stack validation functions."""
-
-    @pytest.mark.parametrize(
-        "models,should_raise",
-        [
-            ([], False),  # No models
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    )
-                ],
-                False,
-            ),  # Single default
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    ),
-                    Model(
-                        identifier="emb2",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb2",
-                    ),
-                ],
-                True,
-            ),  # Multiple defaults
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    ),
-                    Model(
-                        identifier="llm1",
-                        model_type=ModelType.llm,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="llm1",
-                    ),
-                ],
-                False,
-            ),  # Ignores non-embedding
-        ],
-    )
-    async def test_validate_default_embedding_model(self, models, should_raise):
-        """Test validation with various model configurations."""
-        mock_models_impl = AsyncMock()
-        mock_models_impl.list_models.return_value = models
-        impls = {Api.models: mock_models_impl}
-
-        if should_raise:
-            with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"):
-                await validate_default_embedding_model(impls)
-        else:
-            await validate_default_embedding_model(impls)
-
-    async def test_validate_default_embedding_model_no_models_api(self):
-        """Test validation when models API is not available."""
-        await validate_default_embedding_model({})
+class TestVectorStoresValidation:
+    async def test_validate_missing_model(self):
+        """Test validation fails when model not found."""
+        run_config = StackRunConfig(
+            image_name="test", providers={}, vector_stores=VectorStoresConfig(default_embedding_model_id="missing")
+        )
+        mock_models = AsyncMock()
+        mock_models.list_models.return_value = []
+
+        with pytest.raises(ValueError, match="not found"):
+            await validate_vector_stores_config(run_config, {Api.models: mock_models})
+
+    async def test_validate_success(self):
+        """Test validation passes with valid model."""
+        run_config = StackRunConfig(
+            image_name="test", providers={}, vector_stores=VectorStoresConfig(default_embedding_model_id="valid")
+        )
+        mock_models = AsyncMock()
+        mock_models.list_models.return_value = [
+            Model(
+                identifier="valid",
+                model_type=ModelType.embedding,
+                metadata={"embedding_dimension": 768},
+                provider_id="p",
+                provider_resource_id="valid",
+            )
+        ]
+
+        await validate_vector_stores_config(run_config, {Api.models: mock_models})

@@ -6,13 +6,12 @@
 import json
 import time
-from unittest.mock import AsyncMock, Mock, patch
+from unittest.mock import AsyncMock, patch

 import numpy as np
 import pytest

 from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,

@@ -996,96 +995,6 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
     assert batch.file_counts.in_progress == 8


-async def test_get_default_embedding_model_success(vector_io_adapter):
-    """Test successful default embedding model detection."""
-    # Mock models API with a default model
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="nomic-embed-text-v1.5",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={
-                        "embedding_dimension": 768,
-                        "default_configured": True,
-                    },
-                )
-            ]
-        )
-    )
-
-    vector_io_adapter.models_api = mock_models_api
-    result = await vector_io_adapter._get_default_embedding_model_and_dimension()
-
-    assert result is not None
-    model_id, dimension = result
-    assert model_id == "nomic-embed-text-v1.5"
-    assert dimension == 768
-
-
-async def test_get_default_embedding_model_multiple_defaults_error(vector_io_adapter):
-    """Test error when multiple models are marked as default."""
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="model1",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 768, "default_configured": True},
-                ),
-                Model(
-                    identifier="model2",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 512, "default_configured": True},
-                ),
-            ]
-        )
-    )
-
-    vector_io_adapter.models_api = mock_models_api
-
-    with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"):
-        await vector_io_adapter._get_default_embedding_model_and_dimension()
-
-
-async def test_openai_create_vector_store_uses_default_model(vector_io_adapter):
-    """Test that vector store creation uses default embedding model when none specified."""
-    # Mock models API and dependencies
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="default-model",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 512, "default_configured": True},
-                )
-            ]
-        )
-    )
-
-    vector_io_adapter.models_api = mock_models_api
-    vector_io_adapter.register_vector_db = AsyncMock()
-    vector_io_adapter.__provider_id__ = "test-provider"
-
-    # Create vector store without specifying embedding model
-    params = OpenAICreateVectorStoreRequestWithExtraBody(name="test-store")
-    result = await vector_io_adapter.openai_create_vector_store(params)
-
-    # Verify the vector store was created with default model
-    assert result.name == "test-store"
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
-    assert call_args.embedding_model == "default-model"
-    assert call_args.embedding_dimension == 512
-
-
 async def test_embedding_config_from_metadata(vector_io_adapter):
     """Test that embedding configuration is correctly extracted from metadata."""