Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-12 20:12:33 +00:00)
fix test

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
updating structure of default
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
fix model id creation
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

parent b3addc94d1 · commit 7ffd20d112
10 changed files with 119 additions and 62 deletions
@@ -87,20 +87,19 @@ Llama Stack provides OpenAI-compatible RAG capabilities through:
 To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your run.yaml like so:
 
 ```yaml
-models:
-  - model_id: nomic-ai/nomic-embed-text-v1.5
-    provider_id: inline::sentence-transformers
-    metadata:
-      embedding_dimension: 768
-
 vector_stores:
-  default_embedding_model_id: nomic-ai/nomic-embed-text-v1.5
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
 ```
 
 With this configuration:
-- `client.vector_stores.create()` works without requiring embedding model parameters
-- The system automatically uses the default model and its embedding dimension for any newly created vector store
-- The `vector_stores` section explicitly configures which embedding model to use as default
+- `client.vector_stores.create()` works without requiring embedding model or provider parameters
+- The system automatically uses the default vector store provider (`faiss`) when multiple providers are available
+- The system automatically uses the default embedding model (`sentence-transformers/nomic-ai/nomic-embed-text-v1.5`) for any newly created vector store
+- The `default_provider_id` specifies which vector storage backend to use
+- The `default_embedding_model` specifies both the inference provider and model for embeddings
 
 ## Vector Store Operations
 
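As a quick sanity check of the new shape, here is a minimal standalone sketch (assuming PyYAML is installed; the variable names are illustrative, not part of Llama Stack) showing how the two fields of `default_embedding_model` combine into the provider-prefixed model ID used in the bullets above:

```python
# Illustrative only: parse the run.yaml snippet above and join the two fields
# into the provider-prefixed model ID that the router constructs internally.
import yaml

RUN_YAML = """\
vector_stores:
  default_provider_id: faiss
  default_embedding_model:
    provider_id: sentence-transformers
    model_id: nomic-ai/nomic-embed-text-v1.5
"""

cfg = yaml.safe_load(RUN_YAML)["vector_stores"]
dem = cfg["default_embedding_model"]
print(f"{dem['provider_id']}/{dem['model_id']}")
# -> sentence-transformers/nomic-ai/nomic-embed-text-v1.5
```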
@@ -109,14 +108,15 @@ With this configuration:
 You can create vector stores with automatic or explicit embedding model selection:
 
 ```python
-# Automatic - uses default configured embedding model
+# Automatic - uses default configured embedding model and vector store provider
 vs = client.vector_stores.create()
 
-# Explicit - specify embedding model when you need a specific one
+# Explicit - specify embedding model and/or provider when you need specific ones
 vs = client.vector_stores.create(
     extra_body={
-        "embedding_model": "nomic-ai/nomic-embed-text-v1.5",
-        "embedding_dimension": 768
+        "provider_id": "faiss",  # Optional: specify vector store provider
+        "embedding_model": "sentence-transformers/nomic-ai/nomic-embed-text-v1.5",
+        "embedding_dimension": 768  # Optional: will be auto-detected if not provided
     }
 )
 ```
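For context, a hedged end-to-end sketch of the automatic path (it assumes a local Llama Stack server on the default port, started from a distribution carrying the `vector_stores` defaults above; not a definitive example):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# No embedding model, dimension, or provider needed - all resolved from run.yaml.
vs = client.vector_stores.create()
print(vs.id)

# The new store is immediately usable; listing files on it should just return
# an empty page rather than raising.
client.vector_stores.files.list(vector_store_id=vs.id)
```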
@@ -351,17 +351,30 @@ class AuthenticationRequiredError(Exception):
     pass
 
 
+class DefaultEmbeddingModel(BaseModel):
+    """Configuration for default embedding model."""
+
+    provider_id: str = Field(
+        ...,
+        description="ID of the inference provider that serves the embedding model (e.g., 'sentence-transformers').",
+    )
+    model_id: str = Field(
+        ...,
+        description="ID of the embedding model (e.g., 'nomic-ai/nomic-embed-text-v1.5').",
+    )
+
+
 class VectorStoresConfig(BaseModel):
     """Configuration for vector stores in the stack."""
 
-    embedding_model_id: str = Field(
-        ...,
-        description="ID of the embedding model to use as default for vector stores when none is specified. Must reference a model defined in the 'models' section.",
-    )
-    provider_id: str | None = Field(
+    default_provider_id: str | None = Field(
         default=None,
         description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.",
     )
+    default_embedding_model: DefaultEmbeddingModel | None = Field(
+        default=None,
+        description="Default embedding model configuration for vector stores.",
+    )
 
 
 class QuotaPeriod(StrEnum):
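Since both fields now default to `None`, an empty `VectorStoresConfig` is valid, which is what lets a distribution omit the section entirely. A small sketch against the classes introduced in this hunk (import path taken from the test changes later in this diff):

```python
from llama_stack.core.datatypes import DefaultEmbeddingModel, VectorStoresConfig

# An empty config is valid: both defaults are optional.
empty = VectorStoresConfig()
assert empty.default_provider_id is None
assert empty.default_embedding_model is None

# Fully specified, mirroring the starter distribution below.
full = VectorStoresConfig(
    default_provider_id="faiss",
    default_embedding_model=DefaultEmbeddingModel(
        provider_id="sentence-transformers",
        model_id="nomic-ai/nomic-embed-text-v1.5",
    ),
)
```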
@@ -126,7 +126,11 @@ class VectorIORouter(VectorIO):
 
         # Use default embedding model if not specified
         if embedding_model is None and self.vector_stores_config is not None:
-            embedding_model = self.vector_stores_config.embedding_model_id
+            if self.vector_stores_config.default_embedding_model is not None:
+                # Construct the full model ID with provider prefix
+                embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id
+                model_id = self.vector_stores_config.default_embedding_model.model_id
+                embedding_model = f"{embedding_provider_id}/{model_id}"
 
         if embedding_model is not None and embedding_dimension is None:
             embedding_dimension = await self._get_embedding_model_dimension(embedding_model)
@@ -139,8 +143,8 @@ class VectorIORouter(VectorIO):
         if num_providers > 1:
             available_providers = list(self.routing_table.impls_by_provider_id.keys())
             # Use default configured provider
-            if self.vector_stores_config and self.vector_stores_config.provider_id:
-                default_provider = self.vector_stores_config.provider_id
+            if self.vector_stores_config and self.vector_stores_config.default_provider_id:
+                default_provider = self.vector_stores_config.default_provider_id
                 if default_provider in available_providers:
                     provider_id = default_provider
                     logger.debug(f"Using configured default vector store provider: {provider_id}")
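Taken together, the two router hunks implement a pair of fallbacks: the embedding model defaults to the configured `<provider_id>/<model_id>`, and the vector store provider defaults to `default_provider_id` when several providers are registered. A condensed, self-contained sketch of that logic (plain functions with illustrative names, not the actual `VectorIORouter` methods):

```python
# Illustrative distillation of the two fallbacks above; not the router's real API.
def resolve_embedding_model(requested: str | None, cfg) -> str | None:
    """Fall back to the configured default, joined as '<provider_id>/<model_id>'."""
    if requested is not None:
        return requested
    if cfg is not None and cfg.default_embedding_model is not None:
        dem = cfg.default_embedding_model
        return f"{dem.provider_id}/{dem.model_id}"
    return None


def resolve_provider(available: list[str], cfg) -> str | None:
    """Prefer the sole provider; otherwise use the configured default if present."""
    if len(available) == 1:
        return available[0]
    if cfg is not None and cfg.default_provider_id in available:
        return cfg.default_provider_id
    return None
```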
@@ -135,41 +135,52 @@ async def validate_vector_stores_config(run_config: StackRunConfig, impls: dict[
         return
 
     vector_stores_config = run_config.vector_stores
-    default_model_id = vector_stores_config.embedding_model_id
 
-    if Api.models not in impls:
-        raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'")
+    # Validate default embedding model if configured
+    if vector_stores_config.default_embedding_model:
+        default_embedding_model = vector_stores_config.default_embedding_model
+        provider_id = default_embedding_model.provider_id
+        model_id = default_embedding_model.model_id
+        # Construct the full model identifier
+        default_model_id = f"{provider_id}/{model_id}"
+
+        if Api.models not in impls:
+            raise ValueError(
+                f"Models API is not available but vector_stores config requires model '{default_model_id}'"
+            )
 
-    models_impl = impls[Api.models]
-    response = await models_impl.list_models()
-    models_list = response.data if hasattr(response, "data") else response
+        models_impl = impls[Api.models]
+        response = await models_impl.list_models()
+        models_list = response.data if hasattr(response, "data") else response
 
-    # find default embedding model
-    default_model = None
-    for model in models_list:
-        if model.identifier == default_model_id:
-            default_model = model
-            break
+        # find default embedding model
+        default_model = None
+        for model in models_list:
+            if model.identifier == default_model_id:
+                default_model = model
+                break
 
-    if not default_model:
-        available_models = [m.identifier for m in models_list if m.model_type == "embedding"]
-        raise ValueError(
-            f"Embedding model '{default_model_id}' not found. Available embedding models: {available_models}"
-        )
+        if not default_model:
+            available_models = [m.identifier for m in models_list if m.model_type == "embedding"]
+            raise ValueError(
+                f"Embedding model '{default_model_id}' not found. Available embedding models: {available_models}"
+            )
 
-    if default_model.model_type != "embedding":
-        raise ValueError(f"Model '{default_model_id}' is type '{default_model.model_type}', not 'embedding'")
+        if default_model.model_type != "embedding":
+            raise ValueError(f"Model '{default_model_id}' is type '{default_model.model_type}', not 'embedding'")
 
-    embedding_dimension = default_model.metadata.get("embedding_dimension")
-    if embedding_dimension is None:
-        raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
+        embedding_dimension = default_model.metadata.get("embedding_dimension")
+        if embedding_dimension is None:
+            raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
 
-    try:
-        int(embedding_dimension)
-    except ValueError as err:
-        raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
+        try:
+            int(embedding_dimension)
+        except ValueError as err:
+            raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
 
-    logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")
+        logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")
+
+    # If no default embedding model is configured, that's fine - validation passes
 
 
 class EnvVarError(Exception):
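The validation above only runs when a default embedding model is configured, and it checks four things in order: the model exists under its provider-prefixed identifier, it is an embedding model, it declares an `embedding_dimension`, and that dimension is integer-coercible. A condensed standalone sketch of the same checks, using plain dicts as stand-ins for the Models API objects (illustrative only):

```python
# Standalone distillation of the checks above; `models` uses plain dicts in
# place of the Models API objects, purely for illustration.
def validate_default_embedding_model(models: list[dict], provider_id: str, model_id: str) -> int:
    default_model_id = f"{provider_id}/{model_id}"
    model = next((m for m in models if m["identifier"] == default_model_id), None)
    if model is None:
        available = [m["identifier"] for m in models if m["model_type"] == "embedding"]
        raise ValueError(f"Embedding model '{default_model_id}' not found. Available embedding models: {available}")
    if model["model_type"] != "embedding":
        raise ValueError(f"Model '{default_model_id}' is type '{model['model_type']}', not 'embedding'")
    dim = model["metadata"].get("embedding_dimension")
    if dim is None:
        raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
    return int(dim)


models = [{"identifier": "sentence-transformers/nomic-ai/nomic-embed-text-v1.5",
           "model_type": "embedding", "metadata": {"embedding_dimension": 768}}]
assert validate_default_embedding_model(
    models, "sentence-transformers", "nomic-ai/nomic-embed-text-v1.5"
) == 768
```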
@@ -255,4 +255,7 @@ server:
 telemetry:
   enabled: true
 vector_stores:
-  embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
@@ -258,4 +258,7 @@ server:
 telemetry:
   enabled: true
 vector_stores:
-  embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
@@ -255,4 +255,7 @@ server:
 telemetry:
   enabled: true
 vector_stores:
-  embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
@@ -9,6 +9,7 @@ from typing import Any
 
 from llama_stack.core.datatypes import (
     BuildProvider,
+    DefaultEmbeddingModel,
     Provider,
     ProviderSpec,
     ShieldInput,
@@ -249,7 +250,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
             default_tool_groups=default_tool_groups,
             default_shields=default_shields,
             vector_stores_config=VectorStoresConfig(
-                embedding_model_id="sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
+                default_provider_id="faiss",
+                default_embedding_model=DefaultEmbeddingModel(
+                    provider_id="sentence-transformers",
+                    model_id="nomic-ai/nomic-embed-text-v1.5",
+                ),
             ),
         ),
     },
@@ -8,9 +8,8 @@ import time
 from io import BytesIO
 
 import pytest
-from llama_stack_client import BadRequestError, NotFoundError
+from llama_stack_client import BadRequestError
 from openai import BadRequestError as OpenAIBadRequestError
-from openai import NotFoundError as OpenAINotFoundError
 
 from llama_stack.apis.vector_io import Chunk
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
@@ -839,7 +838,7 @@ def test_openai_vector_store_list_files_invalid_vector_store(
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         errors = ValueError
     else:
-        errors = (NotFoundError, OpenAINotFoundError)
+        errors = (BadRequestError, OpenAIBadRequestError)
 
     with pytest.raises(errors):
         compat_client.vector_stores.files.list(vector_store_id="abc123")
@@ -1528,11 +1527,11 @@ def test_openai_vector_store_file_batch_error_handling(
             batch_id="non_existent_batch_id",
         )
 
-    # Test operations on non-existent vector store (returns NotFoundError)
+    # Test operations on non-existent vector store (returns BadRequestError)
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         vector_store_errors = ValueError
     else:
-        vector_store_errors = (NotFoundError, OpenAINotFoundError)
+        vector_store_errors = (BadRequestError, OpenAIBadRequestError)
 
     with pytest.raises(vector_store_errors):  # Should raise an error for non-existent vector store
         compat_client.vector_stores.file_batches.create(
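The test edits above track a behavior change in the API surface: an invalid vector store ID now comes back as a 400 rather than a 404, so HTTP clients raise `BadRequestError` while the in-process library client still surfaces a bare `ValueError`. A sketch of the selection pattern both tests share (illustrative helper, not part of the test suite):

```python
# Illustrative helper capturing the expectation matrix used by both tests above.
from llama_stack_client import BadRequestError
from openai import BadRequestError as OpenAIBadRequestError

from llama_stack.core.library_client import LlamaStackAsLibraryClient


def expected_errors(compat_client):
    if isinstance(compat_client, LlamaStackAsLibraryClient):
        return ValueError  # in-process call: the raw Python exception propagates
    return (BadRequestError, OpenAIBadRequestError)  # over HTTP: a 400 response
```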
@@ -11,7 +11,7 @@ from unittest.mock import AsyncMock
 import pytest
 
 from llama_stack.apis.models import Model, ModelType
-from llama_stack.core.datatypes import StackRunConfig, VectorStoresConfig
+from llama_stack.core.datatypes import DefaultEmbeddingModel, StackRunConfig, VectorStoresConfig
 from llama_stack.core.stack import validate_vector_stores_config
 from llama_stack.providers.datatypes import Api
 
@@ -20,7 +20,15 @@ class TestVectorStoresValidation:
     async def test_validate_missing_model(self):
         """Test validation fails when model not found."""
         run_config = StackRunConfig(
-            image_name="test", providers={}, vector_stores=VectorStoresConfig(embedding_model_id="missing")
+            image_name="test",
+            providers={},
+            vector_stores=VectorStoresConfig(
+                default_provider_id="faiss",
+                default_embedding_model=DefaultEmbeddingModel(
+                    provider_id="p",
+                    model_id="missing",
+                ),
+            ),
         )
         mock_models = AsyncMock()
         mock_models.list_models.return_value = []
@@ -31,12 +39,20 @@ class TestVectorStoresValidation:
     async def test_validate_success(self):
         """Test validation passes with valid model."""
         run_config = StackRunConfig(
-            image_name="test", providers={}, vector_stores=VectorStoresConfig(embedding_model_id="valid")
+            image_name="test",
+            providers={},
+            vector_stores=VectorStoresConfig(
+                default_provider_id="faiss",
+                default_embedding_model=DefaultEmbeddingModel(
+                    provider_id="p",
+                    model_id="valid",
+                ),
+            ),
         )
         mock_models = AsyncMock()
         mock_models.list_models.return_value = [
             Model(
-                identifier="valid",
+                identifier="p/valid",  # Must match provider_id/model_id format
                 model_type=ModelType.embedding,
                 metadata={"embedding_dimension": 768},
                 provider_id="p",
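For completeness, a hedged sketch of how such a test case is driven end to end. The actual invocation falls outside the hunks shown here; the call shape follows the `validate_vector_stores_config(run_config, impls)` signature from the hunk header earlier, the `impls` dict keyed by `Api.models` is inferred from `impls[Api.models]` in the validator, and an async-capable pytest runner is assumed:

```python
import pytest
from unittest.mock import AsyncMock

from llama_stack.core.datatypes import DefaultEmbeddingModel, StackRunConfig, VectorStoresConfig
from llama_stack.core.stack import validate_vector_stores_config
from llama_stack.providers.datatypes import Api


async def test_validate_missing_model_sketch():
    run_config = StackRunConfig(
        image_name="test",
        providers={},
        vector_stores=VectorStoresConfig(
            default_provider_id="faiss",
            default_embedding_model=DefaultEmbeddingModel(provider_id="p", model_id="missing"),
        ),
    )
    mock_models = AsyncMock()
    mock_models.list_models.return_value = []
    # "p/missing" is not in the (empty) model list, so validation should fail.
    with pytest.raises(ValueError):
        await validate_vector_stores_config(run_config, {Api.models: mock_models})
```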