mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
Merge branch 'main' into rename
This commit is contained in:
commit
dcd4370806
3 changed files with 48 additions and 1 deletions
|
|
@@ -417,7 +417,7 @@ class InferenceRouter(Inference):
|
||||||
prompt_tokens=chunk.usage.prompt_tokens,
|
prompt_tokens=chunk.usage.prompt_tokens,
|
||||||
completion_tokens=chunk.usage.completion_tokens,
|
completion_tokens=chunk.usage.completion_tokens,
|
||||||
total_tokens=chunk.usage.total_tokens,
|
total_tokens=chunk.usage.total_tokens,
|
||||||
model_id=fully_qualified_model_id,
|
fully_qualified_model_id=fully_qualified_model_id,
|
||||||
provider_id=provider_id,
|
provider_id=provider_id,
|
||||||
)
|
)
|
||||||
for metric in metrics:
|
for metric in metrics:
|
||||||
|
|
|
||||||
|
|
@@ -17,7 +17,9 @@ from llama_stack_api import (
|
||||||
HealthResponse,
|
HealthResponse,
|
||||||
HealthStatus,
|
HealthStatus,
|
||||||
InterleavedContent,
|
InterleavedContent,
|
||||||
|
ModelNotFoundError,
|
||||||
ModelType,
|
ModelType,
|
||||||
|
ModelTypeError,
|
||||||
OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
|
OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
|
||||||
OpenAICreateVectorStoreRequestWithExtraBody,
|
OpenAICreateVectorStoreRequestWithExtraBody,
|
||||||
QueryChunksResponse,
|
QueryChunksResponse,
|
||||||
|
|
@@ -124,6 +126,14 @@ class VectorIORouter(VectorIO):
|
||||||
if embedding_model is not None and embedding_dimension is None:
|
if embedding_model is not None and embedding_dimension is None:
|
||||||
embedding_dimension = await self._get_embedding_model_dimension(embedding_model)
|
embedding_dimension = await self._get_embedding_model_dimension(embedding_model)
|
||||||
|
|
||||||
|
# Validate that embedding model exists and is of the correct type
|
||||||
|
if embedding_model is not None:
|
||||||
|
model = await self.routing_table.get_object_by_identifier("model", embedding_model)
|
||||||
|
if model is None:
|
||||||
|
raise ModelNotFoundError(embedding_model)
|
||||||
|
if model.model_type != ModelType.embedding:
|
||||||
|
raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
|
||||||
|
|
||||||
# Auto-select provider if not specified
|
# Auto-select provider if not specified
|
||||||
if provider_id is None:
|
if provider_id is None:
|
||||||
num_providers = len(self.routing_table.impls_by_provider_id)
|
num_providers = len(self.routing_table.impls_by_provider_id)
|
||||||
|
|
|
||||||
|
|
@@ -7,6 +7,12 @@
|
||||||
from unittest.mock import AsyncMock, Mock
|
from unittest.mock import AsyncMock, Mock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from llama_stack_api import (
|
||||||
|
ModelNotFoundError,
|
||||||
|
ModelType,
|
||||||
|
ModelTypeError,
|
||||||
|
OpenAICreateVectorStoreRequestWithExtraBody,
|
||||||
|
)
|
||||||
|
|
||||||
from llama_stack.core.routers.vector_io import VectorIORouter
|
from llama_stack.core.routers.vector_io import VectorIORouter
|
||||||
from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody
|
from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody
|
||||||
|
|
@@ -21,6 +27,7 @@ async def test_single_provider_auto_selection():
|
||||||
Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384})
|
Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384})
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
mock_routing_table.get_object_by_identifier = AsyncMock(return_value=Mock(model_type=ModelType.embedding))
|
||||||
mock_routing_table.register_vector_store = AsyncMock(
|
mock_routing_table.register_vector_store = AsyncMock(
|
||||||
return_value=Mock(identifier="vs_123", provider_id="inline::faiss", provider_resource_id="vs_123")
|
return_value=Mock(identifier="vs_123", provider_id="inline::faiss", provider_resource_id="vs_123")
|
||||||
)
|
)
|
||||||
|
|
@@ -48,6 +55,7 @@ async def test_create_vector_stores_multiple_providers_missing_provider_id_error
|
||||||
Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384})
|
Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384})
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
mock_routing_table.get_object_by_identifier = AsyncMock(return_value=Mock(model_type=ModelType.embedding))
|
||||||
router = VectorIORouter(mock_routing_table)
|
router = VectorIORouter(mock_routing_table)
|
||||||
request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate(
|
request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate(
|
||||||
{"name": "test_store", "embedding_model": "all-MiniLM-L6-v2"}
|
{"name": "test_store", "embedding_model": "all-MiniLM-L6-v2"}
|
||||||
|
|
@@ -117,3 +125,32 @@ async def test_update_vector_store_same_provider_id_succeeds():
|
||||||
provider.openai_update_vector_store.assert_called_once_with(
|
provider.openai_update_vector_store.assert_called_once_with(
|
||||||
vector_store_id="vs_123", name="updated_name", expires_after=None, metadata={"provider_id": "inline::faiss"}
|
vector_store_id="vs_123", name="updated_name", expires_after=None, metadata={"provider_id": "inline::faiss"}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def test_create_vector_store_with_unknown_embedding_model_raises_error():
|
||||||
|
"""Test that creating a vector store with an unknown embedding model raises
|
||||||
|
ModelNotFoundError."""
|
||||||
|
mock_routing_table = Mock(impls_by_provider_id={"provider": "mock"})
|
||||||
|
mock_routing_table.get_object_by_identifier = AsyncMock(return_value=None)
|
||||||
|
|
||||||
|
router = VectorIORouter(mock_routing_table)
|
||||||
|
request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate(
|
||||||
|
{"embedding_model": "unknown-model", "embedding_dimension": 384}
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(ModelNotFoundError, match="Model 'unknown-model' not found"):
|
||||||
|
await router.openai_create_vector_store(request)
|
||||||
|
|
||||||
|
|
||||||
|
async def test_create_vector_store_with_wrong_model_type_raises_error():
|
||||||
|
"""Test that creating a vector store with a non-embedding model raises ModelTypeError."""
|
||||||
|
mock_routing_table = Mock(impls_by_provider_id={"provider": "mock"})
|
||||||
|
mock_routing_table.get_object_by_identifier = AsyncMock(return_value=Mock(model_type=ModelType.llm))
|
||||||
|
|
||||||
|
router = VectorIORouter(mock_routing_table)
|
||||||
|
request = OpenAICreateVectorStoreRequestWithExtraBody.model_validate(
|
||||||
|
{"embedding_model": "text-model", "embedding_dimension": 384}
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(ModelTypeError, match="Model 'text-model' is of type"):
|
||||||
|
await router.openai_create_vector_store(request)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue