Resource-oriented object design for models

This commit is contained in:
Dinesh Yeduguru 2024-11-07 16:43:55 -08:00
parent 0297111dfd
commit 9076221924
17 changed files with 64 additions and 86 deletions

View file

@ -31,7 +31,7 @@ RoutingKey = Union[str, List[str]]
RoutableObject = Union[
ModelDef,
Model,
Shield,
MemoryBankDef,
DatasetDef,
@ -41,7 +41,7 @@ RoutableObject = Union[
RoutableObjectWithProvider = Annotated[
Union[
ModelDefWithProvider,
Model,
Shield,
MemoryBankDefWithProvider,
DatasetDefWithProvider,

View file

@ -70,7 +70,7 @@ class InferenceRouter(Inference):
async def shutdown(self) -> None:
pass
async def register_model(self, model: ModelDef) -> None:
async def register_model(self, model: Model) -> None:
await self.routing_table.register_model(model)
async def chat_completion(

View file

@ -78,12 +78,7 @@ class CommonRoutingTableImpl(RoutingTable):
# Register all objects from providers
for pid, p in self.impls_by_provider_id.items():
api = get_impl_api(p)
if api == Api.inference:
p.model_store = self
models = await p.list_models()
await add_objects(models, pid, ModelDefWithProvider)
elif api == Api.memory:
if api == Api.memory:
p.memory_bank_store = self
memory_banks = await p.list_memory_banks()
await add_objects(memory_banks, pid, None)
@ -185,13 +180,13 @@ class CommonRoutingTableImpl(RoutingTable):
class ModelsRoutingTable(CommonRoutingTableImpl, Models):
async def list_models(self) -> List[ModelDefWithProvider]:
async def list_models(self) -> List[Model]:
return await self.get_all_with_type("model")
async def get_model(self, identifier: str) -> Optional[ModelDefWithProvider]:
async def get_model(self, identifier: str) -> Optional[Model]:
return await self.get_object_by_identifier(identifier)
async def register_model(self, model: ModelDefWithProvider) -> None:
async def register_model(self, model: Model) -> None:
await self.register_object(model)

View file

@ -9,7 +9,7 @@ import os
import pytest
import pytest_asyncio
from llama_stack.distribution.store import * # noqa F403
from llama_stack.apis.inference import ModelDefWithProvider
from llama_stack.apis.inference import Model
from llama_stack.apis.memory_banks import VectorMemoryBankDef
from llama_stack.providers.utils.kvstore import kvstore_impl, SqliteKVStoreConfig
from llama_stack.distribution.datatypes import * # noqa F403
@ -50,9 +50,8 @@ def sample_bank():
@pytest.fixture
def sample_model():
return ModelDefWithProvider(
return Model(
identifier="test_model",
llama_model="Llama3.2-3B-Instruct",
provider_id="test-provider",
)
@ -84,7 +83,6 @@ async def test_basic_registration(registry, sample_bank, sample_model):
assert len(results) == 1
result_model = results[0]
assert result_model.identifier == sample_model.identifier
assert result_model.llama_model == sample_model.llama_model
assert result_model.provider_id == sample_model.provider_id