Fix server to conditionally await coroutines

This commit is contained in:
Ashwin Bharambe 2024-10-08 17:03:31 -07:00
parent 216e7eb4d5
commit 8eee5b9adc
2 changed files with 9 additions and 5 deletions

View file

@ -34,6 +34,7 @@ class MetaReferenceInferenceImpl(Inference):
# TODO: verify that the checkpoint actually matches this model
async def initialize(self) -> None:
print(f"Loading model `{self.model.descriptor()}`")
self.generator = LlamaModelParallelGenerator(self.config)
self.generator.start()