Added a comment explaining the client handling in register_model.

ilya-kolchinsky 2025-04-16 15:29:23 +02:00
parent f1fd382d51
commit 6a0ee7180b


@@ -368,6 +368,9 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
yield chunk
async def register_model(self, model: Model) -> Model:
# register_model is called during Llama Stack initialization, so self.client may not exist yet and must not be initialized here.
# self.client should only be created after initialization is complete, to avoid asyncio cross-context errors.
# Changing this may lead to unpredictable behavior.
client = self._create_client() if self.client is None else self.client
model = await self.register_helper.register_model(model)
res = await client.models.list()
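
For readers outside the diff context, below is a minimal, self-contained sketch of the pattern the new comment describes, assuming an OpenAI-compatible AsyncOpenAI client. The _create_client name comes from the diff, but its body, the constructor arguments, and the error handling here are illustrative assumptions, not the adapter's actual code.

# Illustrative sketch only: shows the lazy-client fallback used in register_model.
from openai import AsyncOpenAI


class VLLMInferenceAdapterSketch:
    def __init__(self, base_url: str, api_key: str = "fake") -> None:
        self.base_url = base_url
        self.api_key = api_key
        # Deliberately left as None: the long-lived client should be created
        # inside the event loop that will use it, after Llama Stack
        # initialization, to avoid asyncio cross-context errors.
        self.client: AsyncOpenAI | None = None

    def _create_client(self) -> AsyncOpenAI:
        # Throwaway client for calls made before self.client exists
        # (assumed implementation; the real helper may differ).
        return AsyncOpenAI(base_url=self.base_url, api_key=self.api_key)

    async def register_model(self, model_id: str) -> str:
        # Same pattern as the diff: fall back to a temporary client rather
        # than populating self.client during initialization.
        client = self._create_client() if self.client is None else self.client
        res = await client.models.list()
        if model_id not in {m.id for m in res.data}:
            raise ValueError(f"Model {model_id} is not served by the vLLM endpoint")
        return model_id

Creating a throwaway AsyncOpenAI client per call is cheap in this context: no connection is opened until a request is actually made, so the fallback only costs an extra HTTP client object during startup.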