add async to get_model signature in Protocol

Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
Ihar Hrachyshka 2025-03-25 18:25:58 -04:00
parent 6aedfc2201
commit fcf3b0a835
3 changed files with 7 additions and 7 deletions


@@ -249,7 +249,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         assert self.model_store is not None
         if sampling_params is None:
             sampling_params = SamplingParams()
-        model = self.model_store.get_model(model_id)
+        model = await self.model_store.get_model(model_id)
         request = CompletionRequest(
             model=model.provider_resource_id,
             content=content,
@@ -279,7 +279,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         assert self.model_store is not None
         if sampling_params is None:
             sampling_params = SamplingParams()
-        model = self.model_store.get_model(model_id)
+        model = await self.model_store.get_model(model_id)
         # This is to be consistent with OpenAI API and support vLLM <= v0.6.3
         # References:
         # * https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
@@ -397,7 +397,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         task_type: Optional[EmbeddingTaskType] = None,
     ) -> EmbeddingsResponse:
         assert self.client is not None
-        model = self.model_store.get_model(model_id)
+        model = await self.model_store.get_model(model_id)
         kwargs = {}
         assert model.model_type == ModelType.embedding
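
For context, the Protocol-side change the commit message refers to would look roughly like the sketch below. The Model and ModelStore definitions here are simplified assumptions for illustration, not the full llama-stack types:

    from typing import Protocol


    class Model:
        """Simplified stand-in for the real Model type (assumption)."""

        provider_resource_id: str
        model_type: str


    class ModelStore(Protocol):
        # Before this change the method was synchronous:
        #     def get_model(self, identifier: str) -> Model: ...
        # Making it async means every call site must now await it,
        # which is exactly what the three hunks above do.
        async def get_model(self, identifier: str) -> Model: ...

Note that a Protocol only checks structure, so a call site that forgets the await would still type-match the attribute access; catching an unawaited coroutine requires a checker such as mypy or a runtime warning, which is presumably why all call sites are updated in the same commit.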