add async to get_model signature in Protocol

Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
Ihar Hrachyshka 2025-03-25 18:25:58 -04:00
parent 6aedfc2201
commit fcf3b0a835
3 changed files with 7 additions and 7 deletions


@@ -249,7 +249,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         assert self.model_store is not None
         if sampling_params is None:
             sampling_params = SamplingParams()
-        model = self.model_store.get_model(model_id)
+        model = await self.model_store.get_model(model_id)
         request = CompletionRequest(
             model=model.provider_resource_id,
             content=content,
@@ -279,7 +279,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         assert self.model_store is not None
         if sampling_params is None:
             sampling_params = SamplingParams()
-        model = self.model_store.get_model(model_id)
+        model = await self.model_store.get_model(model_id)
         # This is to be consistent with OpenAI API and support vLLM <= v0.6.3
         # References:
         # * https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
@@ -397,7 +397,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         task_type: Optional[EmbeddingTaskType] = None,
     ) -> EmbeddingsResponse:
         assert self.client is not None
-        model = self.model_store.get_model(model_id)
+        model = await self.model_store.get_model(model_id)
         kwargs = {}
         assert model.model_type == ModelType.embedding
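
For context, the Protocol-side change the commit message refers to would look roughly like the sketch below. The Model and ModelStore definitions here are simplified assumptions for illustration, not the full llama-stack types:

    from typing import Protocol


    class Model:
        """Simplified stand-in for the real Model type (assumption)."""

        provider_resource_id: str
        model_type: str


    class ModelStore(Protocol):
        # Before this change the method was synchronous:
        #     def get_model(self, identifier: str) -> Model: ...
        # Making it async means every call site must now await it,
        # which is exactly what the three hunks above do.
        async def get_model(self, identifier: str) -> Model: ...

Note that a Protocol only checks structure, so a call site that forgets the await would still type-match the attribute access; catching an unawaited coroutine requires a checker such as mypy or a runtime warning, which is presumably why all call sites are updated in the same commit.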