mirror of https://github.com/meta-llama/llama-stack.git
Fix register_model protocol to return Model
Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
parent 58a040a805
commit 6df600f014

5 changed files with 8 additions and 5 deletions
@@ -346,7 +346,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         async for chunk in process_completion_stream_response(stream):
             yield chunk
 
-    async def register_model(self, model: Model) -> None:
+    async def register_model(self, model: Model) -> Model:
         assert self.client is not None
         model = await self.register_helper.register_model(model)
         res = await self.client.models.list()
@@ -356,6 +356,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
                 f"Model {model.provider_resource_id} is not being served by vLLM. "
                 f"Available models: {', '.join(available_models)}"
             )
+        return model
 
     async def _get_params(self, request: Union[ChatCompletionRequest, CompletionRequest]) -> dict:
         options = get_sampling_options(request.sampling_params)
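To make the change easier to read in one piece, here is a minimal, runnable sketch of register_model as it stands after this commit. It is reconstructed from the two hunks above; the lines elided between them (building available_models and raising when the model is not served), as well as every _Fake*/Sketch stand-in class, are assumptions for illustration, not the verbatim upstream code.

import asyncio
from dataclasses import dataclass


@dataclass
class Model:
    # Minimal stand-in for llama_stack's Model resource type.
    provider_resource_id: str


class _FakeModelsAPI:
    # Stand-in for the models endpoint of the OpenAI-compatible client the
    # vLLM adapter talks to; the real adapter iterates the list() response.
    async def list(self) -> list[str]:
        return ["Llama-3.1-8B-Instruct"]


class _FakeClient:
    def __init__(self) -> None:
        self.models = _FakeModelsAPI()


class _PassthroughRegisterHelper:
    # Stand-in for the adapter's register_helper, which may normalize the
    # model (e.g. rewrite provider_resource_id) before returning it.
    async def register_model(self, model: Model) -> Model:
        return model


class VLLMInferenceAdapterSketch:
    def __init__(self) -> None:
        self.client = _FakeClient()
        self.register_helper = _PassthroughRegisterHelper()

    async def register_model(self, model: Model) -> Model:
        assert self.client is not None
        model = await self.register_helper.register_model(model)
        res = await self.client.models.list()
        # Assumed reconstruction of the lines elided between the two hunks.
        available_models = list(res)
        if model.provider_resource_id not in available_models:
            raise ValueError(
                f"Model {model.provider_resource_id} is not being served by vLLM. "
                f"Available models: {', '.join(available_models)}"
            )
        # The fix in this commit: return the (possibly normalized) model
        # instead of None, matching the updated protocol signature.
        return model


async def _demo() -> None:
    adapter = VLLMInferenceAdapterSketch()
    registered = await adapter.register_model(Model("Llama-3.1-8B-Instruct"))
    print(registered.provider_resource_id)


if __name__ == "__main__":
    asyncio.run(_demo())

The design point: returning the model rather than None presumably lets the caller persist whatever the provider actually registered, including any normalization applied by register_helper, instead of assuming its input was stored unchanged.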