Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-17 05:32:36 +00:00)
fix vllm registry

commit e272f8aa62 (parent a019011326)
3 changed files with 10 additions and 4 deletions
@@ -131,6 +131,15 @@ class VLLMInferenceAdapter(Inference, ModelRegistryHelper, ModelsProtocolPrivate
         ):
             yield chunk
 
+    async def register_model(self, model: Model) -> None:
+        model = await super().register_model(model)
+        res = self.client.models.list()
+        available_models = [m.id for m in res]
+        if model.provider_resource_id not in available_models:
+            raise ValueError(
+                f"Model {model.provider_resource_id} is not being served by vLLM"
+            )
+
     async def _get_params(
         self, request: Union[ChatCompletionRequest, CompletionRequest]
     ) -> dict:
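The new register_model override validates the requested provider_resource_id against the models the vLLM server actually reports as served, failing fast at registration time instead of at the first inference call. Below is a minimal standalone sketch of the same check run outside the adapter, assuming a vLLM server with its OpenAI-compatible API enabled; the base URL, API key, and model id are illustrative assumptions, not values taken from this commit.

```python
# Sketch of the availability check added in this commit, assuming a local
# vLLM server at http://localhost:8000/v1 (hypothetical endpoint and model id).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")

# models.list() returns an iterable page of model objects; iterating it
# collects the ids vLLM is currently serving, as the adapter does.
available_models = [m.id for m in client.models.list()]

requested = "meta-llama/Llama-3.1-8B-Instruct"  # hypothetical model id
if requested not in available_models:
    raise ValueError(f"Model {requested} is not being served by vLLM")
```

With this check in place, a misconfigured or misspelled model id surfaces as a clear registration error rather than a later failure on the first request.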