mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-16 06:53:47 +00:00
resource oriented object design for models
This commit is contained in:
parent
5625aef48a
commit
ca88f3f182
17 changed files with 63 additions and 82 deletions
|
@@ -20,7 +20,7 @@ from vllm.sampling_params import SamplingParams as VLLMSamplingParams
|
|||
|
||||
from llama_stack.apis.inference import * # noqa: F403
|
||||
|
||||
from llama_stack.providers.datatypes import ModelDef, ModelsProtocolPrivate
|
||||
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
OpenAICompatCompletionChoice,
|
||||
OpenAICompatCompletionResponse,
|
||||
|
@@ -83,14 +83,14 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
|
|||
if self.engine:
|
||||
self.engine.shutdown_background_loop()
|
||||
|
||||
async def register_model(self, model: ModelDef) -> None:
|
||||
async def register_model(self, model: Model) -> None:
|
||||
raise ValueError(
|
||||
"You cannot dynamically add a model to a running vllm instance"
|
||||
)
|
||||
|
||||
async def list_models(self) -> List[ModelDef]:
|
||||
async def list_models(self) -> List[Model]:
|
||||
return [
|
||||
ModelDef(
|
||||
Model(
|
||||
identifier=self.config.model,
|
||||
llama_model=self.config.model,
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue