mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-16 06:53:47 +00:00
address feedback
This commit is contained in:
parent
772e23e29e
commit
d6a9a17828
4 changed files with 2 additions and 19 deletions
|
@ -88,14 +88,6 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
|
|||
"You cannot dynamically add a model to a running vllm instance"
|
||||
)
|
||||
|
||||
async def list_models(self) -> List[Model]:
|
||||
return [
|
||||
Model(
|
||||
identifier=self.config.model,
|
||||
llama_model=self.config.model,
|
||||
)
|
||||
]
|
||||
|
||||
def _sampling_params(self, sampling_params: SamplingParams) -> VLLMSamplingParams:
|
||||
if sampling_params is None:
|
||||
return VLLMSamplingParams(max_tokens=self.config.max_tokens)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue