Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-03 01:03:59 +00:00)
Apply formatting to source files

parent 6ec9eabbeb
commit c8580d3b0c

2 changed files with 15 additions and 12 deletions
@@ -102,9 +102,11 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
         registered_model = resolve_model(model.model_id)
         if configured_model.core_model_id != registered_model.core_model_id:
-            raise ValueError(f"Requested model '{model.identifier}' is different from "
-                             f"model '{self.config.model}' that this provider "
-                             f"is configured to serve")
+            raise ValueError(
+                f"Requested model '{model.identifier}' is different from "
+                f"model '{self.config.model}' that this provider "
+                f"is configured to serve"
+            )
         return model

     def _sampling_params(self, sampling_params: SamplingParams) -> VLLMSamplingParams:

@@ -169,8 +171,9 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
         log.info("Sampling params: %s", sampling_params)
         request_id = _random_uuid()

-        prompt = await chat_completion_request_to_prompt(request, self.config.model,
-                                                         self.formatter)
+        prompt = await chat_completion_request_to_prompt(
+            request, self.config.model, self.formatter
+        )
         vllm_sampling_params = self._sampling_params(request.sampling_params)
         results_generator = self.engine.generate(
             prompt, vllm_sampling_params, request_id
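Both hunks apply the same line-wrapping style to over-long statements: the arguments move onto a single indented continuation line and the closing parenthesis gets its own line, instead of hanging the continuation under the opening parenthesis. A minimal before/after sketch of that style follows; build_prompt and its arguments are hypothetical stand-ins used only for illustration, not llama-stack APIs.

    # Hypothetical helper used only to illustrate the wrapping style; not part of llama-stack.
    def build_prompt(request_text: str, model_name: str, formatter: str) -> str:
        return f"[{formatter}] {model_name}: {request_text}"


    # Before this commit: continuation arguments aligned under the opening parenthesis.
    prompt = build_prompt("hello", "Llama3.1-8B-Instruct",
                          "chat")

    # After this commit: arguments on one indented line, closing parenthesis on its own line.
    prompt = build_prompt(
        "hello", "Llama3.1-8B-Instruct", "chat"
    )
    assert prompt == "[chat] Llama3.1-8B-Instruct: hello"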