refactor get_max_tokens and build_options

This commit is contained in:
Dinesh Yeduguru 2024-10-23 19:11:04 -07:00
parent 5965ef3979
commit 4a073fcee5
7 changed files with 33 additions and 38 deletions

View file

@@ -143,7 +143,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
"model": VLLM_SUPPORTED_MODELS[request.model],
"prompt": chat_completion_request_to_prompt(request, self.formatter),
"stream": request.stream,
- **get_sampling_options(request),
+ **get_sampling_options(request.sampling_params),
}
async def embeddings(