Don't set num_predict for all providers

This commit is contained in:
Dinesh Yeduguru 2024-10-23 10:54:25 -07:00
parent ffb561070d
commit dbbfaf68cd

View file

@@ -34,8 +34,6 @@ def get_sampling_options(request: ChatCompletionRequest) -> dict:
if params := request.sampling_params:
for attr in {"temperature", "top_p", "top_k", "max_tokens"}:
if getattr(params, attr):
if attr == "max_tokens":
options["num_predict"] = getattr(params, attr)
options[attr] = getattr(params, attr)
if params.repetition_penalty is not None and params.repetition_penalty != 1.0: