From dbbfaf68cd3a0414a5d67a7bf0bc437c5c7b2b3c Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Wed, 23 Oct 2024 10:54:25 -0700 Subject: [PATCH] don't set num_predict for all providers --- llama_stack/providers/utils/inference/openai_compat.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index add29da99..22ae8a717 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -34,8 +34,6 @@ def get_sampling_options(request: ChatCompletionRequest) -> dict: if params := request.sampling_params: for attr in {"temperature", "top_p", "top_k", "max_tokens"}: if getattr(params, attr): - if attr == "max_tokens": - options["num_predict"] = getattr(params, attr) options[attr] = getattr(params, attr) if params.repetition_penalty is not None and params.repetition_penalty != 1.0: