From 21f2e9adf59b7cf7535ba9312006af97a5b14e3b Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Wed, 23 Oct 2024 11:44:04 -0700 Subject: [PATCH] don't set num_predict for all providers (#294) --- llama_stack/providers/utils/inference/openai_compat.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index add29da99..22ae8a717 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -34,8 +34,6 @@ def get_sampling_options(request: ChatCompletionRequest) -> dict: if params := request.sampling_params: for attr in {"temperature", "top_p", "top_k", "max_tokens"}: if getattr(params, attr): - if attr == "max_tokens": - options["num_predict"] = getattr(params, attr) options[attr] = getattr(params, attr) if params.repetition_penalty is not None and params.repetition_penalty != 1.0: