more robust 0 check

This commit is contained in:
Xi Yan 2025-01-16 17:38:22 -08:00
parent 8fc1ded6d2
commit b194fed28d

View file

@ -128,8 +128,10 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
fmt: ResponseFormat = None,
):
options = get_sampling_options(sampling_params)
# TGI does not support temperature=0, so we set it to 1e-3 instead
if options["temperature"] == 0:
# TGI does not support temperature=0 when using greedy sampling
# We set it to 1e-3 instead; anything lower makes TGI output garbage
# We can use top_p sampling strategy to specify lower temperature
if abs(options["temperature"]) < 1e-10:
options["temperature"] = 1e-3
# delete key "max_tokens" from options since it's not supported by the API