mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-31 09:33:54 +00:00
more robust 0 check
This commit is contained in:
parent
8fc1ded6d2
commit
b194fed28d
1 changed file with 4 additions and 2 deletions
|
|
@ -128,8 +128,10 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
|
|||
fmt: ResponseFormat = None,
|
||||
):
|
||||
options = get_sampling_options(sampling_params)
|
||||
# TGI does not support temperature=0, so we set it to 1e-3 instead
|
||||
if options["temperature"] == 0:
|
||||
# TGI does not support temperature=0 when using greedy sampling
|
||||
# We set it to 1e-3 instead, anything lower outputs garbage from TGI
|
||||
# We can use top_p sampling strategy to specify lower temperature
|
||||
if abs(options["temperature"]) < 1e-10:
|
||||
options["temperature"] = 1e-3
|
||||
|
||||
# delete key "max_tokens" from options since it's not supported by the API
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue