more robust 0 check

2025-12-31 09:33:54 +00:00 · 2025-01-16 17:38:22 -08:00 · 2025-01-16 17:38:22 -08:00 · b194fed28d
commit b194fed28d
parent 8fc1ded6d2
1 changed files with 4 additions and 2 deletions
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -128,8 +128,10 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
        fmt: ResponseFormat = None,
    ):
        options = get_sampling_options(sampling_params)
-        # TGI does not support temperature=0, so we set it to 1e-3 instead
-        if options["temperature"] == 0:
+        # TGI does not support temperature=0 when using greedy sampling
+        # We set it to 1e-3 instead, anything lower outputs garbage from TGI
+        # We can use top_p sampling strategy to specify lower temperature
+        if abs(options["temperature"]) < 1e-10:
            options["temperature"] = 1e-3

        # delete key "max_tokens" from options since its not supported by the API