temperature 0

2025-12-31 09:30:03 +00:00 · 2025-01-16 16:58:18 -08:00 · 2025-01-16 16:58:18 -08:00 · c7a5795ab2
commit c7a5795ab2
parent 5c6e1e9d1e
1 changed file with 2 additions and 2 deletions
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -128,8 +128,9 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
        fmt: ResponseFormat = None,
    ):
        options = get_sampling_options(sampling_params)
+        # TGI does not support temperature=0, so we set it to 1e-3 instead
        if options["temperature"] == 0:
-            options["temperature"] = 0.1
+            options["temperature"] = 1e-3

        # delete key "max_tokens" from options since its not supported by the API
        options.pop("max_tokens", None)
@@ -233,7 +234,6 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
        self, request: ChatCompletionRequest
    ) -> ChatCompletionResponse:
        params = await self._get_params(request)
-        print("TGI params", params)
        r = await self.client.text_generation(**params)

        choice = OpenAICompatCompletionChoice(