mirror of https://github.com/meta-llama/llama-stack.git (synced 2026-01-05 13:52:29 +00:00)
temperature 0
This commit is contained in:
parent 5c6e1e9d1e
commit c7a5795ab2

1 changed file with 2 additions and 2 deletions
@@ -128,8 +128,9 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
         fmt: ResponseFormat = None,
     ):
         options = get_sampling_options(sampling_params)
+        # TGI does not support temperature=0, so we set it to 1e-3 instead
         if options["temperature"] == 0:
-            options["temperature"] = 0.1
+            options["temperature"] = 1e-3
 
         # delete key "max_tokens" from options since its not supported by the API
         options.pop("max_tokens", None)
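For context: TGI's generation endpoint rejects temperature=0 (it requires a strictly positive float), so the adapter maps greedy decoding to a value just above zero. Switching the fallback from 0.1 to 1e-3 keeps the output effectively deterministic instead of injecting noticeable randomness. Below is a minimal sketch of the behavior this hunk introduces, assuming get_sampling_options() returns a plain dict of TGI kwargs (its real return shape is not shown in this hunk):

# Illustrative sketch only -- not the repository's actual implementation.
# Assumption: sampling options arrive as a plain dict of TGI kwargs.

def clamp_tgi_options(options: dict) -> dict:
    """Adjust sampling options so the TGI API accepts them."""
    # TGI requires temperature > 0; map "greedy" (0) to a near-zero
    # value so decoding stays effectively deterministic.
    if options.get("temperature") == 0:
        options["temperature"] = 1e-3
    # TGI's text_generation expects max_new_tokens, not max_tokens.
    options.pop("max_tokens", None)
    return options

print(clamp_tgi_options({"temperature": 0, "max_tokens": 128}))
# -> {'temperature': 0.001}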
@@ -233,7 +234,6 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
         self, request: ChatCompletionRequest
     ) -> ChatCompletionResponse:
         params = await self._get_params(request)
-        print("TGI params", params)
         r = await self.client.text_generation(**params)
 
         choice = OpenAICompatCompletionChoice(
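The second hunk only removes a leftover debug print before the inference call. For illustration, a self-contained sketch of the same call flow, assuming self.client is a huggingface_hub AsyncInferenceClient pointed at a TGI server (the hunk does not show how the client is constructed, and the URL below is a placeholder):

# Sketch of the call site after the debug print is removed -- illustrative only.
# Assumption: the adapter's client is a huggingface_hub.AsyncInferenceClient.
from huggingface_hub import AsyncInferenceClient

async def generate(prompt: str) -> str:
    client = AsyncInferenceClient("http://localhost:8080")  # placeholder URL
    # In the adapter these kwargs come from _get_params(request);
    # a representative set is shown inline here.
    params = {"prompt": prompt, "temperature": 1e-3, "max_new_tokens": 64}
    return await client.text_generation(**params)

# To exercise it against a live TGI server:
#   import asyncio
#   print(asyncio.run(generate("Hello")))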