mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-16 16:39:28 +00:00
refactor get_max_tokens and build_options
This commit is contained in:
parent
5965ef3979
commit
4a073fcee5
7 changed files with 33 additions and 38 deletions
|
|
@ -116,7 +116,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference):
|
|||
if prompt.startswith("<|begin_of_text|>"):
|
||||
prompt = prompt[len("<|begin_of_text|>") :]
|
||||
|
||||
options = get_sampling_options(request)
|
||||
options = get_sampling_options(request.sampling_params)
|
||||
options.setdefault("max_tokens", 512)
|
||||
|
||||
if fmt := request.response_format:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue