refactor get_max_tokens and build_options

2025-12-16 16:39:28 +00:00 · 2024-10-23 19:11:04 -07:00 · 2024-10-23 19:11:04 -07:00 · 4a073fcee5
commit 4a073fcee5
parent 5965ef3979
7 changed files with 33 additions and 38 deletions
--- a/llama_stack/providers/adapters/inference/fireworks/fireworks.py
+++ b/llama_stack/providers/adapters/inference/fireworks/fireworks.py
@@ -116,7 +116,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference):
        if prompt.startswith("<|begin_of_text|>"):
            prompt = prompt[len("<|begin_of_text|>") :]

-        options = get_sampling_options(request)
+        options = get_sampling_options(request.sampling_params)
        options.setdefault("max_tokens", 512)

        if fmt := request.response_format: