mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-16 19:49:27 +00:00
refactor get_max_tokens and build_options
This commit is contained in:
parent
5965ef3979
commit
4a073fcee5
7 changed files with 33 additions and 38 deletions
|
|
@ -131,7 +131,7 @@ class TogetherInferenceAdapter(
|
|||
yield chunk
|
||||
|
||||
def _get_params(self, request: ChatCompletionRequest) -> dict:
|
||||
options = get_sampling_options(request)
|
||||
options = get_sampling_options(request.sampling_params)
|
||||
if fmt := request.response_format:
|
||||
if fmt.type == ResponseFormatType.json_schema.value:
|
||||
options["response_format"] = {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue