mirror of
https://github.com/meta-llama/llama-stack.git
synced 2026-01-02 13:44:30 +00:00
change to dev, fix issues with test
This commit is contained in:
parent
e31a52b26e
commit
941d5f1b18
2 changed files with 8 additions and 28 deletions
|
|
@ -256,32 +256,6 @@ class CentMLInferenceAdapter(
|
|||
logcat.debug("inference", f"params to centml: {params}")
|
||||
return params
|
||||
|
||||
def _build_options(
|
||||
self,
|
||||
sampling_params: Optional[SamplingParams],
|
||||
fmt: Optional[ResponseFormat],
|
||||
) -> dict:
|
||||
"""
|
||||
Build temperature, max_tokens, top_p, etc., plus any response format data.
|
||||
"""
|
||||
options = get_sampling_options(sampling_params)
|
||||
options.setdefault("max_tokens", 512)
|
||||
|
||||
if fmt:
|
||||
if fmt.type == ResponseFormatType.json_schema.value:
|
||||
options["response_format"] = {
|
||||
"type": "json_object",
|
||||
"schema": fmt.json_schema,
|
||||
}
|
||||
elif fmt.type == ResponseFormatType.grammar.value:
|
||||
raise NotImplementedError(
|
||||
"Grammar response format not supported yet"
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown response format {fmt.type}")
|
||||
|
||||
return options
|
||||
|
||||
def _build_options(
|
||||
self,
|
||||
sampling_params: Optional[SamplingParams],
|
||||
|
|
@ -302,7 +276,7 @@ class CentMLInferenceAdapter(
|
|||
raise ValueError(f"Unknown response format {fmt.type}")
|
||||
|
||||
if logprobs and logprobs.top_k:
|
||||
options["logprobs"] = 1
|
||||
options["logprobs"] = logprobs.top_k
|
||||
|
||||
return options
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue