Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-28 10:54:19 +00:00)
Support structured output for Together (#289)
Commit ffb561070d (parent 2e5e46d896)
2 changed files with 16 additions and 2 deletions
@@ -70,10 +70,10 @@ class TogetherInferenceAdapter(
         model: str,
         messages: List[Message],
         sampling_params: Optional[SamplingParams] = SamplingParams(),
+        response_format: Optional[ResponseFormat] = None,
         tools: Optional[List[ToolDefinition]] = None,
         tool_choice: Optional[ToolChoice] = ToolChoice.auto,
         tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
-        response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,
         logprobs: Optional[LogProbConfig] = None,
     ) -> AsyncGenerator:
@@ -96,6 +96,7 @@ class TogetherInferenceAdapter(
             tools=tools or [],
             tool_choice=tool_choice,
             tool_prompt_format=tool_prompt_format,
+            response_format=response_format,
             stream=stream,
             logprobs=logprobs,
         )
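With response_format accepted by chat_completion and threaded into the request (hunks above), a caller can ask the Together adapter for schema-constrained output. A minimal sketch of a call site follows; JsonSchemaResponseFormat and its schema field are assumptions inferred from fmt.type / fmt.schema in the _get_params hunk below, and the import path, model id, and adapter variable are likewise illustrative, not verified against the repo at this commit.

# Hypothetical call site for the new parameter. JsonSchemaResponseFormat and
# its `schema` field are assumptions inferred from the diff, not confirmed API.
from llama_stack.apis.inference import JsonSchemaResponseFormat, UserMessage

async def extract_name(adapter) -> None:
    # `adapter` stands in for a configured TogetherInferenceAdapter.
    fmt = JsonSchemaResponseFormat(
        schema={"type": "object", "properties": {"name": {"type": "string"}}},
    )
    async for chunk in adapter.chat_completion(
        model="Llama3.1-8B-Instruct",
        messages=[UserMessage(content="Reply with the user's name as JSON.")],
        response_format=fmt,
    ):
        print(chunk)

The _get_params hunk below shows how that format is translated into Together's request payload: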
@@ -130,11 +131,23 @@ class TogetherInferenceAdapter(
             yield chunk
 
     def _get_params(self, request: ChatCompletionRequest) -> dict:
+        options = get_sampling_options(request)
+        if fmt := request.response_format:
+            if fmt.type == ResponseFormatType.json_schema.value:
+                options["response_format"] = {
+                    "type": "json_object",
+                    "schema": fmt.schema,
+                }
+            elif fmt.type == ResponseFormatType.grammar.value:
+                raise NotImplementedError("Grammar response format not supported yet")
+            else:
+                raise ValueError(f"Unknown response format {fmt.type}")
+
         return {
             "model": self.map_to_provider_model(request.model),
             "prompt": chat_completion_request_to_prompt(request, self.formatter),
             "stream": request.stream,
-            **get_sampling_options(request),
+            **options,
         }
 
     async def embeddings(
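For a request carrying a JSON-schema response format, the dict returned by _get_params now includes Together's response_format field alongside the usual options. A sketch of the resulting payload shape; the model id, prompt, sampling option, and schema are made-up examples, not values from the repo.

# Illustrative payload produced by _get_params for a JSON-schema request.
params = {
    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",  # mapped provider model id
    "prompt": "...",  # chat messages flattened to a single prompt string
    "stream": False,
    "temperature": 0.7,  # merged in from get_sampling_options(request)
    "response_format": {  # the new mapping: json_schema -> Together's json_object
        "type": "json_object",
        "schema": {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name"],
        },
    },
}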
@@ -195,6 +195,7 @@ async def test_structured_output(inference_settings):
         "meta-reference",
         "remote::fireworks",
         "remote::tgi",
+        "remote::together",
     ):
         pytest.skip("Other inference providers don't support structured output yet")
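The test gate now lets remote::together run the structured-output test instead of skipping it. A hedged sketch of the kind of assertion such a test makes once the provider honors the schema; the Answer model and the raw string are illustrative stand-ins, not the repo's actual test code.

# Illustrative structured-output check: parse the completion as JSON and
# validate it against a schema. `raw` stands in for a real Together response.
from pydantic import BaseModel

class Answer(BaseModel):
    name: str
    year_of_birth: int

raw = '{"name": "Michael Jordan", "year_of_birth": 1963}'  # stand-in model output
answer = Answer.model_validate_json(raw)
assert answer.year_of_birth == 1963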