[#391] Add support for JSON structured output for vLLM

Aidan Do 2024-11-26 09:40:17 +00:00
parent 4e6c984c26
commit 1801aa145d
2 changed files with 13 additions and 0 deletions

@@ -100,6 +100,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             tool_prompt_format=tool_prompt_format,
             stream=stream,
             logprobs=logprobs,
+            response_format=response_format,
         )
         if stream:
             return self._stream_chat_completion(request, self.client)
@@ -180,6 +181,16 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             self.formatter,
         )
+        if fmt := request.response_format:
+            if fmt.type == ResponseFormatType.json_schema.value:
+                input_dict["extra_body"] = {
+                    "guided_json": request.response_format.json_schema
+                }
+            elif fmt.type == ResponseFormatType.grammar.value:
+                raise NotImplementedError("Grammar response format not supported yet")
+            else:
+                raise ValueError(f"Unknown response format {fmt.type}")
         return {
             "model": request.model,
             **input_dict,

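For reference, guided_json is the vLLM-specific request field for schema-constrained decoding, and the adapter forwards it through the OpenAI client's extra_body passthrough. Below is a minimal sketch of the equivalent direct call against a vLLM OpenAI-compatible server; the base URL, model name, prompt, and schema are illustrative placeholders, not values from this commit.

# Minimal sketch: what the adapter's extra_body={"guided_json": ...} amounts to
# when sent to a vLLM OpenAI-compatible server. URL, model, and schema are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")

answer_schema = {
    "type": "object",
    "properties": {"name": {"type": "string"}, "year": {"type": "integer"}},
    "required": ["name", "year"],
}

response = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Who wrote 1984, and in what year was it published?"}],
    # vLLM extension: constrain generation so the output matches the JSON schema.
    extra_body={"guided_json": answer_schema},
)
print(response.choices[0].message.content)  # JSON text conforming to answer_schema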
@@ -139,6 +139,7 @@ class TestInference:
             "remote::tgi",
             "remote::together",
             "remote::fireworks",
+            "remote::vllm",
         ):
             pytest.skip(
                 "Other inference providers don't support structured output in completions yet"
@@ -198,6 +199,7 @@ class TestInference:
             "remote::fireworks",
             "remote::tgi",
             "remote::together",
+            "remote::vllm",
             "remote::nvidia",
         ):
             pytest.skip("Other inference providers don't support structured output yet")