Mirror of https://github.com/meta-llama/llama-stack.git
[#391] Add support for json structured output for vLLM
commit 1801aa145d (parent 4e6c984c26)
2 changed files with 13 additions and 0 deletions
vLLM inference adapter (class VLLMInferenceAdapter):

@@ -100,6 +100,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             tool_prompt_format=tool_prompt_format,
             stream=stream,
             logprobs=logprobs,
+            response_format=response_format,
         )
         if stream:
             return self._stream_chat_completion(request, self.client)
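The one-line change above threads the caller's response_format into the ChatCompletionRequest the adapter builds. The second hunk below then inspects that value, which carries a type tag and, for JSON output, a schema. The sketch below shows the rough shape implied by the diff; the class and enum definitions here are illustrative assumptions, not llama-stack's actual source.

# Rough shape of the response-format value the adapter inspects below.
# Names (ResponseFormatType, json_schema) come from the diff; these
# definitions are illustrative assumptions, not llama-stack's source.
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict

class ResponseFormatType(Enum):
    json_schema = "json_schema"
    grammar = "grammar"

@dataclass
class JsonSchemaResponseFormat:
    json_schema: Dict[str, Any]
    type: str = ResponseFormatType.json_schema.value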
@@ -180,6 +181,16 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             self.formatter,
         )
 
+        if fmt := request.response_format:
+            if fmt.type == ResponseFormatType.json_schema.value:
+                input_dict["extra_body"] = {
+                    "guided_json": request.response_format.json_schema
+                }
+            elif fmt.type == ResponseFormatType.grammar.value:
+                raise NotImplementedError("Grammar response format not supported yet")
+            else:
+                raise ValueError(f"Unknown response format {fmt.type}")
+
         return {
             "model": request.model,
             **input_dict,
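Under the hood this relies on vLLM's guided decoding: the adapter forwards the schema as a guided_json field in the OpenAI client's extra_body, and the vLLM server constrains token generation to match it. Below is a minimal sketch of the same mechanism invoked directly, assuming a vLLM OpenAI-compatible server at http://localhost:8000/v1; the server URL and model name are assumptions, not values from the commit.

# Minimal sketch: guided JSON decoding against a vLLM OpenAI-compatible
# server, using the same extra_body={"guided_json": ...} mechanism the
# adapter emits above. Server URL and model name are assumptions.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

person_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
    },
    "required": ["name", "age"],
}

response = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # whatever model the server hosts
    messages=[{"role": "user", "content": "Describe a fictional person as JSON."}],
    extra_body={"guided_json": person_schema},  # vLLM-specific guided decoding
)
print(response.choices[0].message.content)  # JSON conforming to person_schema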
Inference tests (class TestInference):

@@ -139,6 +139,7 @@ class TestInference:
             "remote::tgi",
             "remote::together",
             "remote::fireworks",
+            "remote::vllm",
         ):
             pytest.skip(
                 "Other inference providers don't support structured output in completions yet"
@@ -198,6 +199,7 @@ class TestInference:
             "remote::fireworks",
             "remote::tgi",
             "remote::together",
+            "remote::vllm",
             "remote::nvidia",
         ):
             pytest.skip("Other inference providers don't support structured output yet")
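The two test hunks simply add remote::vllm to the provider allowlists of the existing structured-output tests, so those tests no longer skip when run against vLLM. A standalone approximation of what such a test asserts, reusing the client, model, and schema assumptions sketched earlier:

import json

# Standalone approximation of a structured-output assertion (client,
# model name, and person_schema reuse the assumptions sketched above).
def check_structured_output(client, model: str, schema: dict) -> None:
    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": "Describe a fictional person as JSON."}],
        extra_body={"guided_json": schema},
    )
    data = json.loads(resp.choices[0].message.content)  # must parse as JSON
    # Guided decoding should guarantee the schema's required keys exist.
    assert all(key in data for key in schema["required"])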