mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-17 08:52:39 +00:00
[#391] Add support for json structured output for vLLM
This commit is contained in:
parent
4e6c984c26
commit
1801aa145d
2 changed files with 13 additions and 0 deletions
|
|
@ -100,6 +100,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
tool_prompt_format=tool_prompt_format,
|
||||
stream=stream,
|
||||
logprobs=logprobs,
|
||||
response_format=response_format,
|
||||
)
|
||||
if stream:
|
||||
return self._stream_chat_completion(request, self.client)
|
||||
|
|
@ -180,6 +181,16 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
self.formatter,
|
||||
)
|
||||
|
||||
if fmt := request.response_format:
|
||||
if fmt.type == ResponseFormatType.json_schema.value:
|
||||
input_dict["extra_body"] = {
|
||||
"guided_json": request.response_format.json_schema
|
||||
}
|
||||
elif fmt.type == ResponseFormatType.grammar.value:
|
||||
raise NotImplementedError("Grammar response format not supported yet")
|
||||
else:
|
||||
raise ValueError(f"Unknown response format {fmt.type}")
|
||||
|
||||
return {
|
||||
"model": request.model,
|
||||
**input_dict,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue