From 1801aa145dc21019ca61bec65704a092b335be5c Mon Sep 17 00:00:00 2001
From: Aidan Do
Date: Tue, 26 Nov 2024 09:40:17 +0000
Subject: [PATCH] [#391] Add support for json structured output for vLLM

---
 llama_stack/providers/remote/inference/vllm/vllm.py      | 11 +++++++++++
 .../providers/tests/inference/test_text_inference.py     |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 0f4034478..57f3db802 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -100,6 +100,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             tool_prompt_format=tool_prompt_format,
             stream=stream,
             logprobs=logprobs,
+            response_format=response_format,
         )
         if stream:
             return self._stream_chat_completion(request, self.client)
@@ -180,6 +181,16 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
                 self.formatter,
             )
 
+        if fmt := request.response_format:
+            if fmt.type == ResponseFormatType.json_schema.value:
+                input_dict["extra_body"] = {
+                    "guided_json": request.response_format.json_schema
+                }
+            elif fmt.type == ResponseFormatType.grammar.value:
+                raise NotImplementedError("Grammar response format not supported yet")
+            else:
+                raise ValueError(f"Unknown response format {fmt.type}")
+
         return {
             "model": request.model,
             **input_dict,
diff --git a/llama_stack/providers/tests/inference/test_text_inference.py b/llama_stack/providers/tests/inference/test_text_inference.py
index f0f1d0eb2..abfbc7a85 100644
--- a/llama_stack/providers/tests/inference/test_text_inference.py
+++ b/llama_stack/providers/tests/inference/test_text_inference.py
@@ -139,6 +139,7 @@ class TestInference:
             "remote::tgi",
             "remote::together",
             "remote::fireworks",
+            "remote::vllm",
         ):
             pytest.skip(
                 "Other inference providers don't support structured output in completions yet"
@@ -198,6 +199,7 @@ class TestInference:
             "remote::fireworks",
             "remote::tgi",
             "remote::together",
+            "remote::vllm",
             "remote::nvidia",
         ):
             pytest.skip("Other inference providers don't support structured output yet")
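
Note (not part of the patch): a minimal sketch of the wire-level request this change
produces. The adapter forwards the JSON schema to vLLM via the OpenAI client's
extra_body, which vLLM's OpenAI-compatible server reads as a guided-decoding
parameter. The base URL, model id, and schema below are illustrative assumptions.

    # Sketch: reproduce the adapter's request with the plain `openai` client
    # against a vLLM OpenAI-compatible server (assumed at http://localhost:8000/v1).
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")

    # JSON schema the completion must conform to (illustrative).
    answer_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "year_of_birth": {"type": "integer"},
        },
        "required": ["name", "year_of_birth"],
    }

    response = client.chat.completions.create(
        model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model id
        messages=[{"role": "user", "content": "Who wrote Hamlet? Reply as JSON."}],
        # Mirrors the patch's input_dict["extra_body"] = {"guided_json": ...}:
        # vLLM picks up guided decoding parameters from the request's extra body.
        extra_body={"guided_json": answer_schema},
    )
    print(response.choices[0].message.content)  # JSON conforming to answer_schema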