From 122793ab9224bd9a520cc3df6628e3f15c6c5c33 Mon Sep 17 00:00:00 2001
From: Steve Grubb
Date: Mon, 4 Nov 2024 23:49:35 -0500
Subject: [PATCH] Correct a traceback in vllm (#366)

  File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/adapters/inference/vllm/vllm.py", line 136, in _stream_chat_completion
    async for chunk in process_chat_completion_stream_response(
TypeError: process_chat_completion_stream_response() takes 2 positional arguments but 3 were given

This corrects the error by removing the extra request argument from the call.
---
 llama_stack/providers/adapters/inference/vllm/vllm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_stack/providers/adapters/inference/vllm/vllm.py b/llama_stack/providers/adapters/inference/vllm/vllm.py
index 4cf55035c..aad2fdc1f 100644
--- a/llama_stack/providers/adapters/inference/vllm/vllm.py
+++ b/llama_stack/providers/adapters/inference/vllm/vllm.py
@@ -134,7 +134,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
 
         stream = _to_async_generator()
         async for chunk in process_chat_completion_stream_response(
-            request, stream, self.formatter
+            stream, self.formatter
         ):
             yield chunk
 
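For context, below is a minimal, self-contained sketch of the calling convention the patch restores. The stub bodies are hypothetical stand-ins, not the llama_stack sources; only the two-positional-argument shape of process_chat_completion_stream_response mirrors the patched code. The old call also passed request, supplying a third positional argument and raising the TypeError quoted in the commit message.

# Sketch with hypothetical stand-ins; not the llama_stack implementation.
import asyncio
from typing import Any, AsyncGenerator


async def process_chat_completion_stream_response(
    stream: AsyncGenerator[Any, None], formatter: Any
) -> AsyncGenerator[Any, None]:
    # Stand-in for the real helper: it accepts exactly (stream, formatter), so a
    # third positional argument such as `request` would raise the TypeError above.
    async for chunk in stream:
        yield chunk


async def _to_async_generator() -> AsyncGenerator[str, None]:
    # Stand-in for the adapter's inner generator that wraps the vLLM stream.
    for chunk in ("hello", " world"):
        yield chunk


async def main() -> None:
    stream = _to_async_generator()
    # Corrected call: pass only the stream and the formatter (None here),
    # not the request object as the old code did.
    async for chunk in process_chat_completion_stream_response(stream, None):
        print(chunk, end="")
    print()


asyncio.run(main())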