Remove request arg from chat completion response processing (#240)

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
Yuan Tang 2024-10-15 16:03:17 -04:00 committed by GitHub
parent 209cd3d35e
commit 80ada04f76
GPG key ID: B5690EEEBB952194
7 changed files with 14 additions and 18 deletions


@@ -207,7 +207,7 @@ class VLLMInferenceImpl(ModelRegistryHelper, Inference):
         response = OpenAICompatCompletionResponse(
             choices=[choice],
         )
-        return process_chat_completion_response(request, response, self.formatter)
+        return process_chat_completion_response(response, self.formatter)
 
     async def _stream_chat_completion(
         self, request: ChatCompletionRequest, results_generator: AsyncGenerator
@@ -229,7 +229,7 @@ class VLLMInferenceImpl(ModelRegistryHelper, Inference):
         stream = _generate_and_convert_to_openai_compat()
         async for chunk in process_chat_completion_stream_response(
-            request, stream, self.formatter
+            stream, self.formatter
         ):
             yield chunk
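
For readers skimming the diff, the shape of the change is that the response-processing helpers no longer need the original request object; they work from the response (or stream) and the formatter alone. Below is a minimal, self-contained sketch of the new two-argument call, using hypothetical simplified types in place of the real llama-stack ones. Only the names OpenAICompatCompletionResponse and process_chat_completion_response come from the diff; the formatter-as-callable and the dataclass fields are assumptions for illustration.

# Hypothetical, simplified sketch of the new calling convention; the actual
# helpers live in llama_stack.providers.utils.inference.openai_compat and
# operate on richer types than shown here.
from dataclasses import dataclass, field
from typing import Callable, List


@dataclass
class OpenAICompatCompletionChoice:
    finish_reason: str
    text: str


@dataclass
class OpenAICompatCompletionResponse:
    choices: List[OpenAICompatCompletionChoice] = field(default_factory=list)


def process_chat_completion_response(
    response: OpenAICompatCompletionResponse,
    formatter: Callable[[str, str], dict],
) -> dict:
    # The request argument is gone: everything needed to build the final
    # completion is taken from the response itself plus the formatter.
    choice = response.choices[0]
    return formatter(choice.text, choice.finish_reason)


if __name__ == "__main__":
    choice = OpenAICompatCompletionChoice(finish_reason="stop", text="Hello!")
    response = OpenAICompatCompletionResponse(choices=[choice])
    result = process_chat_completion_response(
        response, lambda text, reason: {"content": text, "stop_reason": reason}
    )
    print(result)  # {'content': 'Hello!', 'stop_reason': 'stop'}

The streaming helper follows the same pattern: process_chat_completion_stream_response now takes only the OpenAI-compatible chunk stream and the formatter, so call sites such as the vLLM provider above drop the request argument in both places.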