diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index ac9d18312..8ee838d84 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -175,6 +175,7 @@ def process_chat_completion_response(
     choice = response.choices[0]
 
     # TODO: This does not work well with tool calls for vLLM remote provider
+    # Ref: https://github.com/meta-llama/llama-stack/issues/1058
     raw_message = formatter.decode_assistant_message_from_content(
         text_from_choice(choice), get_stop_reason(choice.finish_reason)
     )