Don't load as bf16 on CPU unless fp8 is active

This commit is contained in:
Ashwin Bharambe 2024-07-22 19:09:32 -07:00
parent 8cd2e4164c
commit fef679bb34
2 changed files with 13 additions and 4 deletions

View file

@@ -1,3 +1,4 @@
from copy import deepcopy
from dataclasses import dataclass
from functools import partial
from typing import Generator, List, Optional
@@ -86,7 +87,7 @@ class LlamaModelParallelGenerator:
logprobs: bool = False,
) -> Generator:
req_obj = InferenceArgs(
messages=messages,
messages=deepcopy(messages),
temperature=temperature,
top_p=top_p,
max_gen_len=max_gen_len,