Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-04 20:14:13 +00:00
Don't load as bf16 on CPU unless fp8 is active
commit fef679bb34 (parent 8cd2e4164c)
2 changed files with 13 additions and 4 deletions
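The rendered diff below covers only one of the two changed files: the deepcopy fix. The bf16/fp8 condition named in the commit title lives in the other file, which is not shown in this view. As a hedged sketch of the kind of dtype policy the title describes (the function and parameter names here are illustrative assumptions, not the repo's actual code):

```python
# Hypothetical sketch of the policy in the commit title; `fp8_active` and
# `select_default_dtype` are illustrative names, not llama-stack's API.
import torch

def select_default_dtype(device: str, fp8_active: bool) -> None:
    # On GPU, bf16 is the usual default for Llama checkpoints. On CPU,
    # bf16 is slow and only worth loading when the weights are about to
    # be re-quantized to fp8 anyway; otherwise fall back to fp32.
    if device == "cuda" or fp8_active:
        torch.set_default_dtype(torch.bfloat16)
    else:
        torch.set_default_dtype(torch.float32)
```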
@@ -1,3 +1,4 @@
+from copy import deepcopy
 from dataclasses import dataclass
 from functools import partial
 from typing import Generator, List, Optional
@@ -86,7 +87,7 @@ class LlamaModelParallelGenerator:
         logprobs: bool = False,
     ) -> Generator:
         req_obj = InferenceArgs(
-            messages=messages,
+            messages=deepcopy(messages),
             temperature=temperature,
             top_p=top_p,
             max_gen_len=max_gen_len,

(The diff for the second changed file did not load in this view and is not shown.)
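Why the hunk replaces `messages=messages` with `messages=deepcopy(messages)`: without the copy, the request object aliases the caller's message list, so a mutation on either side of the model-parallel boundary (for example, the caller appending the streamed reply while the request is still in flight) leaks into the other. A minimal, self-contained illustration, using simplified stand-in classes rather than llama-stack's real types:

```python
# Demonstrates the aliasing bug the deepcopy avoids. `Message` and
# `Request` are simplified stand-ins for the real request types.
from copy import deepcopy
from dataclasses import dataclass

@dataclass
class Message:
    role: str
    content: str

@dataclass
class Request:
    messages: list  # either aliases or owns the caller's chat history

history = [Message("user", "hello")]

shared = Request(messages=history)           # aliases the caller's list
owned = Request(messages=deepcopy(history))  # independent snapshot

history.append(Message("assistant", "hi!"))  # caller keeps mutating

print(len(shared.messages))  # 2 -- the in-flight request changed underneath
print(len(owned.messages))   # 1 -- the snapshot is stable
```

The copy costs a little per request but makes `InferenceArgs` immune to later edits of the conversation history.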