forked from phoenix-oss/llama-stack-mirror
Small updates to quantization config
This commit is contained in:
parent
8eceebec98
commit
161aef0aae
2 changed files with 3 additions and 3 deletions
|
@@ -16,9 +16,9 @@ providers:
|
|||
- provider_id: meta0
|
||||
provider_type: meta-reference-quantized
|
||||
config:
|
||||
model: Llama3.2-3B-Instruct
|
||||
model: Llama3.2-3B-Instruct:int4-qlora-eo8
|
||||
quantization:
|
||||
type: fp8
|
||||
type: int4
|
||||
torch_seed: null
|
||||
max_seq_len: 2048
|
||||
max_batch_size: 1
|
||||
|
|
|
@@ -41,7 +41,7 @@ class Bf16QuantizationConfig(BaseModel):
|
|||
@json_schema_type
|
||||
class Int4QuantizationConfig(BaseModel):
|
||||
type: Literal[QuantizationType.int4.value] = QuantizationType.int4.value
|
||||
scheme: Optional[str] = None
|
||||
scheme: Optional[str] = "int4_weight_int8_dynamic_activation"
|
||||
|
||||
|
||||
QuantizationConfig = Annotated[
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue