Small updates to quantization config

Ashwin Bharambe 2024-10-24 12:08:43 -07:00
parent 8eceebec98
commit 161aef0aae
2 changed files with 3 additions and 3 deletions


@@ -16,9 +16,9 @@ providers:
   - provider_id: meta0
     provider_type: meta-reference-quantized
     config:
-      model: Llama3.2-3B-Instruct
+      model: Llama3.2-3B-Instruct:int4-qlora-eo8
       quantization:
-        type: fp8
+        type: int4
       torch_seed: null
       max_seq_len: 2048
       max_batch_size: 1
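
For context, a minimal sketch of how the updated quantization block could be loaded and validated, assuming PyYAML; the loading code is an illustration, not the loader this repository actually uses.

# Hedged sketch: parse the provider config block from the hunk above.
# The YAML mirrors the new (+) lines; yaml.safe_load is standard PyYAML.
import yaml

raw = """
model: Llama3.2-3B-Instruct:int4-qlora-eo8
quantization:
  type: int4
torch_seed: null
max_seq_len: 2048
max_batch_size: 1
"""

config = yaml.safe_load(raw)
assert config["quantization"]["type"] == "int4"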


@@ -41,7 +41,7 @@ class Bf16QuantizationConfig(BaseModel):
 @json_schema_type
 class Int4QuantizationConfig(BaseModel):
     type: Literal[QuantizationType.int4.value] = QuantizationType.int4.value
-    scheme: Optional[str] = None
+    scheme: Optional[str] = "int4_weight_int8_dynamic_activation"
 
 
 QuantizationConfig = Annotated[
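
A self-contained sketch of the pattern around this change, so the effect of the new default is visible. The Int4 field names and default come from the diff; the bf16/fp8 enum members and the Field(discriminator=...) union are assumptions about the surrounding file, and the repository's @json_schema_type decorator is omitted here.

# Hedged sketch of the quantization config models touched by this hunk.
from enum import Enum
from typing import Annotated, Literal, Optional, Union

from pydantic import BaseModel, Field


class QuantizationType(Enum):
    bf16 = "bf16"  # assumed member, mirroring Bf16QuantizationConfig above
    fp8 = "fp8"    # assumed member, mirroring the old YAML default
    int4 = "int4"


class Bf16QuantizationConfig(BaseModel):
    type: Literal[QuantizationType.bf16.value] = QuantizationType.bf16.value


class Fp8QuantizationConfig(BaseModel):
    type: Literal[QuantizationType.fp8.value] = QuantizationType.fp8.value


class Int4QuantizationConfig(BaseModel):
    type: Literal[QuantizationType.int4.value] = QuantizationType.int4.value
    # After this commit the scheme defaults to an int4-weight /
    # int8-dynamic-activation recipe instead of None.
    scheme: Optional[str] = "int4_weight_int8_dynamic_activation"


QuantizationConfig = Annotated[
    Union[Bf16QuantizationConfig, Fp8QuantizationConfig, Int4QuantizationConfig],
    Field(discriminator="type"),
]

# Observable effect of the change: a config built without arguments now
# carries the new default scheme.
assert Int4QuantizationConfig().scheme == "int4_weight_int8_dynamic_activation"

Keeping "type" as a Literal lets the Annotated union dispatch on it, so a YAML block with "type: int4" deserializes straight into Int4QuantizationConfig and picks up the new default scheme.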