mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 10:54:19 +00:00
Small updates to quantization config
This commit is contained in:
parent
8eceebec98
commit
161aef0aae
2 changed files with 3 additions and 3 deletions
|
@@ -16,9 +16,9 @@ providers:
|
||||||
- provider_id: meta0
|
- provider_id: meta0
|
||||||
provider_type: meta-reference-quantized
|
provider_type: meta-reference-quantized
|
||||||
config:
|
config:
|
||||||
model: Llama3.2-3B-Instruct
|
model: Llama3.2-3B-Instruct:int4-qlora-eo8
|
||||||
quantization:
|
quantization:
|
||||||
type: fp8
|
type: int4
|
||||||
torch_seed: null
|
torch_seed: null
|
||||||
max_seq_len: 2048
|
max_seq_len: 2048
|
||||||
max_batch_size: 1
|
max_batch_size: 1
|
||||||
|
|
|
@@ -41,7 +41,7 @@ class Bf16QuantizationConfig(BaseModel):
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class Int4QuantizationConfig(BaseModel):
|
class Int4QuantizationConfig(BaseModel):
|
||||||
type: Literal[QuantizationType.int4.value] = QuantizationType.int4.value
|
type: Literal[QuantizationType.int4.value] = QuantizationType.int4.value
|
||||||
scheme: Optional[str] = None
|
scheme: Optional[str] = "int4_weight_int8_dynamic_activation"
|
||||||
|
|
||||||
|
|
||||||
QuantizationConfig = Annotated[
|
QuantizationConfig = Annotated[
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue