# forked from phoenix-oss/llama-stack-mirror
# (original file: 14 lines, 406 B, YAML)
---
# Configuration template for the llama-stack "inline" inference provider.
# Values in {braces} are placeholders filled in via Python str.format()
# before this file is parsed as YAML.
inference_config:
  impl_config:
    impl_type: "inline"
    checkpoint_config:
      checkpoint:
        checkpoint_type: "pytorch"
        # Quoted so the rendered path is always read as a string, even if
        # it starts with a YAML indicator or looks like another type.
        checkpoint_dir: "{checkpoint_dir}/"
        tokenizer_path: "{checkpoint_dir}/tokenizer.model"
        # Deliberately unquoted: the rendered value must parse as an integer.
        model_parallel_size: {model_parallel_size}
        quantization_format: bf16
    # NOTE(review): nesting below was reconstructed from a flattened extract
    # (original indentation was lost) — confirm against the upstream template.
    quantization: null
    torch_seed: null
    max_seq_len: 16384
    max_batch_size: 1