llama-stack/llama_toolchain/data/default_inference_config.yaml
inference_config:
  impl_config:
    impl_type: "inline"
    checkpoint_config:
      checkpoint:
        checkpoint_type: "pytorch"
        checkpoint_dir: {checkpoint_dir}/
        tokenizer_path: {checkpoint_dir}/tokenizer.model
        model_parallel_size: {model_parallel_size}
        quantization_format: bf16
    quantization: null
    torch_seed: null
    max_seq_len: 16384
    max_batch_size: 1
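
The {checkpoint_dir} and {model_parallel_size} fields are placeholders that must be substituted before the YAML is parsed. Below is a minimal sketch of how such a template could be rendered and loaded, assuming Python's str.format and PyYAML; the helper name load_inference_config and the loading mechanism are illustrative assumptions, not necessarily what llama_toolchain does internally.

from pathlib import Path

import yaml  # assumption: PyYAML; the toolchain's actual loader may differ


def load_inference_config(template_path, checkpoint_dir, model_parallel_size):
    # Hypothetical helper: read the raw template, fill the {checkpoint_dir}
    # and {model_parallel_size} placeholders, then parse the result as YAML.
    raw = Path(template_path).read_text()
    rendered = raw.format(
        checkpoint_dir=checkpoint_dir,
        model_parallel_size=model_parallel_size,
    )
    return yaml.safe_load(rendered)


# Example usage with made-up values:
config = load_inference_config(
    "default_inference_config.yaml",
    checkpoint_dir="/models/Meta-Llama-3-8B-Instruct",
    model_parallel_size=1,
)
print(config["inference_config"]["impl_config"]["max_seq_len"])  # 16384

Note that str.format treats every brace pair as a replacement field, so this approach only works while the template contains no braces other than the two placeholders, which holds for the file above.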