llama-stack-mirror/toolchain/cli/inference/default_configuration.yaml

inference_config:
  impl_type: "inline"  # run the model in-process rather than against a remote server
  inline_config:
    checkpoint_type: "pytorch"                        # format of the model checkpoint on disk
    checkpoint_dir: {checkpoint_dir}/                 # placeholder, substituted when the config is generated
    tokenizer_path: {checkpoint_dir}/tokenizer.model  # tokenizer shipped alongside the checkpoint
    model_parallel_size: {model_parallel_size}        # number of model-parallel shards (one per GPU)
    max_seq_len: 2048                                 # maximum context length in tokens
    max_batch_size: 1                                 # maximum number of sequences per batch
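
The `{checkpoint_dir}` and `{model_parallel_size}` placeholders are presumably filled in by the CLI when a concrete configuration is written out. Below is a minimal sketch, not the repo's actual loader, of how that substitution and parsing could work with PyYAML; the function name `render_config` and the example checkpoint path are illustrative assumptions.

from pathlib import Path

import yaml

# Path to the template shown above.
TEMPLATE = Path("toolchain/cli/inference/default_configuration.yaml")


def render_config(checkpoint_dir: str, model_parallel_size: int) -> dict:
    """Fill the template placeholders and parse the resulting YAML."""
    rendered = TEMPLATE.read_text().format(
        # The template already appends a trailing "/", so strip any here.
        checkpoint_dir=checkpoint_dir.rstrip("/"),
        model_parallel_size=model_parallel_size,
    )
    return yaml.safe_load(rendered)


if __name__ == "__main__":
    # Hypothetical checkpoint location, for illustration only.
    cfg = render_config("/home/user/.llama/checkpoints/Meta-Llama-3-8B", 1)
    print(cfg["inference_config"]["inline_config"]["tokenizer_path"])

Using str.format keeps the template trivially simple; the trade-off is that any literal braces added to the YAML later would need escaping, which is why real config generators often prefer a templating library instead.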