# Inference configuration template.
# NOTE(review): `{checkpoint_dir}` and `{model_parallel_size}` look like
# placeholders filled in by a templating step (e.g. str.format) before this
# file is parsed — confirm against the loader.
inference_config:
  impl_type: "inline"
  inline_config:
    checkpoint_type: "pytorch"
    # Quoted so the unsubstituted template is still valid YAML (a plain
    # scalar starting with "{" would be parsed as a flow mapping).
    checkpoint_dir: "{checkpoint_dir}/"
    tokenizer_path: "{checkpoint_dir}/tokenizer.model"
    # Left unquoted so the substituted value parses as an integer.
    model_parallel_size: {model_parallel_size}
    max_seq_len: 2048
    max_batch_size: 1