use default_config file to configure inference

This commit is contained in:
Hardik Shah 2024-07-21 19:26:11 -07:00
parent c64b8cba22
commit d95f5f863d
3 changed files with 25 additions and 15 deletions

View file

@@ -0,0 +1,9 @@
# Default inference configuration template.
# Placeholders in {braces} are substituted via Python str.format before the
# file is parsed as YAML.
inference_config:
  # Run inference in-process rather than against a remote service.
  impl_type: "inline"
  inline_config:
    checkpoint_type: "pytorch"
    # Quoted: an unquoted value starting with '{' is parsed by YAML as a
    # flow mapping, which breaks loading of the raw template and of any
    # rendered path containing YAML-special characters.
    checkpoint_dir: "{checkpoint_dir}/"
    tokenizer_path: "{checkpoint_dir}/tokenizer.model"
    # Left unquoted so the rendered value is read as an integer.
    model_parallel_size: {model_parallel_size}
    # Maximum context length in tokens.
    max_seq_len: 2048
    max_batch_size: 1