Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-07-29 07:14:20 +00:00
use default_config file to configure inference
This commit is contained in:
parent c64b8cba22
commit d95f5f863d
3 changed files with 25 additions and 15 deletions
requirements.txt (add/remove markers lost in extraction; resulting lines shown)
@@ -1,5 +1,6 @@
accelerate
black==24.4.2
blobfile
codeshield
fairscale
fastapi
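These are the opening entries of the dependency list; assuming the file keeps the conventional name requirements.txt, the whole set installs with:

    pip install -r requirements.txt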
toolchain/cli/inference/configure.py
@@ -46,17 +46,17 @@ class InferenceConfigure(Subcommand):
         model_parallel_size,
         yaml_output_path
     ):
-        yaml_content = textwrap.dedent(f"""
-        inference_config:
-            impl_type: "inline"
-            inline_config:
-                checkpoint_type: "pytorch"
-                checkpoint_dir: {checkpoint_dir}/
-                tokenizer_path: {checkpoint_dir}/tokenizer.model
-                model_parallel_size: {model_parallel_size}
-                max_seq_len: 2048
-                max_batch_size: 1
-        """)
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        default_conf_path = os.path.join(current_dir, "default_configuration.yaml")
+
+        with open(default_conf_path, "r") as f:
+            yaml_content = f.read()
+
+        yaml_content = yaml_content.format(
+            checkpoint_dir=checkpoint_dir,
+            model_parallel_size=model_parallel_size,
+        )

         with open(yaml_output_path, 'w') as yaml_file:
             yaml_file.write(yaml_content.strip())
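The rewritten method fills the template with plain str.format instead of an f-string. Below is a minimal sketch of that substitution step, self-contained, with an inline template string standing in for default_configuration.yaml; the checkpoint path and parallel size are illustrative values, not ones from this commit:

    # Sketch of the str.format substitution configure.py now performs.
    # The inline template mirrors default_configuration.yaml.
    template = (
        "inference_config:\n"
        "    inline_config:\n"
        "        checkpoint_dir: {checkpoint_dir}/\n"
        "        model_parallel_size: {model_parallel_size}\n"
    )

    rendered = template.format(
        checkpoint_dir="/tmp/llama-checkpoints",  # illustrative path
        model_parallel_size=1,                    # illustrative value
    )
    print(rendered.strip())

One side effect of str.format worth noting: any literal { or } added to the template later would have to be doubled as {{ and }}, since single braces are treated as placeholders.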
toolchain/cli/inference/default_configuration.yaml (new file)
@@ -0,0 +1,9 @@
+inference_config:
+    impl_type: "inline"
+    inline_config:
+        checkpoint_type: "pytorch"
+        checkpoint_dir: {checkpoint_dir}/
+        tokenizer_path: {checkpoint_dir}/tokenizer.model
+        model_parallel_size: {model_parallel_size}
+        max_seq_len: 2048
+        max_batch_size: 1
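Once the placeholders are filled, the rendered output is ordinary YAML. A hedged sketch of loading it back for inspection, assuming PyYAML is available (nothing in this diff pins it) and using a stand-in for whatever yaml_output_path was given:

    import yaml  # PyYAML; an assumption, not a requirement stated by this commit

    # "inference_config.yaml" is a stand-in for the real yaml_output_path.
    with open("inference_config.yaml") as f:
        cfg = yaml.safe_load(f)

    inline = cfg["inference_config"]["inline_config"]
    print(inline["checkpoint_dir"], inline["model_parallel_size"])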