# mirror of https://github.com/meta-llama/llama-stack.git
# synced 2025-10-04 04:04:14 +00:00
# Inference configuration for serving a Llama model locally.
model_inference_config:
  # How the model is served; "inline" runs it in-process.
  impl_type: "inline"

  # Settings used when impl_type is "inline".
  inline_config:
    # Checkpoint format on disk.
    checkpoint_type: "pytorch"
    # Directory holding the model checkpoint shards.
    checkpoint_dir: /home/ashwin/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000
    # Path to the tokenizer model file for the same checkpoint.
    tokenizer_path: /home/ashwin/local/checkpoints/Meta-Llama-3.1-8B-Instruct-20240710150000/tokenizer.model
    # Number of model-parallel shards (1 = single GPU/process).
    model_parallel_size: 1
    # Maximum sequence length, in tokens.
    max_seq_len: 2048
    # Maximum batch size per inference call.
    max_batch_size: 1
    # NOTE(review): nesting of quantization under inline_config is inferred
    # from the surrounding keys — the scraped source lost all indentation;
    # confirm against the consumer's schema.
    quantization:
      type: "fp8"