allow changing model parallel size

This commit is contained in:
Ashwin Bharambe 2025-04-07 11:34:28 -07:00
parent ff6c47d4e5
commit 63cf5dda50
5 changed files with 15 additions and 46 deletions

View file

@@ -18,6 +18,9 @@ providers:
model: ${env.INFERENCE_MODEL}
max_seq_len: 4096
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
quantization:
type: ${env.QUANTIZATION_TYPE:bf16}
model_parallel_size: ${env.MODEL_PARALLEL_SIZE:null}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
@@ -27,6 +30,9 @@ providers:
model: ${env.SAFETY_MODEL}
max_seq_len: 4096
checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
quantization:
type: ${env.QUANTIZATION_TYPE:bf16}
model_parallel_size: ${env.MODEL_PARALLEL_SIZE:null}
vector_io:
- provider_id: faiss
provider_type: inline::faiss

View file

@@ -18,6 +18,9 @@ providers:
model: ${env.INFERENCE_MODEL}
max_seq_len: 4096
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
quantization:
type: ${env.QUANTIZATION_TYPE:bf16}
model_parallel_size: ${env.MODEL_PARALLEL_SIZE:null}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}