mirror of
https://github.com/meta-llama/llama-stack.git
synced 2026-01-01 20:19:59 +00:00
allow changing model parallel size
This commit is contained in:
parent
ff6c47d4e5
commit
63cf5dda50
5 changed files with 15 additions and 46 deletions
|
|
@ -18,6 +18,9 @@ providers:
|
|||
model: ${env.INFERENCE_MODEL}
|
||||
max_seq_len: 4096
|
||||
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
|
||||
quantization:
|
||||
type: ${env.QUANTIZATION_TYPE:bf16}
|
||||
model_parallel_size: ${env.MODEL_PARALLEL_SIZE:null}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
|
|
@ -27,6 +30,9 @@ providers:
|
|||
model: ${env.SAFETY_MODEL}
|
||||
max_seq_len: 4096
|
||||
checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
|
||||
quantization:
|
||||
type: ${env.QUANTIZATION_TYPE:bf16}
|
||||
model_parallel_size: ${env.MODEL_PARALLEL_SIZE:null}
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
|
|
|
|||
|
|
@ -18,6 +18,9 @@ providers:
|
|||
model: ${env.INFERENCE_MODEL}
|
||||
max_seq_len: 4096
|
||||
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
|
||||
quantization:
|
||||
type: ${env.QUANTIZATION_TYPE:bf16}
|
||||
model_parallel_size: ${env.MODEL_PARALLEL_SIZE:null}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue