mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-31 05:13:53 +00:00
Merge-related changes.
This commit is contained in:
commit
a714bbac9d
95 changed files with 11044 additions and 4639 deletions
|
|
@ -16,11 +16,12 @@ providers:
|
|||
- provider_id: vllm
|
||||
provider_type: inline::vllm
|
||||
config:
|
||||
model: ${env.INFERENCE_MODEL:Llama3.2-3B-Instruct}
|
||||
tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:1}
|
||||
max_tokens: ${env.MAX_TOKENS:4096}
|
||||
max_model_len: ${env.MAX_MODEL_LEN:4096}
|
||||
max_num_seqs: ${env.MAX_NUM_SEQS:4}
|
||||
enforce_eager: ${env.ENFORCE_EAGER:False}
|
||||
gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:0.7}
|
||||
gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:0.3}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue