mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
Update more distribution docs to be simpler and partially codegen'ed
This commit is contained in:
parent
e84d4436b5
commit
2411a44833
51 changed files with 1188 additions and 291 deletions
|
@ -37,11 +37,11 @@ class VLLMConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(cls):
|
||||
return {
|
||||
"model": "${env.VLLM_INFERENCE_MODEL:Llama3.2-3B-Instruct}",
|
||||
"tensor_parallel_size": "${env.VLLM_TENSOR_PARALLEL_SIZE:1}",
|
||||
"max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
|
||||
"enforce_eager": "${env.VLLM_ENFORCE_EAGER:False}",
|
||||
"gpu_memory_utilization": "${env.VLLM_GPU_MEMORY_UTILIZATION:0.3}",
|
||||
"model": "${env.INFERENCE_MODEL:Llama3.2-3B-Instruct}",
|
||||
"tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:1}",
|
||||
"max_tokens": "${env.MAX_TOKENS:4096}",
|
||||
"enforce_eager": "${env.ENFORCE_EAGER:False}",
|
||||
"gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:0.7}",
|
||||
}
|
||||
|
||||
@field_validator("model")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue