mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-17 22:49:47 +00:00
Start auto-generating { build, run, doc.md } for distributions
This commit is contained in:
parent
20bf2f50c2
commit
cfa913fdd5
11 changed files with 362 additions and 23 deletions
|
|
@@ -34,6 +34,16 @@ class VLLMConfig(BaseModel):
        default=0.3,
    )

    @classmethod
    def sample_dict(cls):
        return {
            "model": "${env.VLLM_INFERENCE_MODEL:Llama3.2-3B-Instruct}",
            "tensor_parallel_size": "${env.VLLM_TENSOR_PARALLEL_SIZE:1}",
            "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
            "enforce_eager": "${env.VLLM_ENFORCE_EAGER:False}",
            "gpu_memory_utilization": "${env.VLLM_GPU_MEMORY_UTILIZATION:0.3}",
        }

    @field_validator("model")
    @classmethod
    def validate_model(cls, model: str) -> str:
Loading…
Add table
Add a link
Reference in a new issue