mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-18 19:09:47 +00:00
Docs for meta-reference-gpu
This commit is contained in:
parent
38563d7c00
commit
dd732f037f
9 changed files with 374 additions and 101 deletions
|
|
@ -49,6 +49,18 @@ class MetaReferenceInferenceConfig(BaseModel):
|
|||
resolved = resolve_model(self.model)
|
||||
return resolved.pth_file_count
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(
|
||||
cls,
|
||||
model: str = "Llama3.2-3B-Instruct",
|
||||
checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}",
|
||||
) -> Dict[str, Any]:
|
||||
return {
|
||||
"model": model,
|
||||
"max_seq_len": 4096,
|
||||
"checkpoint_dir": checkpoint_dir,
|
||||
}
|
||||
|
||||
|
||||
class MetaReferenceQuantizedInferenceConfig(MetaReferenceInferenceConfig):
|
||||
quantization: QuantizationConfig
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue