temp commit

This commit is contained in:
Botao Chen 2024-12-16 16:44:15 -08:00
parent de44af1501
commit 35b1a6f2dc
7 changed files with 54 additions and 113 deletions

View file

@@ -79,7 +79,7 @@ class Llama:
config: Union[
MetaReferenceInferenceConfig, MetaReferenceQuantizedInferenceConfig
],
request: Optional[Union[CompletionRequest, ChatCompletionRequest]] = None,
model_id: str,
):
"""
Build a Llama instance by initializing and loading a model checkpoint.
@@ -88,12 +88,7 @@ class Llama:
This method initializes the distributed process group, sets the device to CUDA,
and loads the pre-trained model and tokenizer.
"""
if config.model:
model = resolve_model(config.model)
elif request:
model = resolve_model(request.model)
else:
raise RuntimeError("you need to provide a model for inference")
model = resolve_model(model_id)
llama_model = model.core_model_id.value