Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-17 20:02:37 +00:00)
temp commit

parent de44af1501
commit 35b1a6f2dc

7 changed files with 54 additions and 113 deletions
@@ -79,7 +79,7 @@ class Llama:
         config: Union[
             MetaReferenceInferenceConfig, MetaReferenceQuantizedInferenceConfig
         ],
-        request: Optional[Union[CompletionRequest, ChatCompletionRequest]] = None,
+        model_id: str,
     ):
         """
         Build a Llama instance by initializing and loading a model checkpoint.
@@ -88,12 +88,7 @@ class Llama:
         This method initializes the distributed process group, sets the device to CUDA,
         and loads the pre-trained model and tokenizer.
         """
-        if config.model:
-            model = resolve_model(config.model)
-        elif request:
-            model = resolve_model(request.model)
-        else:
-            raise RuntimeError("you need to provide a model for inference")
+        model = resolve_model(model_id)

         llama_model = model.core_model_id.value
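
The change above replaces implicit model selection (fall back from config.model to the incoming request, else raise at runtime) with an explicit, required model_id parameter that is resolved directly. A minimal runnable sketch of the before/after shape, using toy stand-ins rather than the real llama-stack types (Config, Request, and the resolve_model body here are all illustrative):

from dataclasses import dataclass
from typing import Optional


@dataclass
class Config:
    # Stand-in for MetaReferenceInferenceConfig; only the field used below.
    model: Optional[str] = None


@dataclass
class Request:
    # Stand-in for CompletionRequest / ChatCompletionRequest.
    model: str = "Llama3.1-8B-Instruct"


def resolve_model(model_id: str) -> str:
    # Stand-in for llama-stack's resolve_model(), which looks up a model
    # descriptor by id; here it simply echoes the id back.
    return model_id


# Before this commit: the callee guessed the model from config or request.
def build_before(config: Config, request: Optional[Request] = None) -> str:
    if config.model:
        return resolve_model(config.model)
    elif request:
        return resolve_model(request.model)
    else:
        raise RuntimeError("you need to provide a model for inference")


# After this commit: the caller must pass the model id explicitly.
def build_after(config: Config, model_id: str) -> str:
    return resolve_model(model_id)


print(build_after(Config(), model_id="Llama3.1-8B-Instruct"))

Making model_id a required parameter pushes the model decision to the call site: a missing model now surfaces as a TypeError at the call rather than a RuntimeError deep inside checkpoint loading.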