forked from phoenix-oss/llama-stack-mirror
Add an option to not use elastic agents for meta-reference inference (#269)
This commit is contained in:
parent
be3c5c034d
commit
33afd34e6f
2 changed files with 33 additions and 8 deletions
|
@ -17,13 +17,18 @@ from llama_stack.providers.utils.inference import supported_inference_models
|
|||
|
||||
class MetaReferenceInferenceConfig(BaseModel):
|
||||
model: str = Field(
|
||||
default="Llama3.1-8B-Instruct",
|
||||
default="Llama3.2-3B-Instruct",
|
||||
description="Model descriptor from `llama model list`",
|
||||
)
|
||||
torch_seed: Optional[int] = None
|
||||
max_seq_len: int = 4096
|
||||
max_batch_size: int = 1
|
||||
|
||||
# when this is False, we assume that the distributed process group is set up by someone
|
||||
# outside of this code (e.g., when run inside `torchrun`). that is useful for clients
|
||||
# (including our testing code) who might be using llama-stack as a library.
|
||||
create_distributed_process_group: bool = True
|
||||
|
||||
@field_validator("model")
|
||||
@classmethod
|
||||
def validate_model(cls, model: str) -> str:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue