Add an option to not use elastic agents for meta-reference inference (#269)

2024-10-18 12:51:10 -07:00 · 2024-10-18 12:51:10 -07:00 · 33afd34e6f
commit 33afd34e6f
parent be3c5c034d
2 changed files with 33 additions and 8 deletions
--- a/llama_stack/providers/impls/meta_reference/inference/config.py
+++ b/llama_stack/providers/impls/meta_reference/inference/config.py
@@ -17,13 +17,18 @@ from llama_stack.providers.utils.inference import supported_inference_models

 class MetaReferenceInferenceConfig(BaseModel):
    model: str = Field(
-        default="Llama3.1-8B-Instruct",
+        default="Llama3.2-3B-Instruct",
        description="Model descriptor from `llama model list`",
    )
    torch_seed: Optional[int] = None
    max_seq_len: int = 4096
    max_batch_size: int = 1

+    # When this is False, we assume that the distributed process group is set up by someone
+    # outside of this code (e.g., when run inside `torchrun`). That is useful for clients
+    # (including our testing code) that might be using llama-stack as a library.
+    create_distributed_process_group: bool = True
+
    @field_validator("model")
    @classmethod
    def validate_model(cls, model: str) -> str: