Write a script to perform the codegen

2025-07-31 16:01:46 +00:00 · 2024-11-17 14:01:04 -08:00 · 2024-11-17 14:01:04 -08:00 · 0218e68849
commit 0218e68849
parent f38e76ee98
9 changed files with 223 additions and 142 deletions
--- a/docs/source/getting_started/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/getting_started/distributions/self_hosted_distro/remote-vllm.md
--- a/llama_stack/providers/inline/agents/meta_reference/config.py
+++ b/llama_stack/providers/inline/agents/meta_reference/config.py
@ -4,19 +4,22 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from pydantic import BaseModel, Field
+from typing import Any, Dict
+
+from pydantic import BaseModel

 from llama_stack.providers.utils.kvstore import KVStoreConfig
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig


 class MetaReferenceAgentsImplConfig(BaseModel):
-    persistence_store: KVStoreConfig = Field(default=SqliteKVStoreConfig())
+    persistence_store: KVStoreConfig

    @classmethod
-    def sample_run_config(cls):
+    def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]:
        return {
            "persistence_store": SqliteKVStoreConfig.sample_run_config(
-                db_name="agents_store.db"
-            ),
+                __distro_dir__=__distro_dir__,
+                db_name="agents_store.db",
+            ).model_dump(),
        }
--- a/llama_stack/providers/inline/memory/faiss/config.py
+++ b/llama_stack/providers/inline/memory/faiss/config.py
@ -4,10 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from typing import Any, Dict
+
 from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel

-from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
 from llama_stack.providers.utils.kvstore.config import (
    KVStoreConfig,
    SqliteKVStoreConfig,
@ -16,6 +17,13 @@ from llama_stack.providers.utils.kvstore.config import (

@json_schema_type
 class FaissImplConfig(BaseModel):
-    kvstore: KVStoreConfig = SqliteKVStoreConfig(
-        db_path=(RUNTIME_BASE_DIR / "faiss_store.db").as_posix()
-    )  # Uses SQLite config specific to FAISS storage
+    kvstore: KVStoreConfig
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]:
+        return {
+            "kvstore": SqliteKVStoreConfig.sample_run_config(
+                __distro_dir__=__distro_dir__,
+                db_name="faiss_store.db",
+            ).model_dump(),
+        }
--- a/llama_stack/providers/utils/kvstore/config.py
+++ b/llama_stack/providers/utils/kvstore/config.py
@ -54,11 +54,15 @@ class SqliteKVStoreConfig(CommonConfig):
    )

    @classmethod
-    def sample_run_config(cls, dir: str = "runtime", db_name: str = "kvstore.db"):
+    def sample_run_config(
+        cls, __distro_dir__: str = "runtime", db_name: str = "kvstore.db"
+    ):
        return {
            "type": "sqlite",
            "namespace": None,
-            "db_path": "${env.SQLITE_STORE_DIR:~/.llama/" + f"{dir}/{db_name}" + "}",
+            "db_path": "${env.SQLITE_STORE_DIR:~/.llama/"
+            + f"{__distro_dir__}/{db_name}"
+            + "}",
        }


--- a/llama_stack/scripts/save_distributions.py
+++ b/llama_stack/scripts/save_distributions.py
@ -0,0 +1,78 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import concurrent.futures
+import importlib
+from functools import partial
+from pathlib import Path
+from typing import Iterator
+
+from rich.progress import Progress, SpinnerColumn, TextColumn
+
+
+REPO_ROOT = Path(__file__).parent.parent.parent
+
+
+def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
+    """Find immediate subdirectories in the templates folder."""
+    if not templates_dir.exists():
+        raise FileNotFoundError(f"Templates directory not found: {templates_dir}")
+
+    return (d for d in templates_dir.iterdir() if d.is_dir())
+
+
+def process_template(template_dir: Path, progress) -> None:
+    """Process a single template directory."""
+    progress.print(f"Processing {template_dir.name}")
+
+    try:
+        # Import the module directly
+        module_name = f"llama_stack.templates.{template_dir.name}"
+        module = importlib.import_module(module_name)
+
+        # Get and save the distribution template
+        if template_func := getattr(module, "get_distribution_template", None):
+            template = template_func()
+
+            template.save_distribution(
+                yaml_output_dir=REPO_ROOT / "distributions" / template.name,
+                doc_output_dir=REPO_ROOT
+                / "docs/source/getting_started/distributions"
+                / f"{template.distro_type}_distro",
+            )
+        else:
+            progress.print(
+                f"[yellow]Warning: {template_dir.name} has no get_distribution_template function"
+            )
+
+    except Exception as e:
+        progress.print(f"[red]Error processing {template_dir.name}: {str(e)}")
+
+
+def main():
+    templates_dir = REPO_ROOT / "llama_stack" / "templates"
+
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+    ) as progress:
+        template_dirs = list(find_template_dirs(templates_dir))
+        task = progress.add_task(
+            "Processing distribution templates...", total=len(template_dirs)
+        )
+
+        # Create a partial function with the progress bar
+        process_func = partial(process_template, progress=progress)
+
+        # Process templates in parallel
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            # Submit all tasks and wait for completion
+            list(executor.map(process_func, template_dirs))
+            progress.update(task, advance=len(template_dirs))
+
+
+if __name__ == "__main__":
+    main()
--- a/llama_stack/templates/remote-vllm/init.py
+++ b/llama_stack/templates/remote-vllm/init.py
@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .vllm import get_distribution_template  # noqa: F401
--- a/llama_stack/templates/remote-vllm/doc_template.md
+++ b/llama_stack/templates/remote-vllm/doc_template.md
@ -78,7 +78,7 @@ inference:
 If you are using Conda, you can build and run the Llama Stack server with the following commands:
 ```bash
 cd distributions/remote-vllm
-llama stack build --template remote_vllm --image-type conda
+llama stack build --template remote-vllm --image-type conda
 llama stack run run.yaml
 ```

--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@ -0,0 +1,100 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pathlib import Path
+
+from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+
+
+def get_distribution_template() -> DistributionTemplate:
+    providers = {
+        "inference": ["remote::vllm"],
+        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "safety": ["inline::llama-guard"],
+        "agents": ["inline::meta-reference"],
+        "telemetry": ["inline::meta-reference"],
+    }
+
+    inference_provider = Provider(
+        provider_id="vllm-inference",
+        provider_type="remote::vllm",
+        config=VLLMInferenceAdapterConfig.sample_run_config(
+            url="${env.VLLM_URL}",
+        ),
+    )
+
+    inference_model = ModelInput(
+        model_id="${env.INFERENCE_MODEL}",
+        provider_id="vllm-inference",
+    )
+    safety_model = ModelInput(
+        model_id="${env.SAFETY_MODEL}",
+        provider_id="vllm-safety",
+    )
+
+    return DistributionTemplate(
+        name="remote-vllm",
+        distro_type="self_hosted",
+        description="Use (an external) vLLM server for running LLM inference",
+        template_path=Path(__file__).parent / "doc_template.md",
+        providers=providers,
+        default_models=[inference_model, safety_model],
+        run_configs={
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [inference_provider],
+                },
+                default_models=[inference_model],
+            ),
+            "run-with-safety.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [
+                        inference_provider,
+                        Provider(
+                            provider_id="vllm-safety",
+                            provider_type="remote::vllm",
+                            config=VLLMInferenceAdapterConfig.sample_run_config(
+                                url="${env.SAFETY_VLLM_URL}",
+                            ),
+                        ),
+                    ],
+                },
+                default_models=[
+                    inference_model,
+                    safety_model,
+                ],
+                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
+            ),
+        },
+        docker_compose_env_vars={
+            "LLAMASTACK_PORT": (
+                "5001",
+                "Port for the Llama Stack distribution server",
+            ),
+            "INFERENCE_MODEL": (
+                "meta-llama/Llama-3.2-3B-Instruct",
+                "Inference model loaded into the vLLM server",
+            ),
+            "VLLM_URL": (
+                "http://host.docker.internal:5100}/v1",
+                "URL of the vLLM server with the main inference model",
+            ),
+            "MAX_TOKENS": (
+                "4096",
+                "Maximum number of tokens for generation",
+            ),
+            "SAFETY_VLLM_URL": (
+                "http://host.docker.internal:5101/v1",
+                "URL of the vLLM server with the safety model",
+            ),
+            "SAFETY_MODEL": (
+                "meta-llama/Llama-Guard-3-1B",
+                "Name of the safety (Llama-Guard) model to use",
+            ),
+        },
+    )
--- a/llama_stack/templates/template.py
+++ b/llama_stack/templates/template.py
@ -9,7 +9,7 @@ from datetime import datetime
 from io import StringIO

 from pathlib import Path
-from typing import Dict, List, Optional, Set, Tuple
+from typing import Dict, List, Literal, Optional, Set, Tuple

 import jinja2
 import yaml
@ -29,9 +29,6 @@ from llama_stack.distribution.datatypes import (
 )
 from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
-from llama_stack.providers.remote.inference.vllm.config import (
-    VLLMInferenceAdapterConfig,
-)
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig


@ -70,7 +67,9 @@ class RunConfigSettings(BaseModel):

            config_class = instantiate_class_type(config_class)
            if hasattr(config_class, "sample_run_config"):
-                config = config_class.sample_run_config()
+                config = config_class.sample_run_config(
+                    __distro_dir__=f"distributions/{name}"
+                )
            else:
                config = {}

@ -108,6 +107,7 @@ class DistributionTemplate(BaseModel):

    name: str
    description: str
+    distro_type: Literal["self_hosted", "remote_hosted", "ondevice"]

    providers: Dict[str, List[str]]
    run_configs: Dict[str, RunConfigSettings]
@ -159,140 +159,21 @@ class DistributionTemplate(BaseModel):
            default_models=self.default_models,
        )

-    def save_distribution(self, output_dir: Path) -> None:
-        output_dir.mkdir(parents=True, exist_ok=True)
+    def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None:
+        for output_dir in [yaml_output_dir, doc_output_dir]:
+            output_dir.mkdir(parents=True, exist_ok=True)

        build_config = self.build_config()
-        with open(output_dir / "build.yaml", "w") as f:
+        with open(yaml_output_dir / "build.yaml", "w") as f:
            yaml.safe_dump(build_config.model_dump(), f, sort_keys=False)

        for yaml_pth, settings in self.run_configs.items():
-            print(f"Generating {yaml_pth}")
-            print(f"Providers: {self.providers}")
            run_config = settings.run_config(
                self.name, self.providers, self.docker_image
            )
-            with open(output_dir / yaml_pth, "w") as f:
+            with open(yaml_output_dir / yaml_pth, "w") as f:
                yaml.safe_dump(run_config.model_dump(), f, sort_keys=False)

        docs = self.generate_markdown_docs()
-        with open(output_dir / f"{self.name}.md", "w") as f:
+        with open(doc_output_dir / f"{self.name}.md", "w") as f:
            f.write(docs)
-
-    @classmethod
-    def vllm_distribution(cls) -> "DistributionTemplate":
-        providers = {
-            "inference": ["remote::vllm"],
-            "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
-            "safety": ["inline::llama-guard"],
-            "agents": ["inline::meta-reference"],
-            "telemetry": ["inline::meta-reference"],
-        }
-
-        inference_provider = Provider(
-            provider_id="vllm-inference",
-            provider_type="remote::vllm",
-            config=VLLMInferenceAdapterConfig.sample_run_config(
-                url="${env.VLLM_URL}",
-            ),
-        )
-
-        inference_model = ModelInput(
-            model_id="${env.INFERENCE_MODEL}",
-            provider_id="vllm-inference",
-        )
-        safety_model = ModelInput(
-            model_id="${env.SAFETY_MODEL}",
-            provider_id="vllm-safety",
-        )
-
-        return cls(
-            name="remote-vllm",
-            description="Use (an external) vLLM server for running LLM inference",
-            template_path=Path(__file__).parent / "remote-vllm" / "doc_template.md",
-            providers=providers,
-            default_models=[inference_model, safety_model],
-            run_configs={
-                "run.yaml": RunConfigSettings(
-                    provider_overrides={
-                        "inference": [inference_provider],
-                    },
-                    default_models=[inference_model],
-                ),
-                "safety-run.yaml": RunConfigSettings(
-                    provider_overrides={
-                        "inference": [
-                            inference_provider,
-                            Provider(
-                                provider_id="vllm-safety",
-                                provider_type="remote::vllm",
-                                config=VLLMInferenceAdapterConfig.sample_run_config(
-                                    url="${env.SAFETY_VLLM_URL}",
-                                ),
-                            ),
-                        ],
-                    },
-                    default_models=[
-                        inference_model,
-                        safety_model,
-                    ],
-                    default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
-                ),
-            },
-            docker_compose_env_vars={
-                "LLAMASTACK_PORT": (
-                    "5001",
-                    "Port for the Llama Stack distribution server",
-                ),
-                "INFERENCE_MODEL": (
-                    "meta-llama/Llama-3.2-3B-Instruct",
-                    "Inference model loaded into the vLLM server",
-                ),
-                "VLLM_URL": (
-                    "http://host.docker.internal:5100}/v1",
-                    "URL of the vLLM server with the main inference model",
-                ),
-                "MAX_TOKENS": (
-                    "4096",
-                    "Maximum number of tokens for generation",
-                ),
-                "SAFETY_VLLM_URL": (
-                    "http://host.docker.internal:5101/v1",
-                    "URL of the vLLM server with the safety model",
-                ),
-                "SAFETY_MODEL": (
-                    "meta-llama/Llama-Guard-3-1B",
-                    "Name of the safety (Llama-Guard) model to use",
-                ),
-            },
-        )
-
-
-if __name__ == "__main__":
-    import argparse
-    import sys
-    from pathlib import Path
-
-    parser = argparse.ArgumentParser(description="Generate a distribution template")
-    parser.add_argument(
-        "--type",
-        choices=["vllm"],
-        default="vllm",
-        help="Type of distribution template to generate",
-    )
-    parser.add_argument(
-        "--output-dir",
-        type=Path,
-        required=True,
-        help="Output directory for the distribution files",
-    )
-
-    args = parser.parse_args()
-
-    if args.type == "vllm":
-        template = DistributionTemplate.vllm_distribution()
-    else:
-        print(f"Unknown template type: {args.type}", file=sys.stderr)
-        sys.exit(1)
-
-    template.save_distribution(args.output_dir)