Start auto-generating { build, run, doc.md } for distributions

2025-07-31 16:01:46 +00:00 · 2024-11-14 17:44:45 -08:00 · 2024-11-14 17:44:45 -08:00 · cfa913fdd5
commit cfa913fdd5
parent 20bf2f50c2
11 changed files with 362 additions and 23 deletions
--- a/distributions/ollama-gpu/run.yaml
+++ b/distributions/ollama-gpu/run.yaml
@ -13,20 +13,15 @@ apis:
 - safety
 providers:
  inference:
-  - provider_id: ollama0
+  - provider_id: ollama
    provider_type: remote::ollama
    config:
-      url: http://127.0.0.1:14343
+      url: ${env.OLLAMA_URL:http://127.0.0.1:11434}
  safety:
  - provider_id: meta0
    provider_type: inline::llama-guard
    config:
-      model: Llama-Guard-3-1B
      excluded_categories: []
-  - provider_id: meta1
-    provider_type: inline::prompt-guard
-    config:
-      model: Prompt-Guard-86M
  memory:
  - provider_id: meta0
    provider_type: inline::meta-reference
@ -43,3 +38,10 @@ providers:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config: {}
+models:
+  - model_id: ${env.INFERENCE_MODEL:Llama3.2-3B-Instruct}
+    provider_id: ollama
+  - model_id: ${env.SAFETY_MODEL:Llama-Guard-3-1B}
+    provider_id: ollama
+shields:
+  - shield_id: ${env.SAFETY_MODEL:Llama-Guard-3-1B}
--- a/distributions/ollama/run.yaml
+++ b/distributions/ollama/run.yaml
@ -13,20 +13,15 @@ apis:
 - safety
 providers:
  inference:
-  - provider_id: ollama0
+  - provider_id: ollama
    provider_type: remote::ollama
    config:
-      url: http://127.0.0.1:14343
+      url: ${env.LLAMA_INFERENCE_OLLAMA_URL:http://127.0.0.1:11434}
  safety:
  - provider_id: meta0
    provider_type: inline::llama-guard
    config:
-      model: Llama-Guard-3-1B
      excluded_categories: []
-  - provider_id: meta1
-    provider_type: inline::prompt-guard
-    config:
-      model: Prompt-Guard-86M
  memory:
  - provider_id: meta0
    provider_type: inline::meta-reference
@ -43,3 +38,10 @@ providers:
  - provider_id: meta0
    provider_type: inline::meta-reference
    config: {}
+models:
+  - model_id: ${env.LLAMA_INFERENCE_MODEL:Llama3.2-3B-Instruct}
+    provider_id: ollama
+  - model_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
+    provider_id: ollama
+shields:
+  - shield_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
--- a/distributions/remote-vllm/run.yaml
+++ b/distributions/remote-vllm/run.yaml
@ -16,7 +16,7 @@ providers:
    provider_type: remote::vllm
    config:
      # NOTE: replace with "localhost" if you are running in "host" network mode
-      url: ${env.LLAMA_INFERENCE_VLLM_URL:http://host.docker.internal:5100/v1}
+      url: ${env.VLLM_URL:http://host.docker.internal:5100/v1}
      max_tokens: ${env.MAX_TOKENS:4096}
      api_token: fake
  # serves safety llama_guard model
@ -24,7 +24,7 @@ providers:
    provider_type: remote::vllm
    config:
      # NOTE: replace with "localhost" if you are running in "host" network mode
-      url: ${env.LLAMA_SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}
+      url: ${env.SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}
      max_tokens: ${env.MAX_TOKENS:4096}
      api_token: fake
  memory:
@ -34,7 +34,7 @@ providers:
      kvstore:
        namespace: null
        type: sqlite
-        db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/faiss_store.db"
+        db_path: "${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db"
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
@ -50,7 +50,7 @@ providers:
      persistence_store:
        namespace: null
        type: sqlite
-        db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/agents_store.db"
+        db_path: "${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db"
  telemetry:
  - provider_id: meta0
    provider_type: inline::meta-reference
@ -58,11 +58,11 @@ providers:
 metadata_store:
  namespace: null
  type: sqlite
-  db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/registry.db"
+  db_path: "${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db"
 models:
-  - model_id: ${env.LLAMA_INFERENCE_MODEL:Llama3.1-8B-Instruct}
+  - model_id: ${env.INFERENCE_MODEL:Llama3.1-8B-Instruct}
    provider_id: vllm-0
-  - model_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
+  - model_id: ${env.SAFETY_MODEL:Llama-Guard-3-1B}
    provider_id: vllm-1
 shields:
-  - shield_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
+  - shield_id: ${env.SAFETY_MODEL:Llama-Guard-3-1B}
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@ -313,7 +313,8 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
                else:
                    value = default_val

-            return value
+            # expand "~" from the values
+            return os.path.expanduser(value)

        try:
            return re.sub(pattern, get_env_var, config)
--- a/llama_stack/providers/inline/agents/meta_reference/config.py
+++ b/llama_stack/providers/inline/agents/meta_reference/config.py
@ -12,3 +12,11 @@ from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig

 class MetaReferenceAgentsImplConfig(BaseModel):
    persistence_store: KVStoreConfig = Field(default=SqliteKVStoreConfig())
+
+    @classmethod
+    def sample_dict(cls):
+        return {
+            "persistence_store": SqliteKVStoreConfig.sample_dict(
+                db_name="agents_store.db"
+            ),
+        }
--- a/llama_stack/providers/inline/inference/vllm/config.py
+++ b/llama_stack/providers/inline/inference/vllm/config.py
@ -34,6 +34,16 @@ class VLLMConfig(BaseModel):
        default=0.3,
    )

+    @classmethod
+    def sample_dict(cls):
+        return {
+            "model": "${env.VLLM_INFERENCE_MODEL:Llama3.2-3B-Instruct}",
+            "tensor_parallel_size": "${env.VLLM_TENSOR_PARALLEL_SIZE:1}",
+            "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
+            "enforce_eager": "${env.VLLM_ENFORCE_EAGER:False}",
+            "gpu_memory_utilization": "${env.VLLM_GPU_MEMORY_UTILIZATION:0.3}",
+        }
+
    @field_validator("model")
    @classmethod
    def validate_model(cls, model: str) -> str:
--- a/llama_stack/providers/inline/safety/llama_guard/config.py
+++ b/llama_stack/providers/inline/safety/llama_guard/config.py
@ -11,3 +11,9 @@ from pydantic import BaseModel

 class LlamaGuardConfig(BaseModel):
    excluded_categories: List[str] = []
+
+    @classmethod
+    def sample_dict(cls):
+        return {
+            "excluded_categories": [],
+        }
--- a/llama_stack/providers/remote/inference/vllm/config.py
+++ b/llama_stack/providers/remote/inference/vllm/config.py
@ -24,3 +24,12 @@ class VLLMInferenceAdapterConfig(BaseModel):
        default="fake",
        description="The API token",
    )
+
+    @classmethod
+    def sample_dict(cls):
+        # TODO: we may need two modes, one for conda and one for docker
+        return {
+            "url": "${env.VLLM_URL:http://host.docker.internal:5100/v1}",
+            "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
+            "api_token": "${env.VLLM_API_TOKEN:fake}",
+        }
--- a/llama_stack/providers/utils/kvstore/config.py
+++ b/llama_stack/providers/utils/kvstore/config.py
@ -36,6 +36,15 @@ class RedisKVStoreConfig(CommonConfig):
    def url(self) -> str:
        return f"redis://{self.host}:{self.port}"

+    @classmethod
+    def sample_dict(cls):
+        return {
+            "type": "redis",
+            "namespace": None,
+            "host": "${env.REDIS_HOST:localhost}",
+            "port": "${env.REDIS_PORT:6379}",
+        }
+

 class SqliteKVStoreConfig(CommonConfig):
    type: Literal[KVStoreType.sqlite.value] = KVStoreType.sqlite.value
@ -44,6 +53,14 @@ class SqliteKVStoreConfig(CommonConfig):
        description="File path for the sqlite database",
    )

+    @classmethod
+    def sample_dict(cls, db_name: str = "kvstore.db"):
+        return {
+            "type": "sqlite",
+            "namespace": None,
+            "db_path": "${env.SQLITE_STORE_DIR:~/.llama/runtime/" + db_name + "}",
+        }
+

 class PostgresKVStoreConfig(CommonConfig):
    type: Literal[KVStoreType.postgres.value] = KVStoreType.postgres.value
@ -54,6 +71,19 @@ class PostgresKVStoreConfig(CommonConfig):
    password: Optional[str] = None
    table_name: str = "llamastack_kvstore"

+    @classmethod
+    def sample_dict(cls, table_name: str = "llamastack_kvstore"):
+        return {
+            "type": "postgres",
+            "namespace": None,
+            "host": "${env.POSTGRES_HOST:localhost}",
+            "port": "${env.POSTGRES_PORT:5432}",
+            "db": "${env.POSTGRES_DB}",
+            "user": "${env.POSTGRES_USER}",
+            "password": "${env.POSTGRES_PASSWORD}",
+            "table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}",
+        }
+
    @classmethod
    @field_validator("table_name")
    def validate_table_name(cls, v: str) -> str:
--- a/llama_stack/templates/init.py
+++ b/llama_stack/templates/init.py
@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
--- a/llama_stack/templates/template.py
+++ b/llama_stack/templates/template.py
@ -0,0 +1,266 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from io import StringIO
+
+from pathlib import Path
+from typing import Dict, List, Optional, Set
+
+import jinja2
+import yaml
+from pydantic import BaseModel
+
+from rich.console import Console
+from rich.table import Table
+
+from llama_stack.distribution.datatypes import (
+    BuildConfig,
+    DistributionSpec,
+    KVStoreConfig,
+    ModelInput,
+    Provider,
+    ShieldInput,
+    StackRunConfig,
+)
+
+
+class DistributionTemplate(BaseModel):
+    """
+    Represents a Llama Stack distribution instance that can generate configuration
+    and documentation files.
+    """
+
+    name: str
+    description: str
+    providers: Dict[str, List[str]]
+    default_models: List[ModelInput]
+    default_shields: Optional[List[ShieldInput]] = None
+
+    # Optional configuration
+    metadata_store: Optional[KVStoreConfig] = None
+    env_vars: Optional[Dict[str, str]] = None
+    docker_image: Optional[str] = None
+
+    @property
+    def distribution_spec(self) -> DistributionSpec:
+        return DistributionSpec(
+            description=self.description,
+            docker_image=self.docker_image,
+            providers=self.providers,
+        )
+
+    def build_config(self) -> BuildConfig:
+        return BuildConfig(
+            name=self.name,
+            distribution_spec=self.distribution_spec,
+            image_type="conda",  # default to conda, can be overridden
+        )
+
+    def run_config(self, provider_configs: Dict[str, List[Provider]]) -> StackRunConfig:
+        from datetime import datetime
+
+        # Get unique set of APIs from providers
+        apis: Set[str] = set(self.providers.keys())
+
+        return StackRunConfig(
+            image_name=self.name,
+            docker_image=self.docker_image,
+            built_at=datetime.now(),
+            apis=list(apis),
+            providers=provider_configs,
+            metadata_store=self.metadata_store,
+            models=self.default_models,
+            shields=self.default_shields or [],
+        )
+
+    def generate_markdown_docs(self) -> str:
+        """Generate markdown documentation using both Jinja2 templates and rich tables."""
+        # First generate the providers table using rich
+        output = StringIO()
+        console = Console(file=output, force_terminal=False)
+
+        table = Table(title="Provider Configuration", show_header=True)
+        table.add_column("API", style="bold")
+        table.add_column("Provider(s)")
+
+        for api, providers in sorted(self.providers.items()):
+            table.add_row(api, ", ".join(f"`{p}`" for p in providers))
+
+        console.print(table)
+        providers_table = output.getvalue()
+
+        # Main documentation template
+        template = """# {{ name }} Distribution
+
+{{ description }}
+
+## Provider Configuration
+
+The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations:
+
+{{ providers_table }}
+
+{%- if env_vars %}
+## Environment Variables
+
+The following environment variables can be configured:
+
+{% for var, description in env_vars.items() %}
+- `{{ var }}`: {{ description }}
+{% endfor %}
+{%- endif %}
+
+## Example Usage
+
+### Using Docker Compose
+
+```bash
+$ cd distributions/{{ name }}
+$ docker compose up
+```
+
+### Manual Configuration
+
+You can also configure the distribution manually by creating a `run.yaml` file:
+
+```yaml
+version: '2'
+image_name: {{ name }}
+apis:
+{% for api in providers.keys() %}
+  - {{ api }}
+{% endfor %}
+
+providers:
+{% for api, provider_list in providers.items() %}
+  {{ api }}:
+  {% for provider in provider_list %}
+    - provider_id: {{ provider.lower() }}-0
+      provider_type: {{ provider }}
+      config: {}
+  {% endfor %}
+{% endfor %}
+```
+
+## Models
+
+The following models are configured by default:
+{% for model in default_models %}
+- `{{ model.model_id }}`
+{% endfor %}
+
+{%- if default_shields %}
+
+## Safety Shields
+
+The following safety shields are configured:
+{% for shield in default_shields %}
+- `{{ shield.shield_id }}`
+{%- endfor %}
+{%- endif %}
+"""
+        # Render template with rich-generated table
+        env = jinja2.Environment(trim_blocks=True, lstrip_blocks=True)
+        template = env.from_string(template)
+        return template.render(
+            name=self.name,
+            description=self.description,
+            providers=self.providers,
+            providers_table=providers_table,
+            env_vars=self.env_vars,
+            default_models=self.default_models,
+            default_shields=self.default_shields,
+        )
+
+    def save_distribution(self, output_dir: Path) -> None:
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Save build.yaml
+        build_config = self.build_config()
+        with open(output_dir / "build.yaml", "w") as f:
+            yaml.safe_dump(build_config.model_dump(), f, sort_keys=False)
+
+        # Save run.yaml template
+        # Create a minimal provider config for the template
+        provider_configs = {
+            api: [
+                Provider(
+                    provider_id=f"{provider.lower()}-0",
+                    provider_type=provider,
+                    config={},
+                )
+                for provider in providers
+            ]
+            for api, providers in self.providers.items()
+        }
+        run_config = self.run_config(provider_configs)
+        with open(output_dir / "run.yaml", "w") as f:
+            yaml.safe_dump(run_config.model_dump(), f, sort_keys=False)
+
+        # Save documentation
+        docs = self.generate_markdown_docs()
+        with open(output_dir / f"{self.name}.md", "w") as f:
+            f.write(docs)
+
+    @classmethod
+    def vllm_distribution(cls) -> "DistributionTemplate":
+        return cls(
+            name="remote-vllm",
+            description="Use (an external) vLLM server for running LLM inference",
+            providers={
+                "inference": ["remote::vllm"],
+                "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+                "safety": ["inline::llama-guard"],
+                "agents": ["inline::meta-reference"],
+                "telemetry": ["inline::meta-reference"],
+            },
+            default_models=[
+                ModelInput(
+                    model_id="${env.LLAMA_INFERENCE_MODEL:Llama3.1-8B-Instruct}"
+                ),
+                ModelInput(model_id="${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}"),
+            ],
+            default_shields=[
+                ShieldInput(shield_id="${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}")
+            ],
+            env_vars={
+                "LLAMA_INFERENCE_VLLM_URL": "URL of the vLLM inference server",
+                "LLAMA_SAFETY_VLLM_URL": "URL of the vLLM safety server",
+                "MAX_TOKENS": "Maximum number of tokens for generation",
+                "LLAMA_INFERENCE_MODEL": "Name of the inference model to use",
+                "LLAMA_SAFETY_MODEL": "Name of the safety model to use",
+            },
+        )
+
+
+if __name__ == "__main__":
+    import argparse
+    import sys
+    from pathlib import Path
+
+    parser = argparse.ArgumentParser(description="Generate a distribution template")
+    parser.add_argument(
+        "--type",
+        choices=["vllm"],
+        default="vllm",
+        help="Type of distribution template to generate",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        required=True,
+        help="Output directory for the distribution files",
+    )
+
+    args = parser.parse_args()
+
+    if args.type == "vllm":
+        template = DistributionTemplate.vllm_distribution()
+    else:
+        print(f"Unknown template type: {args.type}", file=sys.stderr)
+        sys.exit(1)
+
+    template.save_distribution(args.output_dir)