rename toolchain/ --> llama_toolchain/

2025-12-03 18:00:36 +00:00 · 2024-07-21 23:48:38 -07:00 · 2024-07-21 23:48:38 -07:00 · f9111652ef
commit f9111652ef
parent d95f5f863d
73 changed files with 36 additions and 37 deletions
--- a/llama_toolchain/spec/generate.py
+++ b/llama_toolchain/spec/generate.py
@ -0,0 +1,54 @@
+from datetime import datetime
+
+import yaml
+
+from pyopenapi import Info, Options, Server, Specification
+
+from llama_models.llama3_1.api.datatypes import *  # noqa: F403
+from llama_toolchain.dataset.api import *  # noqa: F403
+from llama_toolchain.evaluations.api import *  # noqa: F403
+from llama_toolchain.inference.api import *  # noqa: F403
+from llama_toolchain.memory.api import *  # noqa: F403
+from llama_toolchain.post_training.api import *  # noqa: F403
+from llama_toolchain.reward_scoring.api import *  # noqa: F403
+from llama_toolchain.synthetic_data_generation.api import *  # noqa: F403
+from agentic_system.api import *  # noqa: F403
+
+
+# Single class that lists every API surface below as a base class. It is the
+# endpoint definition handed to pyopenapi's Specification in the __main__
+# section of this script.
+# NOTE(review): the base classes are presumably supplied by the star imports
+# above -- confirm which module defines each one.
+class LlamaStackEndpoints(
+    Inference,
+    AgenticSystem,
+    RewardScoring,
+    SyntheticDataGeneration,
+    Datasets,
+    PostTraining,
+    MemoryBanks,
+    Evaluations,
+): ...
+
+
+if __name__ == "__main__":
+    now = str(datetime.now())
+    print(
+        "Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at " + now
+    )
+    spec = Specification(
+        LlamaStackEndpoints,
+        Options(
+            server=Server(url="http://any-hosted-llama-stack.com"),
+            info=Info(
+                title="[DRAFT] Llama Stack Specification",
+                version="0.0.1",
+                description="""This is the specification of the llama stack that provides
+                a set of endpoints and their corresponding interfaces that are tailored to
+                best leverage Llama Models. The specification is still in draft and subject to change.
+                Generated at """
+                + now,
+            ),
+        ),
+    )
+    with open("openapi.yaml", "w", encoding="utf-8") as fp:
+        yaml.dump(spec.get_json(), fp, allow_unicode=True)
+
+    with open("openapi.html", "w") as fp:
+        spec.write_html(fp, pretty_print=True)
--- a/llama_toolchain/spec/openapi.html
+++ b/llama_toolchain/spec/openapi.html
--- a/llama_toolchain/spec/openapi.yaml
+++ b/llama_toolchain/spec/openapi.yaml
--- a/llama_toolchain/spec/package.sh
+++ b/llama_toolchain/spec/package.sh
@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -euo pipefail
+
+TMPDIR=$(mktemp -d)
+echo "Using temporary directory: $TMPDIR"
+
+rootdir=$(git rev-parse --show-toplevel)
+
+files_to_copy=("toolchain/spec/openapi*" "llama_models.llama3_1.api.datatypes.py" "toolchain/inference/api/*.py" "agentic_system/api/*.py" "toolchain/common/*.py" "toolchain/dataset/api/*.py" "toolchain/evaluations/api/*.py" "toolchain/reward_scoring/api/*.py" "toolchain/post_training/api/*.py" "toolchain/safety/api/*.py")
+for file in "${files_to_copy[@]}"; do
+    relpath="$file"
+    set -x
+    mkdir -p "$TMPDIR/$(dirname $relpath)"
+    eval cp "$rootdir/$relpath" "$TMPDIR/$(dirname $relpath)"
+    set +x
+done
+
+cd "$TMPDIR"
+zip -r output.zip .
+
+echo "Zip at: $TMPDIR/output.zip"
--- a/llama_toolchain/spec/post_training_types.py
+++ b/llama_toolchain/spec/post_training_types.py
@ -0,0 +1,105 @@
+from enum import Enum
+from typing import Any, Dict, List
+
+from llama_models.llama3_1.api.datatypes import URL
+from pydantic import BaseModel, Field
+
+from strong_typing.schema import json_schema_type
+
+
+# Kinds of column a dataset can declare; used as the value type of
+# TrainEvalDataset.columns. Each member's value is its own name as a string.
+class TrainEvalDatasetColumnType(Enum):
+    dialog = "dialog"
+    text = "text"
+    media = "media"
+    number = "number"
+    json = "json"
+
+
+@json_schema_type
+class TrainEvalDataset(BaseModel):
+    """Dataset to be used for training or evaluating language models."""
+
+    # TODO(ashwin): figure out if we need to add an enum for a "dataset type"
+
+    # Maps a string key to that column's TrainEvalDatasetColumnType.
+    # NOTE(review): the key is presumably the column name -- confirm.
+    columns: Dict[str, TrainEvalDatasetColumnType]
+    # NOTE(review): presumably the location the dataset content is read from
+    # -- confirm against the URL datatype and its consumers.
+    content_url: URL
+    # Free-form extra information; defaults to a new empty dict per instance.
+    metadata: Dict[str, Any] = Field(default_factory=dict)
+
+
+# Optimizer choices accepted by OptimizerConfig.optimizer_type. Each member's
+# value is its own name as a string.
+class OptimizerType(Enum):
+    adam = "adam"
+    adamw = "adamw"
+    sgd = "sgd"
+
+
+# Optimizer settings. All four fields are required (none has a default).
+# NOTE(review): lr / lr_min are presumably the learning rate and its lower
+# bound -- confirm units and semantics with the training backend.
+@json_schema_type
+class OptimizerConfig(BaseModel):
+    optimizer_type: OptimizerType
+    lr: float
+    lr_min: float
+    weight_decay: float
+
+
+# Training-run settings. Every field is required (none has a default).
+@json_schema_type
+class TrainingConfig(BaseModel):
+    n_epochs: int
+    batch_size: int
+    shuffle: bool
+    # NOTE(review): how n_iters relates to n_epochs is not visible here --
+    # confirm with the consumer of this config.
+    n_iters: int
+
+    # Boolean switches; NOTE(review): presumably memory/sharding options of the
+    # training backend -- confirm their exact effect there.
+    enable_activation_checkpointing: bool
+    memory_efficient_fsdp_wrap: bool
+    fsdp_cpu_offload: bool
+
+
+# Names of the supported finetuning algorithms. This file defines matching
+# config classes for lora, qlora and dora (LoraFinetuningConfig,
+# QLoraFinetuningConfig, DoraFinetuningConfig); none is defined here for full.
+class FinetuningAlgorithm(Enum):
+    full = "full"
+    lora = "lora"
+    qlora = "qlora"
+    dora = "dora"
+
+
+# LoRA finetuning settings; all fields are required. Also serves as the base
+# class of QLoraFinetuningConfig and DoraFinetuningConfig in this file.
+@json_schema_type
+class LoraFinetuningConfig(BaseModel):
+    # NOTE(review): presumably names of the attention modules to adapt --
+    # confirm the accepted values with the training backend.
+    lora_attn_modules: List[str]
+    apply_lora_to_mlp: bool
+    apply_lora_to_output: bool
+    rank: int
+    alpha: int
+
+
+# Distinct type for QLoRA; inherits every field from LoraFinetuningConfig and
+# declares no additional ones.
+@json_schema_type
+class QLoraFinetuningConfig(LoraFinetuningConfig):
+    pass
+
+
+# Distinct type for DoRA; inherits every field from LoraFinetuningConfig and
+# declares no additional ones.
+@json_schema_type
+class DoraFinetuningConfig(LoraFinetuningConfig):
+    pass
+
+
+@json_schema_type
+class PostTrainingJobLogStream(BaseModel):
+    """Stream of logs from a finetuning job."""
+
+    # String identifier of the job the log lines belong to.
+    job_uuid: str
+    # Log lines as strings. NOTE(review): whether this is a full log or an
+    # incremental chunk is not visible here -- confirm with the producer.
+    log_lines: List[str]
+
+
+# States a post-training job can be reported in. Each member's value is its
+# own name as a string.
+class PostTrainingJobStatus(Enum):
+    running = "running"
+    completed = "completed"
+    failed = "failed"
+    scheduled = "scheduled"
+
+
+# RLHF algorithm names; dpo is currently the only member.
+class RLHFAlgorithm(Enum):
+    dpo = "dpo"
+
+
+# Four required float settings (none has a default).
+# NOTE(review): presumably the hyperparameters for RLHFAlgorithm.dpo --
+# confirm the meaning and valid range of each with the training backend.
+@json_schema_type
+class DPOAlignmentConfig(BaseModel):
+    reward_scale: float
+    reward_clip: float
+    epsilon: float
+    gamma: float
--- a/llama_toolchain/spec/run_openapi_generator.sh
+++ b/llama_toolchain/spec/run_openapi_generator.sh
@ -0,0 +1,5 @@
+#!/bin/bash
+
+set -x
+
+PYTHONPATH=/data/users/rsm/llama-models:/data/users/rsm/llama-toolchain:/data/users/rsm/llama-agentic-system:../../../oss-ops:../.. python -m toolchain.spec.generate