rename toolchain/ --> llama_toolchain/

This commit is contained in:
Hardik Shah 2024-07-21 23:48:38 -07:00
parent d95f5f863d
commit f9111652ef
73 changed files with 36 additions and 37 deletions

View file

@ -0,0 +1,54 @@
from datetime import datetime
import yaml
from pyopenapi import Info, Options, Server, Specification
from llama_models.llama3_1.api.datatypes import * # noqa: F403
from llama_toolchain.dataset.api import * # noqa: F403
from llama_toolchain.evaluations.api import * # noqa: F403
from llama_toolchain.inference.api import * # noqa: F403
from llama_toolchain.memory.api import * # noqa: F403
from llama_toolchain.post_training.api import * # noqa: F403
from llama_toolchain.reward_scoring.api import * # noqa: F403
from llama_toolchain.synthetic_data_generation.api import * # noqa: F403
from agentic_system.api import * # noqa: F403
class LlamaStackEndpoints(
Inference,
AgenticSystem,
RewardScoring,
SyntheticDataGeneration,
Datasets,
PostTraining,
MemoryBanks,
Evaluations,
): ...
if __name__ == "__main__":
now = str(datetime.now())
print(
"Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at " + now
)
spec = Specification(
LlamaStackEndpoints,
Options(
server=Server(url="http://any-hosted-llama-stack.com"),
info=Info(
title="[DRAFT] Llama Stack Specification",
version="0.0.1",
description="""This is the specification of the llama stack that provides
a set of endpoints and their corresponding interfaces that are tailored to
best leverage Llama Models. The specification is still in draft and subject to change.
Generated at """
+ now,
),
),
)
with open("openapi.yaml", "w", encoding="utf-8") as fp:
yaml.dump(spec.get_json(), fp, allow_unicode=True)
with open("openapi.html", "w") as fp:
spec.write_html(fp, pretty_print=True)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,22 @@
#!/bin/bash
set -euo pipefail
TMPDIR=$(mktemp -d)
echo "Using temporary directory: $TMPDIR"
rootdir=$(git rev-parse --show-toplevel)
files_to_copy=("toolchain/spec/openapi*" "llama_models.llama3_1.api.datatypes.py" "toolchain/inference/api/*.py" "agentic_system/api/*.py" "toolchain/common/*.py" "toolchain/dataset/api/*.py" "toolchain/evaluations/api/*.py" "toolchain/reward_scoring/api/*.py" "toolchain/post_training/api/*.py" "toolchain/safety/api/*.py")
for file in "${files_to_copy[@]}"; do
relpath="$file"
set -x
mkdir -p "$TMPDIR/$(dirname $relpath)"
eval cp "$rootdir/$relpath" "$TMPDIR/$(dirname $relpath)"
set +x
done
cd "$TMPDIR"
zip -r output.zip .
echo "Zip at: $TMPDIR/output.zip"

View file

@ -0,0 +1,105 @@
from enum import Enum
from typing import Any, Dict, List
from llama_models.llama3_1.api.datatypes import URL
from pydantic import BaseModel, Field
from strong_typing.schema import json_schema_type
class TrainEvalDatasetColumnType(Enum):
dialog = "dialog"
text = "text"
media = "media"
number = "number"
json = "json"
@json_schema_type
class TrainEvalDataset(BaseModel):
"""Dataset to be used for training or evaluating language models."""
# TODO(ashwin): figure out if we need to add an enum for a "dataset type"
columns: Dict[str, TrainEvalDatasetColumnType]
content_url: URL
metadata: Dict[str, Any] = Field(default_factory=dict)
class OptimizerType(Enum):
adam = "adam"
adamw = "adamw"
sgd = "sgd"
@json_schema_type
class OptimizerConfig(BaseModel):
optimizer_type: OptimizerType
lr: float
lr_min: float
weight_decay: float
@json_schema_type
class TrainingConfig(BaseModel):
n_epochs: int
batch_size: int
shuffle: bool
n_iters: int
enable_activation_checkpointing: bool
memory_efficient_fsdp_wrap: bool
fsdp_cpu_offload: bool
class FinetuningAlgorithm(Enum):
full = "full"
lora = "lora"
qlora = "qlora"
dora = "dora"
@json_schema_type
class LoraFinetuningConfig(BaseModel):
lora_attn_modules: List[str]
apply_lora_to_mlp: bool
apply_lora_to_output: bool
rank: int
alpha: int
@json_schema_type
class QLoraFinetuningConfig(LoraFinetuningConfig):
pass
@json_schema_type
class DoraFinetuningConfig(LoraFinetuningConfig):
pass
@json_schema_type
class PostTrainingJobLogStream(BaseModel):
"""Stream of logs from a finetuning job."""
job_uuid: str
log_lines: List[str]
class PostTrainingJobStatus(Enum):
running = "running"
completed = "completed"
failed = "failed"
scheduled = "scheduled"
class RLHFAlgorithm(Enum):
dpo = "dpo"
@json_schema_type
class DPOAlignmentConfig(BaseModel):
reward_scale: float
reward_clip: float
epsilon: float
gamma: float

View file

@ -0,0 +1,5 @@
#!/bin/bash
set -x
PYTHONPATH=/data/users/rsm/llama-models:/data/users/rsm/llama-toolchain:/data/users/rsm/llama-agentic-system:../../../oss-ops:../.. python -m toolchain.spec.generate