forked from phoenix-oss/llama-stack-mirror
### Context This is the first in a series of PRs that integrate torchtune with llama-stack as the meta reference post-training implementation. For the MVP, we will focus on single-device LoRA SFT. Though this PR is still WIP, we want to get early feedback on the high-level design of this skeleton while we are still working on several details. ### Scope To limit the scope of this PR, we focus on the skeleton of the implementation. **What is included?** - refinement of the post-training SFT APIs - skeleton of the supervised_fine_tune implementation. We verified that we can call the supervised_fine_tune API successfully from the llama stack client SDK (client side PR: https://github.com/meta-llama/llama-stack-client-python/pull/51) - a very basic single-device LoRA training recipe based on torchtune core components - parity check with the torchtune library and a post-training API unit test **What is not included?** - implementation of the other job management and get-training-artifacts APIs (separate PR) - refactoring of the meta reference inference logic to support eval on the fine-tuned model (separate PR) - several necessary pieces of functionality in the training recipe such as logging, validation, etc. (separate PR) - interop with telemetry for tracing and metrics logging; for now we temporarily log to local disk (separate PR) ### Testing **e2e test** Although we haven't added detailed testing and a numerical parity check with torchtune yet, we did a simple E2E test from client to server 1. Set up the server with `llama stack build --template experimental-post-training --image-type conda` and `llama stack run experimental-post-training` 2. On the client, run `llama-stack-client --endpoint http://devgpu018.nha2.facebook.com:5000 post_training supervised_fine_tune` 3. Training finishes successfully. On the server side, get the fine-tuned checkpoints under the output dir. 
On the client side, get the job UUID. server <img width="1110" alt="Screenshot 2024-12-02 at 5 52 32 PM" src="https://github.com/user-attachments/assets/b548eb90-7a9b-4edc-a858-ee237cc4361d"> client <img width="807" alt="Screenshot 2024-12-02 at 5 52 37 PM" src="https://github.com/user-attachments/assets/1138ffa8-4698-40fa-b190-3d7b99646838"> **parity check** The torchtune dataloader output and the llama-stack post-training dataloader output are the same <img width="1116" alt="Screenshot 2024-12-04 at 8 18 46 PM" src="https://github.com/user-attachments/assets/5e295cdc-4c24-4ea6-82c0-ca96ef1bd6ee"> torchtune LoRA SFT and llama-stack post-training LoRA SFT on the alpaca dataset with the llama3.2 3B instruct model match numerically <img width="860" alt="Screenshot 2024-12-04 at 8 17 01 PM" src="https://github.com/user-attachments/assets/c05cf0a8-c674-4d2e-9f0a-c5d01b2dca99"> <img width="1049" alt="Screenshot 2024-12-04 at 8 17 06 PM" src="https://github.com/user-attachments/assets/b911d4e2-e7b1-41a9-b62c-d75529b6d443"> **unit test** ![Uploading Screenshot 2024-12-09 at 1.35.10 PM.png…]()
160 lines
4.9 KiB
Python
160 lines
4.9 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
import pytest
|
|
from dotenv import load_dotenv
|
|
from pydantic import BaseModel
|
|
from termcolor import colored
|
|
|
|
from llama_stack.distribution.datatypes import Provider
|
|
from llama_stack.providers.datatypes import RemoteProviderConfig
|
|
|
|
from .env import get_env_or_fail
|
|
|
|
|
|
class ProviderFixture(BaseModel):
    """Pydantic model pairing a list of providers with optional provider data."""

    # Provider entries carried by this fixture (required).
    providers: List[Provider]

    # Optional string-keyed mapping of extra data; None when not supplied.
    provider_data: Optional[Dict[str, Any]] = None
|
|
|
|
|
|
def remote_stack_fixture() -> ProviderFixture:
    """Build a ProviderFixture containing a single "test::remote" provider.

    The remote configuration is created from the REMOTE_STACK_URL environment
    variable when it is set to a non-empty value. Otherwise it is assembled
    from REMOTE_STACK_HOST and REMOTE_STACK_PORT, both read through
    get_env_or_fail; the port is converted to an int.
    """
    stack_url = os.getenv("REMOTE_STACK_URL", None)
    if stack_url:
        remote_config = RemoteProviderConfig.from_url(stack_url)
    else:
        stack_host = get_env_or_fail("REMOTE_STACK_HOST")
        stack_port = int(get_env_or_fail("REMOTE_STACK_PORT"))
        remote_config = RemoteProviderConfig(host=stack_host, port=stack_port)

    remote_provider = Provider(
        provider_id="test::remote",
        provider_type="test::remote",
        config=remote_config.model_dump(),
    )
    return ProviderFixture(providers=[remote_provider])
|
|
|
|
|
|
def pytest_configure(config):
    """Set pytest output options and load environment variables at start of test run.

    Sets short tracebacks and disables warnings on ``config.option``, loads a
    ``.env`` file located next to this module when one exists, and then
    applies every ``--env KEY=value`` command line entry to ``os.environ``.

    Args:
        config: The pytest config object passed to this hook.

    Raises:
        ValueError: If an ``--env`` entry does not contain ``=``.
    """
    config.option.tbstyle = "short"
    config.option.disable_warnings = True

    # Load from .env file if it exists
    env_file = Path(__file__).parent / ".env"
    if env_file.exists():
        load_dotenv(env_file)

    # Load any environment variables passed via --env. The option is None
    # when the flag was never given, hence the `or []`.
    for env_var in config.getoption("--env") or []:
        # Split on the first "=" only so values may themselves contain "=".
        key, separator, value = env_var.partition("=")
        if not separator:
            raise ValueError(
                f"Invalid --env value: {env_var!r}. Expected format: KEY=value"
            )
        os.environ[key] = value
|
|
|
|
|
|
def pytest_addoption(parser):
    """Add custom command line options.

    Registers two options on the given parser:

    * ``--providers``: provider configuration string in the format
      ``api1=provider1,api2=provider2``; defaults to the empty string.
    * ``--env``: repeatable ``KEY=value`` entry (``action="append"``).

    Args:
        parser: The pytest option parser passed to this hook.
    """
    parser.addoption(
        "--providers",
        default="",
        help=(
            "Provider configuration in format: api1=provider1,api2=provider2. "
            "Example: --providers inference=ollama,safety=meta-reference"
        ),
    )
    parser.addoption(
        "--env", action="append", help="Set environment variables, e.g. --env KEY=value"
    )
|
|
|
|
|
|
def make_provider_id(providers: Dict[str, str]) -> str:
    """Return an id string of the form 'api1=provider1:api2=provider2'.

    Entries are emitted in sorted order of the API name and joined with ':'.
    """
    segments = []
    for api_name in sorted(providers):
        segments.append(f"{api_name}={providers[api_name]}")
    return ":".join(segments)
|
|
|
|
|
|
def get_provider_marks(providers: Dict[str, str]) -> List[Any]:
    """Return one ``pytest.mark.<provider>`` attribute per value in *providers*.

    The marks follow the iteration order of the mapping's values.
    """
    return [getattr(pytest.mark, provider_name) for provider_name in providers.values()]
|
|
|
|
|
|
def get_provider_fixture_overrides(
    config, available_fixtures: Dict[str, List[str]]
) -> Optional[List[pytest.param]]:
    """Turn the ``--providers`` option into a one-element list of pytest params.

    Returns None when ``--providers`` is empty or unset. Otherwise the option
    string is parsed with parse_fixture_string and wrapped in a single
    ``pytest.param`` whose id comes from make_provider_id and whose marks come
    from get_provider_marks.
    """
    requested = config.getoption("--providers")
    if requested:
        selection = parse_fixture_string(requested, available_fixtures)
        override = pytest.param(
            selection,
            id=make_provider_id(selection),
            marks=get_provider_marks(selection),
        )
        return [override]
    return None
|
|
|
|
|
|
def parse_fixture_string(
    provider_str: str, available_fixtures: Dict[str, List[str]]
) -> Dict[str, str]:
    """Parse provider string of format 'api1=provider1,api2=provider2'.

    Args:
        provider_str: Comma-separated ``api=provider`` pairs. Whitespace
            around each API name and provider name is ignored.
        available_fixtures: Mapping of API name to the provider fixture names
            accepted for that API.

    Returns:
        A mapping of API name to the selected provider fixture name, or an
        empty dict when *provider_str* is empty.

    Raises:
        ValueError: If a pair has no ``=``, names an unknown API or provider,
            or if any API in *available_fixtures* has no provider selected.
    """
    if not provider_str:
        return {}

    fixtures = {}
    for pair in provider_str.split(","):
        # Split on the first "=" only: a stray extra "=" then lands in the
        # provider name and is reported as an unknown provider below, rather
        # than failing with an opaque tuple-unpacking error.
        api, separator, fixture = pair.partition("=")
        if not separator:
            raise ValueError(
                f"Invalid provider specification: {pair}. Expected format: api=provider"
            )
        api = api.strip()
        fixture = fixture.strip()
        if api not in available_fixtures:
            raise ValueError(
                f"Unknown API: {api}. Available APIs: {list(available_fixtures.keys())}"
            )
        if fixture not in available_fixtures[api]:
            raise ValueError(
                f"Unknown provider '{fixture}' for API '{api}'. "
                f"Available providers: {list(available_fixtures[api])}"
            )
        fixtures[api] = fixture

    # Check that every available API was given a provider
    for api in available_fixtures:
        if api not in fixtures:
            raise ValueError(
                f"Missing provider fixture for API '{api}'. Available providers: "
                f"{list(available_fixtures[api])}"
            )
    return fixtures
|
|
|
|
|
|
def pytest_itemcollected(item):
    """Append the item's marker names, colored yellow, to its displayed name.

    Markers named "asyncio" and "parametrize" are left out. When no other
    markers remain, the item name is not modified.
    """
    hidden = ("asyncio", "parametrize")
    visible = []
    for marker in item.iter_markers():
        if marker.name not in hidden:
            visible.append(marker.name)

    if not visible:
        return

    label = colored(",".join(visible), "yellow")
    item.name = f"{item.name}[{label}]"
|
|
|
|
|
|
# Fixture modules registered as pytest plugins, one per API test package
# under llama_stack.providers.tests, in the order listed here.
pytest_plugins = [
    f"llama_stack.providers.tests.{api_name}.fixtures"
    for api_name in (
        "inference",
        "safety",
        "memory",
        "agents",
        "datasetio",
        "scoring",
        "eval",
        "post_training",
    )
]
|