mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-06 02:32:40 +00:00)
fix changes post merge
This commit is contained in: parent e95b1e9739, commit e4b39aacb8
5 changed files with 65 additions and 20 deletions
@@ -8,7 +8,7 @@ from datetime import datetime
 from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Protocol
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 from typing_extensions import Annotated
 
 from llama_stack.apis.common.content_types import URL
@@ -71,6 +71,7 @@ class TrainingConfig(BaseModel):
 
 @json_schema_type
 class LoraFinetuningConfig(BaseModel):
+    model_config = ConfigDict(extra="allow")
     type: Literal["LoRA"] = "LoRA"
     lora_attn_modules: List[str]
     apply_lora_to_mlp: bool
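
For context on what the new model_config line buys: with Pydantic v2, ConfigDict(extra="allow") lets unknown constructor keywords pass validation and remain accessible on the instance instead of raising an error. A minimal runnable sketch, trimmed to the fields visible in this hunk; the extra keywords below are illustrative:

from typing import List, Literal

from pydantic import BaseModel, ConfigDict


class LoraFinetuningConfig(BaseModel):
    model_config = ConfigDict(extra="allow")
    type: Literal["LoRA"] = "LoRA"
    lora_attn_modules: List[str]
    apply_lora_to_mlp: bool


# Undeclared provider-specific knobs are kept as extra attributes, not rejected.
cfg = LoraFinetuningConfig(
    lora_attn_modules=["q_proj", "v_proj"],
    apply_lora_to_mlp=True,
    adapter_dim=16,  # not a declared field; accepted because extra="allow"
    alpha=16,        # likewise
)
print(cfg.adapter_dim, cfg.alpha)  # 16 16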
@@ -26,7 +26,7 @@ def available_providers() -> List[ProviderSpec]:
             api=Api.post_training,
             adapter=AdapterSpec(
                 adapter_type="nvidia",
-                pip_packages=["requests"],
+                pip_packages=["requests", "aiohttp"],
                 module="llama_stack.providers.remote.post_training.nvidia",
                 config_class="llama_stack.providers.remote.post_training.nvidia.NvidiaPostTrainingConfig",
             ),
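
On why aiohttp joins requests in this provider's dependencies: the adapter's job methods are coroutines (async def throughout the next file), and aiohttp is the usual client for awaitable HTTP. A sketch of the kind of call this enables; only the /v1/customization/jobs path comes from this commit, while the function name, parameters, and session handling are assumptions:

import aiohttp


async def fetch_customization_job(base_url: str, job_id: str, headers: dict) -> dict:
    # One async round-trip to the customizer API; the endpoint shape mirrors
    # the /v1/customization/jobs/... paths visible elsewhere in this commit.
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(f"{base_url}/v1/customization/jobs/{job_id}") as resp:
            resp.raise_for_status()
            return await resp.json()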
@@ -14,7 +14,7 @@ from llama_stack.apis.post_training import (
     AlgorithmConfig,
     DPOAlignmentConfig,
     JobStatus,
-    PostTraining,
+    LoraFinetuningConfig,
     PostTrainingJob,
     PostTrainingJobArtifactsResponse,
     PostTrainingJobStatusResponse,
@@ -53,7 +53,11 @@ class ListNvidiaPostTrainingJobs(BaseModel):
     data: List[NvidiaPostTrainingJob]
 
 
-class NvidiaPostTrainingAdapter(PostTraining, ModelRegistryHelper):
+class NvidiaPostTrainingJobStatusResponse(PostTrainingJobStatusResponse):
+    model_config = ConfigDict(extra="allow")
+
+
+class NvidiaPostTrainingAdapter(ModelRegistryHelper):
     def __init__(self, config: NvidiaPostTrainingConfig):
         self.config = config
         self.headers = {}
@@ -146,7 +150,7 @@ class NvidiaPostTrainingAdapter(PostTraining, ModelRegistryHelper):
 
         return ListNvidiaPostTrainingJobs(data=jobs)
 
-    async def get_training_job_status(self, job_uuid: str) -> Optional[NvidiaPostTrainingJob]:
+    async def get_training_job_status(self, job_uuid: str) -> NvidiaPostTrainingJobStatusResponse:
         """Get the status of a customization job.
         Updated the base class return type from PostTrainingJobResponse to NvidiaPostTrainingJob.
 
@@ -175,10 +179,10 @@ class NvidiaPostTrainingAdapter(PostTraining, ModelRegistryHelper):
         api_status = response.pop("status").lower()
         mapped_status = STATUS_MAPPING.get(api_status, "unknown")
 
-        return NvidiaPostTrainingJob(
+        return NvidiaPostTrainingJobStatusResponse(
             status=JobStatus(mapped_status),
             job_uuid=job_uuid,
-            created_at=datetime.fromisoformat(response.pop("created_at")),
+            started_at=datetime.fromisoformat(response.pop("created_at")),
             updated_at=datetime.fromisoformat(response.pop("updated_at")),
             **response,
         )
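
Reading this hunk end to end: the payload's known keys are consumed with pop(), timestamps are parsed with datetime.fromisoformat, and whatever remains is forwarded through **response, which only validates because NvidiaPostTrainingJobStatusResponse declares extra="allow". A standalone sketch with an invented payload; the STATUS_MAPPING contents and sample values are assumptions:

from datetime import datetime

# Assumed shape of the adapter's status translation table.
STATUS_MAPPING = {"running": "in_progress", "completed": "completed", "failed": "failed"}

response = {
    "status": "COMPLETED",                # NVIDIA-side status, normalized below
    "created_at": "2025-03-14T10:00:00",
    "updated_at": "2025-03-14T10:05:00",
    "output_model": "default/job-1234",   # extra key, would ride along via **response
}

api_status = response.pop("status").lower()
mapped_status = STATUS_MAPPING.get(api_status, "unknown")
started = datetime.fromisoformat(response.pop("created_at"))
updated = datetime.fromisoformat(response.pop("updated_at"))
# At this point response == {"output_model": "default/job-1234"}; those leftover
# keys become extra attributes on the extra="allow" response model.
print(mapped_status, started, updated, response)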
@@ -188,10 +192,10 @@ class NvidiaPostTrainingAdapter(PostTraining, ModelRegistryHelper):
             method="POST", path=f"/v1/customization/jobs/{job_uuid}/cancel", params={"job_id": job_uuid}
         )
 
-    async def get_training_job_artifacts(self, job_uuid: str) -> Optional[PostTrainingJobArtifactsResponse]:
+    async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
         raise NotImplementedError("Job artifacts are not implemented yet")
 
-    async def get_post_training_artifacts(self, job_uuid: str) -> Optional[PostTrainingJobArtifactsResponse]:
+    async def get_post_training_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
         raise NotImplementedError("Job artifacts are not implemented yet")
 
     async def supervised_fine_tune(
@@ -389,14 +393,14 @@ class NvidiaPostTrainingAdapter(PostTraining, ModelRegistryHelper):
 
         # Handle LoRA-specific configuration
         if algorithm_config:
-            if isinstance(algorithm_config, dict) and algorithm_config.get("type") == "LoRA":
+            if isinstance(algorithm_config, LoraFinetuningConfig) and algorithm_config.type == "LoRA":
                 warn_unsupported_params(algorithm_config, supported_params["lora_config"], "LoRA config")
                 job_config["hyperparameters"]["lora"] = {
                     k: v
                     for k, v in {
-                        "adapter_dim": algorithm_config.get("adapter_dim"),
-                        "alpha": algorithm_config.get("alpha"),
-                        "adapter_dropout": algorithm_config.get("adapter_dropout"),
+                        "adapter_dim": getattr(algorithm_config, "adapter_dim", None),
+                        "alpha": getattr(algorithm_config, "alpha", None),
+                        "adapter_dropout": getattr(algorithm_config, "adapter_dropout", None),
                     }.items()
                     if v is not None
                 }
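
The rewritten comprehension does two jobs: getattr(obj, name, None) tolerates config objects that do not carry a given knob, and the if v is not None filter keeps unset knobs out of the request body entirely. A standalone sketch; SimpleNamespace stands in for a LoraFinetuningConfig instance here:

from types import SimpleNamespace

algorithm_config = SimpleNamespace(adapter_dim=16, alpha=16)  # adapter_dropout left unset

lora = {
    k: v
    for k, v in {
        "adapter_dim": getattr(algorithm_config, "adapter_dim", None),
        "alpha": getattr(algorithm_config, "alpha", None),
        "adapter_dropout": getattr(algorithm_config, "adapter_dropout", None),
    }.items()
    if v is not None  # unset knobs never reach the request payload
}
print(lora)  # {'adapter_dim': 16, 'alpha': 16}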
@@ -432,5 +436,5 @@ class NvidiaPostTrainingAdapter(PostTraining, ModelRegistryHelper):
         """Optimize a model based on preference data."""
         raise NotImplementedError("Preference optimization is not implemented yet")
 
-    async def get_training_job_container_logs(self, job_uuid: str) -> Optional[PostTrainingJobStatusResponse]:
+    async def get_training_job_container_logs(self, job_uuid: str) -> PostTrainingJobStatusResponse:
         raise NotImplementedError("Job logs are not implemented yet")
@@ -61,6 +61,11 @@ class TestNvidiaParameters(unittest.TestCase):
             type="LoRA",
             adapter_dim=custom_adapter_dim,  # Custom value
             adapter_dropout=0.2,  # Custom value
+            apply_lora_to_mlp=True,
+            apply_lora_to_output=True,
+            alpha=16,
+            rank=16,
+            lora_attn_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
         )
 
         data_config = TrainingConfigDataConfig(dataset_id="test-dataset", batch_size=16)
@@ -84,7 +89,7 @@ class TestNvidiaParameters(unittest.TestCase):
         self._assert_request_params(
             {
                 "hyperparameters": {
-                    "lora": {"adapter_dim": custom_adapter_dim, "adapter_dropout": 0.2},
+                    "lora": {"adapter_dim": custom_adapter_dim, "adapter_dropout": 0.2, "alpha": 16},
                     "epochs": 3,
                     "learning_rate": 0.0002,
                     "batch_size": 16,
@@ -98,7 +103,16 @@ class TestNvidiaParameters(unittest.TestCase):
         required_dataset_id = "required-dataset"
         required_job_uuid = "required-job"
 
-        algorithm_config = LoraFinetuningConfig(type="LoRA", adapter_dim=8)
+        algorithm_config = LoraFinetuningConfig(
+            type="LoRA",
+            adapter_dim=16,
+            adapter_dropout=0.1,
+            apply_lora_to_mlp=True,
+            apply_lora_to_output=True,
+            alpha=16,
+            rank=16,
+            lora_attn_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
+        )
 
         data_config = TrainingConfigDataConfig(
             dataset_id=required_dataset_id,  # Required parameter
@@ -173,7 +187,16 @@ class TestNvidiaParameters(unittest.TestCase):
             job_uuid="test-job",
             model="meta-llama/Llama-3.1-8B-Instruct",
             checkpoint_dir="test-dir",  # Unsupported parameter
-            algorithm_config=LoraFinetuningConfig(type="LoRA"),
+            algorithm_config=LoraFinetuningConfig(
+                type="LoRA",
+                adapter_dim=16,
+                adapter_dropout=0.1,
+                apply_lora_to_mlp=True,
+                apply_lora_to_output=True,
+                alpha=16,
+                rank=16,
+                lora_attn_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
+            ),
             training_config=training_config,
             logger_config={"test": "value"},  # Unsupported parameter
             hyperparam_search_config={"test": "value"},  # Unsupported parameter
@@ -102,7 +102,16 @@ class TestNvidiaPostTraining(unittest.TestCase):
             "ownership": {"created_by": "me", "access_policies": {}},
         }
 
-        algorithm_config = LoraFinetuningConfig(type="LoRA", adapter_dim=16, adapter_dropout=0.1)
+        algorithm_config = LoraFinetuningConfig(
+            type="LoRA",
+            adapter_dim=16,
+            adapter_dropout=0.1,
+            apply_lora_to_mlp=True,
+            apply_lora_to_output=True,
+            alpha=16,
+            rank=16,
+            lora_attn_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
+        )
 
         data_config = TrainingConfigDataConfig(dataset_id="sample-basic-test", batch_size=16)
 
@@ -147,7 +156,7 @@ class TestNvidiaPostTraining(unittest.TestCase):
                     "epochs": 2,
                     "batch_size": 16,
                     "learning_rate": 0.0001,
-                    "lora": {"adapter_dim": 16, "adapter_dropout": 0.1},
+                    "lora": {"alpha": 16, "adapter_dim": 16, "adapter_dropout": 0.1},
                },
            },
        )
@@ -277,7 +286,15 @@ class TestNvidiaPostTraining(unittest.TestCase):
             "output_model": "default/job-1234",
         }
 
-        algorithm_config = LoraFinetuningConfig(type="LoRA", adapter_dim=16, adapter_dropout=0.1)
+        algorithm_config = LoraFinetuningConfig(
+            alpha=16,
+            rank=16,
+            type="LoRA",
+            adapter_dim=16,
+            adapter_dropout=0.1,
+            apply_lora_to_mlp=True,
+            apply_lora_to_output=True,
+        )
 
         data_config = TrainingConfigDataConfig(dataset_id="sample-basic-test", batch_size=16)
 