fix: Pass model param as configuration name to NeMo Customizer

This commit is contained in:
Jash Gulabrai 2025-05-20 09:43:51 -04:00
parent ed7b4731aa
commit 1d94f3617a
2 changed files with 5 additions and 8 deletions

View file

@@ -224,7 +224,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
Parameters:
training_config: TrainingConfig - Configuration for training
-model: str - Model identifier
+model: str - NeMo Customizer configuration name
algorithm_config: Optional[AlgorithmConfig] - Algorithm-specific configuration
checkpoint_dir: Optional[str] - Directory containing model checkpoints, ignored atm
job_uuid: str - Unique identifier for the job, ignored atm
@@ -299,9 +299,6 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
User is informed about unsupported parameters via warnings.
"""
-# Map model to nvidia model name
-# See `_MODEL_ENTRIES` for supported models
-nvidia_model = self.get_provider_model_id(model)
# Check for unsupported method parameters
unsupported_method_params = []
@@ -347,7 +344,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
# Prepare base job configuration
job_config = {
-"config": nvidia_model,
+"config": model,
"dataset": {
"name": training_config["data_config"]["dataset_id"],
"namespace": self.config.dataset_namespace,

View file

@@ -165,7 +165,7 @@ class TestNvidiaPostTraining(unittest.TestCase):
training_job = self.run_async(
self.adapter.supervised_fine_tune(
job_uuid="1234",
-model="meta-llama/Llama-3.1-8B-Instruct",
+model="meta/llama-3.2-1b-instruct@v1.0.0+L40",
checkpoint_dir="",
algorithm_config=algorithm_config,
training_config=convert_pydantic_to_json_value(training_config),
@@ -184,7 +184,7 @@ class TestNvidiaPostTraining(unittest.TestCase):
"POST",
"/v1/customization/jobs",
expected_json={
-"config": "meta/llama-3.1-8b-instruct",
+"config": "meta/llama-3.2-1b-instruct@v1.0.0+L40",
"dataset": {"name": "sample-basic-test", "namespace": "default"},
"hyperparameters": {
"training_type": "sft",
@@ -219,7 +219,7 @@ class TestNvidiaPostTraining(unittest.TestCase):
self.run_async(
self.adapter.supervised_fine_tune(
job_uuid="1234",
-model="meta-llama/Llama-3.1-8B-Instruct",
+model="meta/llama-3.2-1b-instruct@v1.0.0+L40",
checkpoint_dir="",
algorithm_config=algorithm_config,
training_config=convert_pydantic_to_json_value(training_config),