From 4e4a40bd643af376c0231da91fef26a486c8db6b Mon Sep 17 00:00:00 2001 From: raspawar Date: Mon, 24 Mar 2025 16:06:24 +0530 Subject: [PATCH] remove llamastackclient from unit tests --- .../post_training/nvidia/post_training.py | 9 +- tests/unit/providers/nvidia/conftest.py | 25 ++ .../nvidia/mock_llama_stack_client.py | 147 ----------- .../unit/providers/nvidia/test_parameters.py | 148 +++++------ .../nvidia/test_supervised_fine_tuning.py | 244 ++++++------------ 5 files changed, 171 insertions(+), 402 deletions(-) delete mode 100644 tests/unit/providers/nvidia/mock_llama_stack_client.py diff --git a/llama_stack/providers/remote/post_training/nvidia/post_training.py b/llama_stack/providers/remote/post_training/nvidia/post_training.py index f8766cdbb..bacfdba0b 100644 --- a/llama_stack/providers/remote/post_training/nvidia/post_training.py +++ b/llama_stack/providers/remote/post_training/nvidia/post_training.py @@ -14,7 +14,6 @@ from llama_stack.apis.post_training import ( AlgorithmConfig, DPOAlignmentConfig, JobStatus, - LoraFinetuningConfig, PostTrainingJob, PostTrainingJobArtifactsResponse, PostTrainingJobStatusResponse, @@ -393,14 +392,14 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): # Handle LoRA-specific configuration if algorithm_config: - if isinstance(algorithm_config, LoraFinetuningConfig) and algorithm_config.type == "LoRA": + if isinstance(algorithm_config, dict) and algorithm_config.get("type") == "LoRA": warn_unsupported_params(algorithm_config, supported_params["lora_config"], "LoRA config") job_config["hyperparameters"]["lora"] = { k: v for k, v in { - "adapter_dim": getattr(algorithm_config, "adapter_dim", None), - "alpha": getattr(algorithm_config, "alpha", None), - "adapter_dropout": getattr(algorithm_config, "adapter_dropout", None), + "adapter_dim": algorithm_config.get("adapter_dim"), + "alpha": algorithm_config.get("alpha"), + "adapter_dropout": algorithm_config.get("adapter_dropout"), }.items() if v is not None } diff --git a/tests/unit/providers/nvidia/conftest.py b/tests/unit/providers/nvidia/conftest.py index b114853e3..1c4a1d145 100644 --- a/tests/unit/providers/nvidia/conftest.py +++ b/tests/unit/providers/nvidia/conftest.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import asyncio import pytest from unittest.mock import AsyncMock, MagicMock, patch @@ -18,3 +19,27 @@ mock_session.__aexit__ = AsyncMock() def patch_aiohttp_session(): with patch("aiohttp.ClientSession", return_value=mock_session): yield + + +@pytest.fixture +def event_loop(): + """Create and provide a new event loop for each test.""" + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + yield loop + loop.close() + + +@pytest.fixture +def run_async(): + """Fixture to run async functions in tests.""" + + def _run_async(coro): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(coro) + finally: + loop.close() + + return _run_async diff --git a/tests/unit/providers/nvidia/mock_llama_stack_client.py b/tests/unit/providers/nvidia/mock_llama_stack_client.py deleted file mode 100644 index 9cc1386c4..000000000 --- a/tests/unit/providers/nvidia/mock_llama_stack_client.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from llama_stack_client.types.algorithm_config_param import QatFinetuningConfig -from llama_stack_client.types.post_training.job_status_response import JobStatusResponse -from llama_stack_client.types.post_training_job import PostTrainingJob - - -class MockLlamaStackClient: - """Mock client for testing NVIDIA post-training functionality.""" - - def __init__(self, provider="nvidia"): - self.provider = provider - self.post_training = MockPostTraining() - self.inference = MockInference() - self._session = None - - def initialize(self): - """Mock initialization method.""" - return True - - def close(self): - """Close any open resources.""" - pass - - -class MockPostTraining: - """Mock post-training module.""" - - def __init__(self): - self.job = MockPostTrainingJob() - - def supervised_fine_tune( - self, - job_uuid, - model, - checkpoint_dir, - algorithm_config, - training_config, - logger_config, - hyperparam_search_config, - ): - """Mock supervised fine-tuning method.""" - if isinstance(algorithm_config, QatFinetuningConfig): - raise NotImplementedError("QAT fine-tuning is not supported by NVIDIA provider") - - # Return a mock PostTrainingJob - return PostTrainingJob( - job_uuid="cust-JGTaMbJMdqjJU8WbQdN9Q2", - status="created", - created_at="2024-12-09T04:06:28.542884", - updated_at="2024-12-09T04:06:28.542884", - model=model, - dataset_id=training_config.data_config.dataset_id, - output_model="default/job-1234", - ) - - async def supervised_fine_tune_async( - self, - job_uuid, - model, - checkpoint_dir, - algorithm_config, - training_config, - logger_config, - hyperparam_search_config, - ): - """Mock async supervised fine-tuning method.""" - if isinstance(algorithm_config, QatFinetuningConfig): - raise NotImplementedError("QAT fine-tuning is not supported by NVIDIA provider") - - # Return a mock response dictionary - return { - "job_uuid": "cust-JGTaMbJMdqjJU8WbQdN9Q2", - "status": "created", - "created_at": "2024-12-09T04:06:28.542884", - "updated_at": "2024-12-09T04:06:28.542884", - "model": model, - "dataset_id": training_config.data_config.dataset_id, - "output_model": "default/job-1234", - } - - -class MockPostTrainingJob: - """Mock post-training job module.""" - - def status(self, job_uuid): - """Mock job status method.""" - return JobStatusResponse( - status="completed", - steps_completed=1210, - epochs_completed=2, - percentage_done=100.0, - best_epoch=2, - train_loss=1.718016266822815, - val_loss=1.8661999702453613, - ) - - def list(self): - """Mock job list method.""" - return [ - PostTrainingJob( - job_uuid="cust-JGTaMbJMdqjJU8WbQdN9Q2", - status="completed", - created_at="2024-12-09T04:06:28.542884", - updated_at="2024-12-09T04:21:19.852832", - model="meta-llama/Llama-3.1-8B-Instruct", - dataset_id="sample-basic-test", - output_model="default/job-1234", - ) - ] - - def cancel(self, job_uuid): - """Mock job cancel method.""" - return None - - -class MockInference: - """Mock inference module.""" - - async def completion( - self, - content, - stream=False, - model_id=None, - sampling_params=None, - ): - """Mock completion method.""" - return { - "id": "cmpl-123456", - "object": "text_completion", - "created": 1677858242, - "model": model_id, - "choices": [ - { - "text": "The next GTC will take place in the middle of March, 2023.", - "index": 0, - "logprobs": None, - "finish_reason": "stop", - } - ], - "usage": {"prompt_tokens": 100, "completion_tokens": 12, "total_tokens": 112}, - } diff --git a/tests/unit/providers/nvidia/test_parameters.py b/tests/unit/providers/nvidia/test_parameters.py index 0f01ae1f7..bf579fbfb 100644 --- a/tests/unit/providers/nvidia/test_parameters.py +++ b/tests/unit/providers/nvidia/test_parameters.py @@ -7,11 +7,9 @@ import os import unittest import warnings -from unittest.mock import patch, AsyncMock, MagicMock +from unittest.mock import patch import pytest -import atexit -from llama_stack.distribution.library_client import LlamaStackAsLibraryClient from llama_stack_client.types.algorithm_config_param import LoraFinetuningConfig from llama_stack_client.types.post_training_supervised_fine_tune_params import ( TrainingConfig, @@ -19,30 +17,21 @@ from llama_stack_client.types.post_training_supervised_fine_tune_params import ( TrainingConfigOptimizerConfig, TrainingConfigEfficiencyConfig, ) -from .mock_llama_stack_client import MockLlamaStackClient - -# Create a mock session -mock_session = MagicMock() -mock_session.closed = False -mock_session.close = AsyncMock() -mock_session.__aenter__ = AsyncMock(return_value=mock_session) -mock_session.__aexit__ = AsyncMock() - -patch("aiohttp.ClientSession", return_value=mock_session).start() - -atexit.register(lambda: patch.stopall()) +from llama_stack.providers.remote.post_training.nvidia.post_training import ( + NvidiaPostTrainingAdapter, + NvidiaPostTrainingConfig, +) class TestNvidiaParameters(unittest.TestCase): def setUp(self): os.environ["NVIDIA_BASE_URL"] = "http://nemo.test" os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" - os.environ["LLAMA_STACK_BASE_URL"] = "http://localhost:5002" - # Use the mock client - with patch("llama_stack.distribution.library_client.LlamaStackAsLibraryClient", MockLlamaStackClient): - self.llama_stack_client = LlamaStackAsLibraryClient("nvidia") - _ = self.llama_stack_client.initialize() + config = NvidiaPostTrainingConfig( + base_url=os.environ["NVIDIA_BASE_URL"], customizer_url=os.environ["NVIDIA_CUSTOMIZER_URL"], api_key=None + ) + self.adapter = NvidiaPostTrainingAdapter(config) self.make_request_patcher = patch( "llama_stack.providers.remote.post_training.nvidia.post_training.NvidiaPostTrainingAdapter._make_request" @@ -56,10 +45,6 @@ class TestNvidiaParameters(unittest.TestCase): } def tearDown(self): - # Close the client if it has a close method - if hasattr(self.llama_stack_client, "close"): - self.llama_stack_client.close() - self.make_request_patcher.stop() def _assert_request_params(self, expected_json): @@ -74,13 +59,17 @@ class TestNvidiaParameters(unittest.TestCase): else: assert actual_json[key] == value + @pytest.fixture(autouse=True) + def inject_fixtures(self, run_async): + self.run_async = run_async + def test_customizer_parameters_passed(self): """Test scenario 1: When an optional parameter is passed and value is correctly set.""" custom_adapter_dim = 32 # Different from default of 8 algorithm_config = LoraFinetuningConfig( type="LoRA", - adapter_dim=custom_adapter_dim, # Custom value - adapter_dropout=0.2, # Custom value + adapter_dim=custom_adapter_dim, + adapter_dropout=0.2, apply_lora_to_mlp=True, apply_lora_to_output=True, alpha=16, @@ -98,14 +87,17 @@ class TestNvidiaParameters(unittest.TestCase): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - self.llama_stack_client.post_training.supervised_fine_tune( - job_uuid="test-job", - model="meta-llama/Llama-3.1-8B-Instruct", - checkpoint_dir="", - algorithm_config=algorithm_config, - training_config=training_config, - logger_config={}, - hyperparam_search_config={}, + + self.run_async( + self.adapter.supervised_fine_tune( + job_uuid="test-job", + model="meta-llama/Llama-3.1-8B-Instruct", + checkpoint_dir="", + algorithm_config=algorithm_config, + training_config=training_config, + logger_config={}, + hyperparam_search_config={}, + ) ) warning_texts = [str(warning.message) for warning in w] @@ -159,31 +151,25 @@ class TestNvidiaParameters(unittest.TestCase): optimizer_config=optimizer_config, ) - # catch required unsupported parameters warnings with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - self.llama_stack_client.post_training.supervised_fine_tune( - job_uuid=required_job_uuid, # Required parameter - model=required_model, # Required parameter - checkpoint_dir="", - algorithm_config=algorithm_config, - training_config=training_config, - logger_config={}, - hyperparam_search_config={}, + + self.run_async( + self.adapter.supervised_fine_tune( + job_uuid=required_job_uuid, # Required parameter + model=required_model, # Required parameter + checkpoint_dir="", + algorithm_config=algorithm_config, + training_config=training_config, + logger_config={}, + hyperparam_search_config={}, + ) ) - self.mock_make_request.assert_called_once() - call_args = self.mock_make_request.call_args - - assert call_args[1]["json"]["config"] == "meta/llama-3.1-8b-instruct" - assert call_args[1]["json"]["dataset"]["name"] == required_dataset_id - warning_texts = [str(warning.message) for warning in w] fields = [ "rank", - "use_dora", - "quantize_base", "apply_lora_to_output", "lora_attn_modules", "apply_lora_to_mlp", @@ -191,6 +177,12 @@ class TestNvidiaParameters(unittest.TestCase): for field in fields: assert any(field in text for text in warning_texts) + self.mock_make_request.assert_called_once() + call_args = self.mock_make_request.call_args + + assert call_args[1]["json"]["config"] == "meta/llama-3.1-8b-instruct" + assert call_args[1]["json"]["dataset"]["name"] == required_dataset_id + def test_unsupported_parameters_warning(self): """Test that warnings are raised for unsupported parameters.""" data_config = TrainingConfigDataConfig( @@ -230,24 +222,27 @@ class TestNvidiaParameters(unittest.TestCase): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - self.llama_stack_client.post_training.supervised_fine_tune( - job_uuid="test-job", - model="meta-llama/Llama-3.1-8B-Instruct", - checkpoint_dir="test-dir", # Unsupported parameter - algorithm_config=LoraFinetuningConfig( - type="LoRA", - adapter_dim=16, - adapter_dropout=0.1, - apply_lora_to_mlp=True, - apply_lora_to_output=True, - alpha=16, - rank=16, - lora_attn_modules=["q_proj", "k_proj", "v_proj", "o_proj"], - ), - training_config=training_config, - logger_config={"test": "value"}, # Unsupported parameter - hyperparam_search_config={"test": "value"}, # Unsupported parameter + self.run_async( + self.adapter.supervised_fine_tune( + job_uuid="test-job", + model="meta-llama/Llama-3.1-8B-Instruct", + checkpoint_dir="test-dir", # Unsupported parameter + algorithm_config=LoraFinetuningConfig( + type="LoRA", + adapter_dim=16, + adapter_dropout=0.1, + apply_lora_to_mlp=True, + apply_lora_to_output=True, + alpha=16, + rank=16, + lora_attn_modules=["q_proj", "k_proj", "v_proj", "o_proj"], + ), + training_config=training_config, + logger_config={"test": "value"}, # Unsupported parameter + hyperparam_search_config={"test": "value"}, # Unsupported parameter + ) ) + assert len(w) >= 4 warning_texts = [str(warning.message) for warning in w] @@ -264,8 +259,6 @@ class TestNvidiaParameters(unittest.TestCase): "dtype", # required unsupported parameters "rank", - "use_dora", - "quantize_base", "apply_lora_to_output", "lora_attn_modules", "apply_lora_to_mlp", @@ -274,20 +267,5 @@ class TestNvidiaParameters(unittest.TestCase): assert any(field in text for text in warning_texts) -@pytest.fixture -def llama_stack_client(): - os.environ["NVIDIA_BASE_URL"] = "http://nemo.test" - os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" - os.environ["LLAMA_STACK_BASE_URL"] = "http://localhost:5002" - - with patch("llama_stack.distribution.library_client.LlamaStackAsLibraryClient", MockLlamaStackClient): - client = LlamaStackAsLibraryClient("nvidia") - _ = client.initialize() - yield client - - if hasattr(client, "close"): - client.close() - - if __name__ == "__main__": unittest.main() diff --git a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py index 7c0cb93fe..8ef48e05b 100644 --- a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py +++ b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py @@ -6,32 +6,46 @@ import os import unittest -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import patch import warnings import pytest + from llama_stack_client.types.algorithm_config_param import LoraFinetuningConfig, QatFinetuningConfig -from llama_stack_client.types.post_training.job_status_response import JobStatusResponse -from llama_stack_client.types.post_training_job import PostTrainingJob from llama_stack_client.types.post_training_supervised_fine_tune_params import ( TrainingConfig, TrainingConfigDataConfig, TrainingConfigOptimizerConfig, ) -from llama_stack.distribution.library_client import LlamaStackAsLibraryClient -from .mock_llama_stack_client import MockLlamaStackClient +from llama_stack.providers.remote.post_training.nvidia.post_training import ( + NvidiaPostTrainingAdapter, + NvidiaPostTrainingConfig, + NvidiaPostTrainingJobStatusResponse, + ListNvidiaPostTrainingJobs, + NvidiaPostTrainingJob, +) class TestNvidiaPostTraining(unittest.TestCase): - # ToDo: add tests for env variables, models supported. def setUp(self): os.environ["NVIDIA_BASE_URL"] = "http://nemo.test" # needed for llm inference os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" # needed for nemo customizer - os.environ["LLAMA_STACK_BASE_URL"] = "http://localhost:5002" # mocking llama stack base url - with patch("llama_stack.distribution.library_client.LlamaStackAsLibraryClient", MockLlamaStackClient): - self.llama_stack_client = LlamaStackAsLibraryClient("nvidia") - _ = self.llama_stack_client.initialize() + config = NvidiaPostTrainingConfig( + base_url=os.environ["NVIDIA_BASE_URL"], customizer_url=os.environ["NVIDIA_CUSTOMIZER_URL"], api_key=None + ) + self.adapter = NvidiaPostTrainingAdapter(config) + self.make_request_patcher = patch( + "llama_stack.providers.remote.post_training.nvidia.post_training.NvidiaPostTrainingAdapter._make_request" + ) + self.mock_make_request = self.make_request_patcher.start() + + def tearDown(self): + self.make_request_patcher.stop() + + @pytest.fixture(autouse=True) + def inject_fixtures(self, run_async): + self.run_async = run_async def _assert_request(self, mock_call, expected_method, expected_path, expected_params=None, expected_json=None): """Helper method to verify request details in mock calls.""" @@ -51,11 +65,9 @@ class TestNvidiaPostTraining(unittest.TestCase): for key, value in expected_json.items(): assert call_args[1]["json"][key] == value - @patch("llama_stack.providers.remote.post_training.nvidia.post_training.NvidiaPostTrainingAdapter._make_request") - def test_supervised_fine_tune(self, mock_make_request): - """Test the supervised fine-tuning API call. - ToDo: add tests for env variables.""" - mock_make_request.return_value = { + def test_supervised_fine_tune(self): + """Test the supervised fine-tuning API call.""" + self.mock_make_request.return_value = { "id": "cust-JGTaMbJMdqjJU8WbQdN9Q2", "created_at": "2024-12-09T04:06:28.542884", "updated_at": "2024-12-09T04:06:28.542884", @@ -125,40 +137,27 @@ class TestNvidiaPostTraining(unittest.TestCase): optimizer_config=optimizer_config, ) - with warnings.catch_warnings(record=True) as w: + with warnings.catch_warnings(record=True): warnings.simplefilter("always") - training_job = self.llama_stack_client.post_training.supervised_fine_tune( - job_uuid="1234", - model="meta-llama/Llama-3.1-8B-Instruct", - checkpoint_dir="", - algorithm_config=algorithm_config, - training_config=training_config, - logger_config={}, - hyperparam_search_config={}, + training_job = self.run_async( + self.adapter.supervised_fine_tune( + job_uuid="1234", + model="meta-llama/Llama-3.1-8B-Instruct", + checkpoint_dir="", + algorithm_config=algorithm_config, + training_config=training_config, + logger_config={}, + hyperparam_search_config={}, + ) ) - # required lora config unsupported parameters warnings - fields = [ - "apply_lora_to_mlp", - "rank", - "use_dora", - "lora_attn_modules", - "quantize_base", - "apply_lora_to_output", - ] - for field in fields: - assert any(field in str(warning.message) for warning in w) - # check the output is a PostTrainingJob - # Note: Although the type is PostTrainingJob: llama_stack.apis.post_training.PostTrainingJob, - # post llama_stack_client initialization it gets translated to llama_stack_client.types.post_training_job.PostTrainingJob - assert isinstance(training_job, PostTrainingJob) - + assert isinstance(training_job, NvidiaPostTrainingJob) assert training_job.job_uuid == "cust-JGTaMbJMdqjJU8WbQdN9Q2" - mock_make_request.assert_called_once() + self.mock_make_request.assert_called_once() self._assert_request( - mock_make_request, + self.mock_make_request, "POST", "/v1/customization/jobs", expected_json={ @@ -188,19 +187,20 @@ class TestNvidiaPostTraining(unittest.TestCase): ) # This will raise NotImplementedError since QAT is not supported with self.assertRaises(NotImplementedError): - self.llama_stack_client.post_training.supervised_fine_tune( - job_uuid="1234", - model="meta-llama/Llama-3.1-8B-Instruct", - checkpoint_dir="", - algorithm_config=algorithm_config, - training_config=training_config, - logger_config={}, - hyperparam_search_config={}, + self.run_async( + self.adapter.supervised_fine_tune( + job_uuid="1234", + model="meta-llama/Llama-3.1-8B-Instruct", + checkpoint_dir="", + algorithm_config=algorithm_config, + training_config=training_config, + logger_config={}, + hyperparam_search_config={}, + ) ) - @patch("llama_stack.providers.remote.post_training.nvidia.post_training.NvidiaPostTrainingAdapter._make_request") - def test_get_job_status(self, mock_make_request): - mock_make_request.return_value = { + def test_get_training_job_status(self): + self.mock_make_request.return_value = { "created_at": "2024-12-09T04:06:28.580220", "updated_at": "2024-12-09T04:21:19.852832", "status": "completed", @@ -213,10 +213,11 @@ class TestNvidiaPostTraining(unittest.TestCase): } job_id = "cust-JGTaMbJMdqjJU8WbQdN9Q2" - status = self.llama_stack_client.post_training.job.status(job_uuid=job_id) - assert isinstance(status, JobStatusResponse) - assert status.status == "completed" + status = self.run_async(self.adapter.get_training_job_status(job_uuid=job_id)) + + assert isinstance(status, NvidiaPostTrainingJobStatusResponse) + assert status.status.value == "completed" assert status.steps_completed == 1210 assert status.epochs_completed == 2 assert status.percentage_done == 100.0 @@ -224,15 +225,14 @@ class TestNvidiaPostTraining(unittest.TestCase): assert status.train_loss == 1.718016266822815 assert status.val_loss == 1.8661999702453613 - mock_make_request.assert_called_once() + self.mock_make_request.assert_called_once() self._assert_request( - mock_make_request, "GET", f"/v1/customization/jobs/{job_id}/status", expected_params={"job_id": job_id} + self.mock_make_request, "GET", f"/v1/customization/jobs/{job_id}/status", expected_params={"job_id": job_id} ) - @patch("llama_stack.providers.remote.post_training.nvidia.post_training.NvidiaPostTrainingAdapter._make_request") - def test_get_job(self, mock_make_request): + def test_get_training_jobs(self): job_id = "cust-JGTaMbJMdqjJU8WbQdN9Q2" - mock_make_request.return_value = { + self.mock_make_request.return_value = { "data": [ { "id": job_id, @@ -258,124 +258,38 @@ class TestNvidiaPostTraining(unittest.TestCase): ] } - jobs = self.llama_stack_client.post_training.job.list() - assert isinstance(jobs, list) - assert len(jobs) == 1 - job = jobs[0] - assert job.job_uuid == job_id - assert job.status == "completed" + jobs = self.run_async(self.adapter.get_training_jobs()) - mock_make_request.assert_called_once() + assert isinstance(jobs, ListNvidiaPostTrainingJobs) + assert len(jobs.data) == 1 + job = jobs.data[0] + assert job.job_uuid == job_id + assert job.status.value == "completed" + + self.mock_make_request.assert_called_once() self._assert_request( - mock_make_request, + self.mock_make_request, "GET", "/v1/customization/jobs", expected_params={"page": 1, "page_size": 10, "sort": "created_at"}, ) - @patch("llama_stack.providers.remote.post_training.nvidia.post_training.NvidiaPostTrainingAdapter._make_request") - def test_cancel_job(self, mock_make_request): - mock_make_request.return_value = {} # Empty response for successful cancellation + def test_cancel_training_job(self): + self.mock_make_request.return_value = {} # Empty response for successful cancellation job_id = "cust-JGTaMbJMdqjJU8WbQdN9Q2" - result = self.llama_stack_client.post_training.job.cancel(job_uuid=job_id) + result = self.run_async(self.adapter.cancel_training_job(job_uuid=job_id)) + assert result is None - # Verify the correct request was made - mock_make_request.assert_called_once() + self.mock_make_request.assert_called_once() self._assert_request( - mock_make_request, "POST", f"/v1/customization/jobs/{job_id}/cancel", expected_params={"job_id": job_id} + self.mock_make_request, + "POST", + f"/v1/customization/jobs/{job_id}/cancel", + expected_params={"job_id": job_id}, ) - @pytest.mark.asyncio - @patch("llama_stack.providers.remote.post_training.nvidia.post_training.NvidiaPostTrainingAdapter._make_request") - async def test_async_supervised_fine_tune(self, mock_make_request): - mock_make_request.return_value = { - "id": "cust-JGTaMbJMdqjJU8WbQdN9Q2", - "status": "created", - "created_at": "2024-12-09T04:06:28.542884", - "updated_at": "2024-12-09T04:06:28.542884", - "model": "meta-llama/Llama-3.1-8B-Instruct", - "dataset_id": "sample-basic-test", - "output_model": "default/job-1234", - } - - algorithm_config = LoraFinetuningConfig( - alpha=16, - rank=16, - type="LoRA", - adapter_dim=16, - adapter_dropout=0.1, - apply_lora_to_mlp=True, - apply_lora_to_output=True, - ) - - data_config = TrainingConfigDataConfig(dataset_id="sample-basic-test", batch_size=16) - - optimizer_config = TrainingConfigOptimizerConfig( - lr=0.0001, - ) - - training_config = TrainingConfig( - n_epochs=2, - data_config=data_config, - optimizer_config=optimizer_config, - ) - - training_job = await self.llama_stack_client.post_training.supervised_fine_tune_async( - job_uuid="1234", - model="meta-llama/Llama-3.1-8B-Instruct", - checkpoint_dir="", - algorithm_config=algorithm_config, - training_config=training_config, - logger_config={}, - hyperparam_search_config={}, - ) - - assert training_job["job_uuid"] == "cust-JGTaMbJMdqjJU8WbQdN9Q2" - assert training_job["status"] == "created" - - mock_make_request.assert_called_once() - call_args = mock_make_request.call_args - assert call_args[1]["method"] == "POST" - assert call_args[1]["path"] == "/v1/customization/jobs" - - @pytest.mark.asyncio - @patch("aiohttp.ClientSession.post") - async def test_inference_with_fine_tuned_model(self, mock_post): - mock_response = MagicMock() - mock_response.status = 200 - mock_response.json = AsyncMock( - return_value={ - "id": "cmpl-123456", - "object": "text_completion", - "created": 1677858242, - "model": "job-1234", - "choices": [ - { - "text": "The next GTC will take place in the middle of March, 2023.", - "index": 0, - "logprobs": None, - "finish_reason": "stop", - } - ], - "usage": {"prompt_tokens": 100, "completion_tokens": 12, "total_tokens": 112}, - } - ) - mock_post.return_value.__aenter__.return_value = mock_response - - response = await self.llama_stack_client.inference.completion( - content="When is the upcoming GTC event? GTC 2018 attracted over 8,400 attendees. Due to the COVID pandemic of 2020, GTC 2020 was converted to a digital event and drew roughly 59,000 registrants. The 2021 GTC keynote, which was streamed on YouTube on April 12, included a portion that was made with CGI using the Nvidia Omniverse real-time rendering platform. This next GTC will take place in the middle of March, 2023. Answer: ", - stream=False, - model_id="job-1234", - sampling_params={ - "max_tokens": 128, - }, - ) - - assert response["model"] == "job-1234" - assert response["choices"][0]["text"] == "The next GTC will take place in the middle of March, 2023." - if __name__ == "__main__": unittest.main()