chore(test): migrate unit tests from unittest to pytest nvidia test f… (#2794)

This PR migrates the NVIDIA post-training provider unit tests from unittest to pytest.
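
The pattern used throughout: the class-based setUp/tearDown with manual patcher start/stop becomes a yielding pytest fixture, and the run_async wrapper becomes native async test functions. A minimal sketch of that pattern, with generic illustrative names (Client, cancel_job, client_with_mock are not from this repo) and assuming an async-capable pytest plugin such as pytest-asyncio is enabled:

    from unittest.mock import AsyncMock, patch

    import pytest


    class Client:
        """Hypothetical adapter that talks to a remote service."""

        async def _make_request(self, method: str, path: str) -> None:
            raise NotImplementedError  # the real HTTP call would live here

        async def cancel_job(self, job_id: str) -> None:
            await self._make_request("POST", f"/jobs/{job_id}/cancel")


    @pytest.fixture
    def client_with_mock():
        """Replaces setUp/tearDown: the patch is active only inside the with block."""
        client = Client()
        with patch.object(client, "_make_request", new_callable=AsyncMock) as mock_request:
            yield client, mock_request  # un-patching happens automatically after the test


    async def test_cancel_job(client_with_mock):  # relies on pytest-asyncio (or similar) in auto mode
        client, mock_request = client_with_mock
        await client.cancel_job("job-1")
        mock_request.assert_called_once_with("POST", "/jobs/job-1/cancel")

Because the with block in the fixture scopes the patch to each test, there is no explicit teardown step to forget.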

Part of https://github.com/meta-llama/llama-stack/issues/2680

cc @leseb

Signed-off-by: Mustafa Elbehery <melbeher@redhat.com>
Authored by Mustafa Elbehery on 2025-07-18 12:32:19 +02:00, committed by GitHub
parent b78b8e1486
commit fe6af7dc8b


@@ -5,13 +5,11 @@
 # the root directory of this source tree.
 import os
-import unittest
 import warnings
-from unittest.mock import AsyncMock, patch
+from unittest.mock import patch

 import pytest

-from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.post_training.post_training import (
     DataConfig,
     DatasetFormat,
@@ -22,7 +20,6 @@ from llama_stack.apis.post_training.post_training import (
     TrainingConfig,
 )
 from llama_stack.distribution.library_client import convert_pydantic_to_json_value
-from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAConfig, NVIDIAInferenceAdapter
 from llama_stack.providers.remote.post_training.nvidia.post_training import (
     ListNvidiaPostTrainingJobs,
     NvidiaPostTrainingAdapter,
@@ -32,43 +29,19 @@ from llama_stack.providers.remote.post_training.nvidia.post_training import (
 )


-class TestNvidiaPostTraining(unittest.TestCase):
-    def setUp(self):
-        os.environ["NVIDIA_BASE_URL"] = "http://nemo.test"  # needed for llm inference
-        os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test"  # needed for nemo customizer
-        config = NvidiaPostTrainingConfig(
-            base_url=os.environ["NVIDIA_BASE_URL"], customizer_url=os.environ["NVIDIA_CUSTOMIZER_URL"], api_key=None
-        )
-        self.adapter = NvidiaPostTrainingAdapter(config)
-        self.make_request_patcher = patch(
-            "llama_stack.providers.remote.post_training.nvidia.post_training.NvidiaPostTrainingAdapter._make_request"
-        )
-        self.mock_make_request = self.make_request_patcher.start()
-
-        # Mock the inference client
-        inference_config = NVIDIAConfig(base_url=os.environ["NVIDIA_BASE_URL"], api_key=None)
-        self.inference_adapter = NVIDIAInferenceAdapter(inference_config)
-
-        self.mock_client = unittest.mock.MagicMock()
-        self.mock_client.chat.completions.create = unittest.mock.AsyncMock()
-        self.inference_mock_make_request = self.mock_client.chat.completions.create
-        self.inference_make_request_patcher = patch(
-            "llama_stack.providers.remote.inference.nvidia.nvidia.NVIDIAInferenceAdapter._client",
-            new_callable=unittest.mock.PropertyMock,
-            return_value=self.mock_client,
-        )
-        self.inference_make_request_patcher.start()
-
-    def tearDown(self):
-        self.make_request_patcher.stop()
-        self.inference_make_request_patcher.stop()
-
-    @pytest.fixture(autouse=True)
-    def inject_fixtures(self, run_async):
-        self.run_async = run_async
-
-    def _assert_request(self, mock_call, expected_method, expected_path, expected_params=None, expected_json=None):
-        """Helper method to verify request details in mock calls."""
-        call_args = mock_call.call_args
+@pytest.fixture
+def nvidia_post_training_adapter():
+    """Fixture to create and configure the NVIDIA post training adapter."""
+    os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test"  # needed for nemo customizer
+    config = NvidiaPostTrainingConfig(customizer_url=os.environ["NVIDIA_CUSTOMIZER_URL"], api_key=None)
+    adapter = NvidiaPostTrainingAdapter(config)
+
+    with patch.object(adapter, "_make_request") as mock_make_request:
+        yield adapter, mock_make_request
+
+
+def _assert_request(mock_call, expected_method, expected_path, expected_params=None, expected_json=None):
+    """Helper method to verify request details in mock calls."""
+    call_args = mock_call.call_args
@@ -86,9 +59,11 @@ class TestNvidiaPostTraining(unittest.TestCase):
         for key, value in expected_json.items():
             assert call_args[1]["json"][key] == value

-    def test_supervised_fine_tune(self):
+
+async def test_supervised_fine_tune(nvidia_post_training_adapter):
     """Test the supervised fine-tuning API call."""
-        self.mock_make_request.return_value = {
+    adapter, mock_make_request = nvidia_post_training_adapter
+    mock_make_request.return_value = {
         "id": "cust-JGTaMbJMdqjJU8WbQdN9Q2",
         "created_at": "2024-12-09T04:06:28.542884",
         "updated_at": "2024-12-09T04:06:28.542884",
@@ -163,8 +138,7 @@

     with warnings.catch_warnings(record=True):
         warnings.simplefilter("always")
-            training_job = self.run_async(
-                self.adapter.supervised_fine_tune(
+        training_job = await adapter.supervised_fine_tune(
             job_uuid="1234",
             model="meta/llama-3.2-1b-instruct@v1.0.0+L40",
             checkpoint_dir="",
@@ -173,15 +147,14 @@
             logger_config={},
             hyperparam_search_config={},
         )
-            )

     # check the output is a PostTrainingJob
     assert isinstance(training_job, NvidiaPostTrainingJob)
     assert training_job.job_uuid == "cust-JGTaMbJMdqjJU8WbQdN9Q2"

-        self.mock_make_request.assert_called_once()
-        self._assert_request(
-            self.mock_make_request,
+    mock_make_request.assert_called_once()
+    _assert_request(
+        mock_make_request,
         "POST",
         "/v1/customization/jobs",
         expected_json={
@@ -199,7 +172,11 @@
        },
     )

-    def test_supervised_fine_tune_with_qat(self):
+
+async def test_supervised_fine_tune_with_qat(nvidia_post_training_adapter):
+    """Test that QAT configuration raises NotImplementedError."""
+    adapter, mock_make_request = nvidia_post_training_adapter
+
     algorithm_config = QATFinetuningConfig(type="QAT", quantizer_name="quantizer_name", group_size=1)
     data_config = DataConfig(
         dataset_id="sample-basic-test", batch_size=16, shuffle=False, data_format=DatasetFormat.instruct
@@ -215,10 +192,10 @@
         data_config=data_config,
         optimizer_config=optimizer_config,
     )

     # This will raise NotImplementedError since QAT is not supported
-        with self.assertRaises(NotImplementedError):
-            self.run_async(
-                self.adapter.supervised_fine_tune(
+    with pytest.raises(NotImplementedError):
+        await adapter.supervised_fine_tune(
             job_uuid="1234",
             model="meta/llama-3.2-1b-instruct@v1.0.0+L40",
             checkpoint_dir="",
@@ -227,9 +204,12 @@
             logger_config={},
             hyperparam_search_config={},
         )
-            )

-    def test_get_training_job_status(self):
+
+async def test_get_training_job_status(nvidia_post_training_adapter):
+    """Test getting training job status with different statuses."""
+    adapter, mock_make_request = nvidia_post_training_adapter
+
     customizer_status_to_job_status = [
         ("running", "in_progress"),
         ("completed", "completed"),
@@ -240,8 +220,7 @@
     ]

     for customizer_status, expected_status in customizer_status_to_job_status:
-            with self.subTest(customizer_status=customizer_status, expected_status=expected_status):
-                self.mock_make_request.return_value = {
+        mock_make_request.return_value = {
             "created_at": "2024-12-09T04:06:28.580220",
             "updated_at": "2024-12-09T04:21:19.852832",
             "status": customizer_status,
@@ -255,27 +234,35 @@
         job_id = "cust-JGTaMbJMdqjJU8WbQdN9Q2"

-                status = self.run_async(self.adapter.get_training_job_status(job_uuid=job_id))
+        status = await adapter.get_training_job_status(job_uuid=job_id)

         assert isinstance(status, NvidiaPostTrainingJobStatusResponse)
         assert status.status.value == expected_status
-                assert status.steps_completed == 1210
-                assert status.epochs_completed == 2
-                assert status.percentage_done == 100.0
-                assert status.best_epoch == 2
-                assert status.train_loss == 1.718016266822815
-                assert status.val_loss == 1.8661999702453613
+        # Note: The response object inherits extra fields via ConfigDict(extra="allow")
+        # So these attributes should be accessible using getattr with defaults
+        assert getattr(status, "steps_completed", None) == 1210
+        assert getattr(status, "epochs_completed", None) == 2
+        assert getattr(status, "percentage_done", None) == 100.0
+        assert getattr(status, "best_epoch", None) == 2
+        assert getattr(status, "train_loss", None) == 1.718016266822815
+        assert getattr(status, "val_loss", None) == 1.8661999702453613

-                self._assert_request(
-                    self.mock_make_request,
+        _assert_request(
+            mock_make_request,
             "GET",
             f"/v1/customization/jobs/{job_id}/status",
             expected_params={"job_id": job_id},
         )

-    def test_get_training_jobs(self):
+        mock_make_request.reset_mock()
+
+
+async def test_get_training_jobs(nvidia_post_training_adapter):
+    """Test getting list of training jobs."""
+    adapter, mock_make_request = nvidia_post_training_adapter
+
     job_id = "cust-JGTaMbJMdqjJU8WbQdN9Q2"
-        self.mock_make_request.return_value = {
+    mock_make_request.return_value = {
         "data": [
             {
                 "id": job_id,
@@ -301,7 +288,7 @@
         ]
     }

-        jobs = self.run_async(self.adapter.get_training_jobs())
+    jobs = await adapter.get_training_jobs()

     assert isinstance(jobs, ListNvidiaPostTrainingJobs)
     assert len(jobs.data) == 1
@@ -309,59 +296,30 @@
     assert job.job_uuid == job_id
     assert job.status.value == "completed"

-        self.mock_make_request.assert_called_once()
-        self._assert_request(
-            self.mock_make_request,
+    mock_make_request.assert_called_once()
+    _assert_request(
+        mock_make_request,
         "GET",
         "/v1/customization/jobs",
         expected_params={"page": 1, "page_size": 10, "sort": "created_at"},
     )

-    def test_cancel_training_job(self):
-        self.mock_make_request.return_value = {}  # Empty response for successful cancellation
+
+async def test_cancel_training_job(nvidia_post_training_adapter):
+    """Test canceling a training job."""
+    adapter, mock_make_request = nvidia_post_training_adapter
+    mock_make_request.return_value = {}  # Empty response for successful cancellation

     job_id = "cust-JGTaMbJMdqjJU8WbQdN9Q2"
-        result = self.run_async(self.adapter.cancel_training_job(job_uuid=job_id))
+    result = await adapter.cancel_training_job(job_uuid=job_id)

     assert result is None
-        self.mock_make_request.assert_called_once()
-        self._assert_request(
-            self.mock_make_request,
+    mock_make_request.assert_called_once()
+    _assert_request(
+        mock_make_request,
         "POST",
         f"/v1/customization/jobs/{job_id}/cancel",
         expected_params={"job_id": job_id},
     )
-
-    def test_inference_register_model(self):
-        model_id = "default/job-1234"
-        model_type = ModelType.llm
-        model = Model(
-            identifier=model_id,
-            provider_id="nvidia",
-            provider_model_id=model_id,
-            provider_resource_id=model_id,
-            model_type=model_type,
-        )
-
-        # simulate a NIM where default/job-1234 is an available model
-        with patch.object(self.inference_adapter, "check_model_availability", new_callable=AsyncMock) as mock_check:
-            mock_check.return_value = True
-            result = self.run_async(self.inference_adapter.register_model(model))
-            assert result == model
-            assert len(self.inference_adapter.alias_to_provider_id_map) > 1
-            assert self.inference_adapter.get_provider_model_id(model.provider_model_id) == model_id
-
-            with patch.object(self.inference_adapter, "chat_completion") as mock_chat_completion:
-                self.run_async(
-                    self.inference_adapter.chat_completion(
-                        model_id=model_id,
-                        messages=[{"role": "user", "content": "Hello, model"}],
-                    )
-                )
-
-                mock_chat_completion.assert_called()
-
-
-if __name__ == "__main__":
-    unittest.main()