mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
# What does this PR do? Implementation of the NeMo Datastore register and unregister APIs. Open Issues: - provider_id gets set to `localfs` in client.datasets.register() as it is specified in routing_tables.py: DatasetsRoutingTable see: #1860 Currently I have passed `"provider_id":"nvidia"` in metadata and have parsed that in `DatasetsRoutingTable` (Not the best approach, but just a quick workaround to make it work for now.) ## Test Plan - Unit test cases: `pytest tests/unit/providers/nvidia/test_datastore.py` ```bash ========================================================== test session starts =========================================================== platform linux -- Python 3.10.0, pytest-8.3.5, pluggy-1.5.0 rootdir: /home/ubuntu/llama-stack configfile: pyproject.toml plugins: anyio-4.9.0, asyncio-0.26.0, nbval-0.11.0, metadata-3.1.1, html-4.1.1, cov-6.1.0 asyncio: mode=strict, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function collected 2 items tests/unit/providers/nvidia/test_datastore.py .. [100%] ============================================================ warnings summary ============================================================ ====================================================== 2 passed, 1 warning in 0.84s ====================================================== ``` cc: @dglogo, @mattf, @yanxi0830
138 lines
4.7 KiB
Python
138 lines
4.7 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
import os
|
|
import unittest
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
from llama_stack.apis.datasets import Dataset, DatasetPurpose, URIDataSource
|
|
from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig
|
|
from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter
|
|
|
|
|
|
class TestNvidiaDatastore(unittest.TestCase):
    """Unit tests for dataset register/unregister on ``NvidiaDatasetIOAdapter``.

    ``NvidiaDatasetIOAdapter._make_request`` is patched for every test, so the
    tests only inspect the arguments the adapter passes to that method and the
    canned value it gets back.
    """

    def setUp(self):
        """Build a default adapter and patch its ``_make_request`` method."""
        # Scope the environment override to the running test. patch.dict
        # restores os.environ on cleanup, so the value no longer leaks into
        # tests that run later in the same process.
        env_patcher = patch.dict(os.environ, {"NVIDIA_DATASETS_URL": "http://nemo.test/datasets"})
        env_patcher.start()
        self.addCleanup(env_patcher.stop)

        config = NvidiaDatasetIOConfig(
            datasets_url=os.environ["NVIDIA_DATASETS_URL"], dataset_namespace="default", project_id="default"
        )
        self.adapter = NvidiaDatasetIOAdapter(config)

        self.make_request_patcher = patch(
            "llama_stack.providers.remote.datasetio.nvidia.datasetio.NvidiaDatasetIOAdapter._make_request"
        )
        self.mock_make_request = self.make_request_patcher.start()
        # Registered as a cleanup (instead of a tearDown override) so the
        # patch is undone by the unittest cleanup machinery; cleanups run in
        # LIFO order, so this stops before the environment is restored.
        self.addCleanup(self.make_request_patcher.stop)

    @pytest.fixture(autouse=True)
    def inject_fixtures(self, run_async):
        """Store the ``run_async`` pytest fixture on the test instance.

        NOTE(review): ``run_async`` is not defined in this file; presumably it
        comes from a conftest and drives a coroutine to completion - confirm.
        """
        self.run_async = run_async

    def _assert_request(self, mock_call, expected_method, expected_path, expected_json=None):
        """Helper method to verify request details in mock calls.

        Checks the first two positional arguments of the most recent call on
        ``mock_call`` against ``expected_method`` and ``expected_path``. When
        ``expected_json`` is non-empty, every key/value pair in it must match
        the ``json`` keyword argument of that call; extra keys in the sent
        payload are ignored.
        """
        call_args = mock_call.call_args
        assert call_args is not None, "expected the mocked request method to have been called"
        positional, keyword = call_args

        assert positional[0] == expected_method
        assert positional[1] == expected_path

        if expected_json:
            assert "json" in keyword, "expected a 'json' keyword argument in the request call"
            sent_payload = keyword["json"]
            for key, value in expected_json.items():
                assert sent_payload[key] == value

    def test_register_dataset(self):
        """Registering a dataset issues a POST to /v1/datasets with the expected payload."""
        self.mock_make_request.return_value = {
            "id": "dataset-123456",
            "name": "test-dataset",
            "namespace": "default",
        }

        dataset_def = Dataset(
            identifier="test-dataset",
            type="dataset",
            provider_resource_id="",
            provider_id="",
            purpose=DatasetPurpose.post_training_messages,
            source=URIDataSource(uri="https://example.com/data.jsonl"),
            metadata={"provider_id": "nvidia", "format": "jsonl", "description": "Test dataset description"},
        )

        self.run_async(self.adapter.register_dataset(dataset_def))

        self.mock_make_request.assert_called_once()
        self._assert_request(
            self.mock_make_request,
            "POST",
            "/v1/datasets",
            expected_json={
                "name": "test-dataset",
                "namespace": "default",
                "files_url": "https://example.com/data.jsonl",
                "project": "default",
                "format": "jsonl",
                "description": "Test dataset description",
            },
        )

    def test_unregister_dataset(self):
        """Unregistering a dataset issues a DELETE to the namespaced dataset path."""
        self.mock_make_request.return_value = {
            "message": "Resource deleted successfully.",
            "id": "dataset-81RSQp7FKX3rdBtKvF9Skn",
            "deleted_at": None,
        }
        dataset_id = "test-dataset"

        self.run_async(self.adapter.unregister_dataset(dataset_id))

        self.mock_make_request.assert_called_once()
        self._assert_request(self.mock_make_request, "DELETE", "/v1/datasets/default/test-dataset")

    def test_register_dataset_with_custom_namespace_project(self):
        """A custom namespace and project from the config appear in the register payload."""
        custom_config = NvidiaDatasetIOConfig(
            datasets_url=os.environ["NVIDIA_DATASETS_URL"],
            dataset_namespace="custom-namespace",
            project_id="custom-project",
        )
        # The patch in setUp targets the class attribute, so this second
        # adapter instance is served by the same mock.
        custom_adapter = NvidiaDatasetIOAdapter(custom_config)

        self.mock_make_request.return_value = {
            "id": "dataset-123456",
            "name": "test-dataset",
            "namespace": "custom-namespace",
        }

        dataset_def = Dataset(
            identifier="test-dataset",
            type="dataset",
            provider_resource_id="",
            provider_id="",
            purpose=DatasetPurpose.post_training_messages,
            source=URIDataSource(uri="https://example.com/data.jsonl"),
            metadata={"format": "jsonl"},
        )

        self.run_async(custom_adapter.register_dataset(dataset_def))

        self.mock_make_request.assert_called_once()
        self._assert_request(
            self.mock_make_request,
            "POST",
            "/v1/datasets",
            expected_json={
                "name": "test-dataset",
                "namespace": "custom-namespace",
                "files_url": "https://example.com/data.jsonl",
                "project": "custom-project",
                "format": "jsonl",
            },
        )
|
|
|
|
|
|
if __name__ == "__main__":
    # Hand control to the unittest runner when this file is executed as a script.
    unittest.main()
|