Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-20 19:56:59 +00:00)
Merge branch 'main' into nvidia-e2e-notebook

commit c3d8940c95
8 changed files with 214 additions and 11 deletions
@@ -439,8 +439,8 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
         # infer provider from source
         if metadata:
-            if metadata.get("provider"):
-                provider_id = metadata.get("provider")  # pass through from nvidia datasetio
+            if metadata.get("provider_id"):
+                provider_id = metadata.get("provider_id")  # pass through from nvidia datasetio
         elif source.type == DatasetType.rows.value:
             provider_id = "localfs"
         elif source.type == DatasetType.uri.value:
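For orientation, here is a minimal, self-contained sketch (not part of the diff) of the selection logic this hunk ends up with; the function name and the string source types are illustrative stand-ins for the routing table's real signature, and the `uri` fallback lies outside the hunk, so it is left open:

def infer_dataset_provider_id(metadata, source_type):
    # Mirrors the post-change logic above; names are illustrative, not the
    # routing table's actual signature.
    provider_id = None
    # infer provider from source
    if metadata:
        if metadata.get("provider_id"):
            provider_id = metadata.get("provider_id")  # pass through from nvidia datasetio
    elif source_type == "rows":
        provider_id = "localfs"
    elif source_type == "uri":
        pass  # the uri fallback is outside this hunk, so it is not reproduced here
    return provider_id

# e.g. infer_dataset_provider_id({"provider_id": "nvidia"}, "uri") -> "nvidia"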
@@ -44,8 +44,7 @@ class NvidiaDatasetIOAdapter:
             request_headers.update(headers)

         async with aiohttp.ClientSession(headers=request_headers) as session:
-            # TODO: Remove `verify_ssl=False`. Added for testing purposes to call NMP int environment from `docs/notebooks/nvidia/`
-            async with session.request(method, url, params=params, json=json, verify_ssl=False, **kwargs) as response:
+            async with session.request(method, url, params=params, json=json, **kwargs) as response:
                 if response.status != 200:
                     error_data = await response.json()
                     raise Exception(f"API request failed: {error_data}")
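As a side note, a sketch of the same request-helper shape with certificate verification exposed as an argument instead of being hard-coded; the helper name and the `verify` flag are inventions for illustration, not part of `NvidiaDatasetIOAdapter`:

import aiohttp


async def request_json(method, url, headers=None, params=None, json=None, verify=True):
    # One-shot request that returns the decoded JSON body and raises on non-200.
    async with aiohttp.ClientSession(headers=headers) as session:
        # aiohttp accepts ssl=False to skip certificate verification; None keeps the default check
        async with session.request(method, url, params=params, json=json, ssl=None if verify else False) as response:
            if response.status != 200:
                error_data = await response.json()
                raise Exception(f"API request failed: {error_data}")
            return await response.json()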
@@ -7,6 +7,7 @@
 from pathlib import Path

 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput
+from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig
 from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
 from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
@@ -38,6 +39,11 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="remote::nvidia",
         config=NVIDIASafetyConfig.sample_run_config(),
     )
+    datasetio_provider = Provider(
+        provider_id="nvidia",
+        provider_type="remote::nvidia",
+        config=NvidiaDatasetIOConfig.sample_run_config(),
+    )
     eval_provider = Provider(
         provider_id="nvidia",
         provider_type="remote::nvidia",
@@ -75,6 +81,7 @@ def get_distribution_template() -> DistributionTemplate:
         "run.yaml": RunConfigSettings(
             provider_overrides={
                 "inference": [inference_provider],
+                "datasetio": [datasetio_provider],
                 "eval": [eval_provider],
             },
             default_models=default_models,
@@ -62,13 +62,6 @@ providers:
       project_id: ${env.NVIDIA_PROJECT_ID:test-project}
       customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}
   datasetio:
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
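For reference, the remaining nvidia datasetio entry presumably expands to something like the snippet below; the field names come from `NvidiaDatasetIOConfig` as used in the unit tests further down, while the environment variable names and defaults are assumptions:

  datasetio:
  - provider_id: nvidia
    provider_type: remote::nvidia
    config:
      datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test}
      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default}
      project_id: ${env.NVIDIA_PROJECT_ID:test-project}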
New file: tests/integration/providers/nvidia/__init__.py (5 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
New file: tests/integration/providers/nvidia/conftest.py (14 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os

import pytest

# Skip all tests in this directory when running in GitHub Actions
in_github_actions = os.environ.get("GITHUB_ACTIONS") == "true"
if in_github_actions:
    pytest.skip("Skipping NVIDIA tests in GitHub Actions environment", allow_module_level=True)
New file: tests/integration/providers/nvidia/test_datastore.py (47 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


import pytest

# How to run this test:
#
# LLAMA_STACK_CONFIG="nvidia" pytest -v tests/integration/providers/nvidia/test_datastore.py


# nvidia provider only
@pytest.mark.parametrize(
    "provider_id",
    [
        "nvidia",
    ],
)
def test_register_and_unregister(llama_stack_client, provider_id):
    purpose = "eval/messages-answer"
    source = {
        "type": "uri",
        "uri": "hf://datasets/llamastack/simpleqa?split=train",
    }
    dataset_id = f"test-dataset-{provider_id}"
    dataset = llama_stack_client.datasets.register(
        dataset_id=dataset_id,
        purpose=purpose,
        source=source,
        metadata={"provider_id": provider_id, "format": "json", "description": "Test dataset description"},
    )
    assert dataset.identifier is not None
    assert dataset.provider_id == provider_id
    assert dataset.identifier == dataset_id

    dataset_list = llama_stack_client.datasets.list()
    provider_datasets = [d for d in dataset_list if d.provider_id == provider_id]
    assert any(provider_datasets)
    assert any(d.identifier == dataset_id for d in provider_datasets)

    llama_stack_client.datasets.unregister(dataset.identifier)
    dataset_list = llama_stack_client.datasets.list()
    provider_datasets = [d for d in dataset_list if d.identifier == dataset.identifier]
    assert not any(provider_datasets)
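The `llama_stack_client` fixture comes from the shared integration-test conftest, which is not part of this diff; for a manual run against a locally started "nvidia" distribution, the client is usually constructed along these lines (the base URL is an assumption):

from llama_stack_client import LlamaStackClient

# assumes a llama-stack server running the nvidia distribution is listening locally
client = LlamaStackClient(base_url="http://localhost:8321")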
New file: tests/unit/providers/nvidia/test_datastore.py (138 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os
import unittest
from unittest.mock import patch

import pytest

from llama_stack.apis.datasets import Dataset, DatasetPurpose, URIDataSource
from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig
from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter


class TestNvidiaDatastore(unittest.TestCase):
    def setUp(self):
        os.environ["NVIDIA_DATASETS_URL"] = "http://nemo.test/datasets"

        config = NvidiaDatasetIOConfig(
            datasets_url=os.environ["NVIDIA_DATASETS_URL"], dataset_namespace="default", project_id="default"
        )
        self.adapter = NvidiaDatasetIOAdapter(config)
        self.make_request_patcher = patch(
            "llama_stack.providers.remote.datasetio.nvidia.datasetio.NvidiaDatasetIOAdapter._make_request"
        )
        self.mock_make_request = self.make_request_patcher.start()

    def tearDown(self):
        self.make_request_patcher.stop()

    @pytest.fixture(autouse=True)
    def inject_fixtures(self, run_async):
        self.run_async = run_async

    def _assert_request(self, mock_call, expected_method, expected_path, expected_json=None):
        """Helper method to verify request details in mock calls."""
        call_args = mock_call.call_args

        assert call_args[0][0] == expected_method
        assert call_args[0][1] == expected_path

        if expected_json:
            for key, value in expected_json.items():
                assert call_args[1]["json"][key] == value

    def test_register_dataset(self):
        self.mock_make_request.return_value = {
            "id": "dataset-123456",
            "name": "test-dataset",
            "namespace": "default",
        }

        dataset_def = Dataset(
            identifier="test-dataset",
            type="dataset",
            provider_resource_id="",
            provider_id="",
            purpose=DatasetPurpose.post_training_messages,
            source=URIDataSource(uri="https://example.com/data.jsonl"),
            metadata={"provider_id": "nvidia", "format": "jsonl", "description": "Test dataset description"},
        )

        self.run_async(self.adapter.register_dataset(dataset_def))

        self.mock_make_request.assert_called_once()
        self._assert_request(
            self.mock_make_request,
            "POST",
            "/v1/datasets",
            expected_json={
                "name": "test-dataset",
                "namespace": "default",
                "files_url": "https://example.com/data.jsonl",
                "project": "default",
                "format": "jsonl",
                "description": "Test dataset description",
            },
        )

    def test_unregister_dataset(self):
        self.mock_make_request.return_value = {
            "message": "Resource deleted successfully.",
            "id": "dataset-81RSQp7FKX3rdBtKvF9Skn",
            "deleted_at": None,
        }
        dataset_id = "test-dataset"

        self.run_async(self.adapter.unregister_dataset(dataset_id))

        self.mock_make_request.assert_called_once()
        self._assert_request(self.mock_make_request, "DELETE", "/v1/datasets/default/test-dataset")

    def test_register_dataset_with_custom_namespace_project(self):
        custom_config = NvidiaDatasetIOConfig(
            datasets_url=os.environ["NVIDIA_DATASETS_URL"],
            dataset_namespace="custom-namespace",
            project_id="custom-project",
        )
        custom_adapter = NvidiaDatasetIOAdapter(custom_config)

        self.mock_make_request.return_value = {
            "id": "dataset-123456",
            "name": "test-dataset",
            "namespace": "custom-namespace",
        }

        dataset_def = Dataset(
            identifier="test-dataset",
            type="dataset",
            provider_resource_id="",
            provider_id="",
            purpose=DatasetPurpose.post_training_messages,
            source=URIDataSource(uri="https://example.com/data.jsonl"),
            metadata={"format": "jsonl"},
        )

        self.run_async(custom_adapter.register_dataset(dataset_def))

        self.mock_make_request.assert_called_once()
        self._assert_request(
            self.mock_make_request,
            "POST",
            "/v1/datasets",
            expected_json={
                "name": "test-dataset",
                "namespace": "custom-namespace",
                "files_url": "https://example.com/data.jsonl",
                "project": "custom-project",
                "format": "jsonl",
            },
        )


if __name__ == "__main__":
    unittest.main()
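Note that the test class leans on a `run_async` fixture that is defined outside this diff; assuming it simply drives a coroutine to completion, a minimal stand-in could look like this (a sketch, not the repository's actual fixture):

import asyncio

import pytest


@pytest.fixture
def run_async():
    # Return a callable that runs a coroutine to completion on a fresh event loop.
    def _run(coro):
        return asyncio.run(coro)

    return _run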