From 63825eb493e0be9665c312f3878fee7345a879d8 Mon Sep 17 00:00:00 2001 From: raspawar Date: Wed, 26 Mar 2025 13:46:37 +0530 Subject: [PATCH] move nvidia to remote datasetio --- llama_stack/distribution/distribution.py | 8 +-- llama_stack/providers/registry/datasetio.py | 11 ++++ llama_stack/providers/registry/datasets.py | 30 ----------- .../{datasets => datasetio}/nvidia/README.md | 0 .../nvidia/__init__.py | 12 ++--- .../{datasets => datasetio}/nvidia/config.py | 4 +- .../nvidia/datasetio.py} | 54 ++++--------------- llama_stack/templates/nvidia/build.yaml | 1 - llama_stack/templates/nvidia/nvidia.py | 22 +++++--- llama_stack/templates/nvidia/run.yaml | 1 - 10 files changed, 47 insertions(+), 96 deletions(-) delete mode 100644 llama_stack/providers/registry/datasets.py rename llama_stack/providers/remote/{datasets => datasetio}/nvidia/README.md (100%) rename llama_stack/providers/remote/{datasets => datasetio}/nvidia/__init__.py (54%) rename llama_stack/providers/remote/{datasets => datasetio}/nvidia/config.py (96%) rename llama_stack/providers/remote/{datasets/nvidia/datasets.py => datasetio/nvidia/datasetio.py} (66%) diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py index d6af2a956..ddb727663 100644 --- a/llama_stack/distribution/distribution.py +++ b/llama_stack/distribution/distribution.py @@ -35,10 +35,10 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]: routing_table_api=Api.vector_dbs, router_api=Api.vector_io, ), - # AutoRoutedApiInfo( - # routing_table_api=Api.datasets, - # router_api=Api.datasetio, - # ), + AutoRoutedApiInfo( + routing_table_api=Api.datasets, + router_api=Api.datasetio, + ), AutoRoutedApiInfo( routing_table_api=Api.scoring_functions, router_api=Api.scoring, diff --git a/llama_stack/providers/registry/datasetio.py b/llama_stack/providers/registry/datasetio.py index f83dcbc60..7db136136 100644 --- a/llama_stack/providers/registry/datasetio.py +++ b/llama_stack/providers/registry/datasetio.py @@ -36,4 +36,15 @@ def available_providers() -> List[ProviderSpec]: config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig", ), ), + remote_provider_spec( + api=Api.datasetio, + adapter=AdapterSpec( + adapter_type="nvidia", + pip_packages=[ + "datasets", + ], + module="llama_stack.providers.remote.datasetio.nvidia", + config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig", + ), + ), ] diff --git a/llama_stack/providers/registry/datasets.py b/llama_stack/providers/registry/datasets.py deleted file mode 100644 index 01d448627..000000000 --- a/llama_stack/providers/registry/datasets.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import List - -from llama_stack.providers.datatypes import ( - AdapterSpec, - Api, - ProviderSpec, - remote_provider_spec, -) - - -def available_providers() -> List[ProviderSpec]: - return [ - remote_provider_spec( - api=Api.datasets, - adapter=AdapterSpec( - adapter_type="nvidia", - pip_packages=[ - "datasets", - ], - module="llama_stack.providers.remote.datasets.nvidia", - config_class="llama_stack.providers.remote.datasets.nvidia.NvidiaDatasetConfig", - ), - ), - ] diff --git a/llama_stack/providers/remote/datasets/nvidia/README.md b/llama_stack/providers/remote/datasetio/nvidia/README.md similarity index 100% rename from llama_stack/providers/remote/datasets/nvidia/README.md rename to llama_stack/providers/remote/datasetio/nvidia/README.md diff --git a/llama_stack/providers/remote/datasets/nvidia/__init__.py b/llama_stack/providers/remote/datasetio/nvidia/__init__.py similarity index 54% rename from llama_stack/providers/remote/datasets/nvidia/__init__.py rename to llama_stack/providers/remote/datasetio/nvidia/__init__.py index 649bf6ec6..418daec8d 100644 --- a/llama_stack/providers/remote/datasets/nvidia/__init__.py +++ b/llama_stack/providers/remote/datasetio/nvidia/__init__.py @@ -4,20 +4,20 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .config import NvidiaDatasetConfig +from .config import NvidiaDatasetIOConfig async def get_adapter_impl( - config: NvidiaDatasetConfig, + config: NvidiaDatasetIOConfig, _deps, ): - from .datasets import NvidiaDatasetAdapter + from .datasetio import NvidiaDatasetIOAdapter - if not isinstance(config, NvidiaDatasetConfig): + if not isinstance(config, NvidiaDatasetIOConfig): raise RuntimeError(f"Unexpected config type: {type(config)}") - impl = NvidiaDatasetAdapter(config) + impl = NvidiaDatasetIOAdapter(config) return impl -__all__ = ["get_adapter_impl", "NvidiaDatasetAdapter"] +__all__ = ["get_adapter_impl", "NvidiaDatasetIOAdapter"] diff --git a/llama_stack/providers/remote/datasets/nvidia/config.py b/llama_stack/providers/remote/datasetio/nvidia/config.py similarity index 96% rename from llama_stack/providers/remote/datasets/nvidia/config.py rename to llama_stack/providers/remote/datasetio/nvidia/config.py index 77ddd6e6c..46aa68e5f 100644 --- a/llama_stack/providers/remote/datasets/nvidia/config.py +++ b/llama_stack/providers/remote/datasetio/nvidia/config.py @@ -11,8 +11,8 @@ from typing import Any, Dict, Optional from pydantic import BaseModel, Field -class NvidiaDatasetConfig(BaseModel): - """Configuration for NVIDIA Dataset implementation.""" +class NvidiaDatasetIOConfig(BaseModel): + """Configuration for NVIDIA DatasetIO implementation.""" api_key: Optional[str] = Field( default_factory=lambda: os.getenv("NVIDIA_API_KEY"), diff --git a/llama_stack/providers/remote/datasets/nvidia/datasets.py b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py similarity index 66% rename from llama_stack/providers/remote/datasets/nvidia/datasets.py rename to llama_stack/providers/remote/datasetio/nvidia/datasetio.py index a82fafb53..9a5c8e46b 100644 --- a/llama_stack/providers/remote/datasets/nvidia/datasets.py +++ b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from datetime import datetime from typing import Any, Dict, Literal, Optional import aiohttp @@ -15,15 +14,15 @@ from llama_stack.apis.datasets.datasets import Dataset, ListDatasetsResponse from llama_stack.apis.resource import ResourceType from llama_stack.schema_utils import webmethod -from .config import NvidiaDatasetConfig +from .config import NvidiaDatasetIOConfig -class NvidiaDatasetAdapter: - """Nvidia NeMo Dataset API.""" +class NvidiaDatasetIOAdapter: + """Nvidia NeMo DatasetIO API.""" type: Literal[ResourceType.dataset.value] = ResourceType.dataset.value - def __init__(self, config: NvidiaDatasetConfig): + def __init__(self, config: NvidiaDatasetIOConfig): self.config = config self.headers = {} if config.api_key: @@ -86,48 +85,13 @@ class NvidiaDatasetAdapter: self, dataset_id: str, ) -> Optional[Dataset]: - dataset_id, namespace = dataset_id.split(":") - dataset = await self._make_request( - method="GET", - path=f"/v1/datasets/{namespace}/{dataset_id}", - ) - created_at = datetime.fromisoformat(dataset.pop("created_at")) if "created_at" in dataset else datetime.now() - identifier = dataset.pop("name") - url = URL(uri=dataset.pop("files_url")) - return Dataset( - identifier=identifier, - provider_id="nvidia", # confirm this - url=url, - dataset_schema={}, # ToDo: get schema from the dataset - created_at=created_at, - metadata=dataset, - ) + raise NotImplementedError("Not implemented") @webmethod(route="/datasets", method="GET") async def list_datasets( self, ) -> ListDatasetsResponse: - ## ToDo: add pagination - response = await self._make_request(method="GET", path="/v1/datasets") - datasets = [] - for dataset in response["data"]: - created_at = ( - datetime.fromisoformat(dataset.pop("created_at")) if "created_at" in dataset else datetime.now() - ) - identifier = dataset.pop("name") - url = URL(uri=dataset.pop("files_url")) - datasets.append( - Dataset( - identifier=identifier, - provider_id="nvidia", # confirm this - url=url, - dataset_schema={}, - created_at=created_at, - metadata=dataset, - ) - ) # add remaining fields as metadata - - return ListDatasetsResponse(data=datasets) + raise NotImplementedError("Not implemented") @webmethod(route="/datasets/{dataset_id:path}", method="POST") async def update_dataset( @@ -138,11 +102,13 @@ class NvidiaDatasetAdapter: provider_dataset_id: Optional[str] = None, provider_id: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, - ) -> None: ... + ) -> None: + raise NotImplementedError("Not implemented") @webmethod(route="/datasets/{dataset_id:path}", method="DELETE") async def unregister_dataset( self, dataset_id: str, namespace: Optional[str] = "default", - ) -> None: ... + ) -> None: + raise NotImplementedError("Not implemented") diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml index e289f4874..708d8ba69 100644 --- a/llama_stack/templates/nvidia/build.yaml +++ b/llama_stack/templates/nvidia/build.yaml @@ -16,7 +16,6 @@ distribution_spec: - inline::meta-reference datasetio: - inline::localfs - datasets: - remote::nvidia scoring: - inline::basic diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index d3e00c2d5..18bf2518c 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -7,7 +7,7 @@ from pathlib import Path from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput -from llama_stack.providers.remote.datasets.nvidia import NvidiaDatasetConfig +from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES from llama_stack.providers.remote.post_training.nvidia import NvidiaPostTrainingConfig @@ -24,10 +24,15 @@ def get_distribution_template() -> DistributionTemplate: "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], "eval": ["inline::meta-reference"], - "datasetio": ["inline::localfs"], - "datasets": ["remote::nvidia"], - "scoring": ["inline::basic"], - "tool_runtime": ["inline::rag-runtime"], + "datasetio": ["remote::huggingface", "inline::localfs", "remote::nvidia"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::rag-runtime", + "remote::model-context-protocol", + ], } inference_provider = Provider( @@ -42,11 +47,12 @@ def get_distribution_template() -> DistributionTemplate: config=NvidiaPostTrainingConfig.sample_run_config(), ) - datasets_provider = Provider( + datasetio_provider = Provider( provider_id="nvidia", provider_type="remote::nvidia", - config=NvidiaDatasetConfig.sample_run_config(), + config=NvidiaDatasetIOConfig.sample_run_config(), ) + safety_provider = Provider( provider_id="nvidia", provider_type="remote::nvidia", @@ -85,7 +91,7 @@ def get_distribution_template() -> DistributionTemplate: provider_overrides={ "inference": [inference_provider], "post_training": [post_training_provider], - "datasets": [datasets_provider], + "datasetio": [datasetio_provider], }, default_models=default_models, default_tool_groups=default_tool_groups, diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index 9f84e2925..9a7824efe 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -59,7 +59,6 @@ providers: - provider_id: localfs provider_type: inline::localfs config: {} - datasets: - provider_id: nvidia provider_type: remote::nvidia config: {}