Merge branch 'main' into nvidia-e2e-notebook

This commit is contained in:
Jash Gulabrai 2025-05-06 11:12:34 -04:00
commit b1d941e1f0
447 changed files with 6462 additions and 64778 deletions

View file

@ -3,7 +3,7 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Dict
from typing import Any
from pydantic import BaseModel
@ -17,7 +17,7 @@ class HuggingfaceDatasetIOConfig(BaseModel):
kvstore: KVStoreConfig
@classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> Dict[str, Any]:
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
return {
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,

View file

@ -3,7 +3,7 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Dict, List, Optional
from typing import Any
from urllib.parse import parse_qs, urlparse
import datasets as hf_datasets
@ -70,8 +70,8 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
async def iterrows(
self,
dataset_id: str,
start_index: Optional[int] = None,
limit: Optional[int] = None,
start_index: int | None = None,
limit: int | None = None,
) -> PaginatedResponse:
dataset_def = self.dataset_infos[dataset_id]
path, params = parse_hf_params(dataset_def)
@ -80,7 +80,7 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
records = [loaded_dataset[i] for i in range(len(loaded_dataset))]
return paginate_records(records, start_index, limit)
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
dataset_def = self.dataset_infos[dataset_id]
path, params = parse_hf_params(dataset_def)
loaded_dataset = hf_datasets.load_dataset(path, **params)

View file

@ -6,7 +6,7 @@
import os
import warnings
from typing import Any, Dict, Optional
from typing import Any
from pydantic import BaseModel, Field
@ -14,17 +14,17 @@ from pydantic import BaseModel, Field
class NvidiaDatasetIOConfig(BaseModel):
"""Configuration for NVIDIA DatasetIO implementation."""
api_key: Optional[str] = Field(
api_key: str | None = Field(
default_factory=lambda: os.getenv("NVIDIA_API_KEY"),
description="The NVIDIA API key.",
)
dataset_namespace: Optional[str] = Field(
dataset_namespace: str | None = Field(
default_factory=lambda: os.getenv("NVIDIA_DATASET_NAMESPACE", "default"),
description="The NVIDIA dataset namespace.",
)
project_id: Optional[str] = Field(
project_id: str | None = Field(
default_factory=lambda: os.getenv("NVIDIA_PROJECT_ID", "test-project"),
description="The NVIDIA project ID.",
)
@ -52,7 +52,7 @@ class NvidiaDatasetIOConfig(BaseModel):
)
@classmethod
def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return {
"api_key": "${env.NVIDIA_API_KEY:}",
"dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}",

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Dict, List, Optional
from typing import Any
import aiohttp
@ -27,11 +27,11 @@ class NvidiaDatasetIOAdapter:
self,
method: str,
path: str,
headers: Optional[Dict[str, Any]] = None,
params: Optional[Dict[str, Any]] = None,
json: Optional[Dict[str, Any]] = None,
headers: dict[str, Any] | None = None,
params: dict[str, Any] | None = None,
json: dict[str, Any] | None = None,
**kwargs,
) -> Dict[str, Any]:
) -> dict[str, Any]:
"""Helper method to make HTTP requests to the Customizer API."""
url = f"{self.config.datasets_url}{path}"
request_headers = self.headers.copy()
@ -86,11 +86,11 @@ class NvidiaDatasetIOAdapter:
async def update_dataset(
self,
dataset_id: str,
dataset_schema: Dict[str, ParamType],
dataset_schema: dict[str, ParamType],
url: URL,
provider_dataset_id: Optional[str] = None,
provider_id: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
provider_dataset_id: str | None = None,
provider_id: str | None = None,
metadata: dict[str, Any] | None = None,
) -> None:
raise NotImplementedError("Not implemented")
@ -107,10 +107,10 @@ class NvidiaDatasetIOAdapter:
async def iterrows(
self,
dataset_id: str,
start_index: Optional[int] = None,
limit: Optional[int] = None,
start_index: int | None = None,
limit: int | None = None,
) -> PaginatedResponse:
raise NotImplementedError("Not implemented")
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
raise NotImplementedError("Not implemented")