mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
# What does this PR do? This PR contains two sets of notebooks that serve as reference material for developers getting started with Llama Stack using the NVIDIA Provider. Developers should be able to execute these notebooks end-to-end, pointing to their NeMo Microservices deployment. 1. `beginner_e2e/`: Notebook that walks through a beginner end-to-end workflow that covers creating datasets, running inference, customizing and evaluating models, and running safety checks. 2. `tool_calling/`: Notebook that is ported over from the [Data Flywheel & Tool Calling notebook](https://github.com/NVIDIA/GenerativeAIExamples/tree/main/nemo/data-flywheel) that is referenced in the NeMo Microservices docs. I updated the notebook to use the Llama Stack client wherever possible, and added relevant instructions. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan - Both notebook folders contain READMEs with pre-requisites. To manually test these notebooks, you'll need to have a deployment of the NeMo Microservices Platform and update the `config.py` file with your deployment's information. - I've run through these notebooks manually end-to-end to verify each step works. [//]: # (## Documentation) --------- Co-authored-by: Jash Gulabrai <jgulabrai@nvidia.com>
116 lines
3.8 KiB
Python
116 lines
3.8 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
from typing import Any
|
|
|
|
import aiohttp
|
|
|
|
from llama_stack.apis.common.content_types import URL
|
|
from llama_stack.apis.common.responses import PaginatedResponse
|
|
from llama_stack.apis.common.type_system import ParamType
|
|
from llama_stack.apis.datasets import Dataset
|
|
|
|
from .config import NvidiaDatasetIOConfig
|
|
|
|
|
|
class NvidiaDatasetIOAdapter:
    """DatasetIO adapter that talks to the NVIDIA NeMo datasets HTTP API.

    Requests are sent to ``config.datasets_url``. Dataset registration and
    unregistration are implemented; ``update_dataset``, ``iterrows`` and
    ``append_rows`` raise ``NotImplementedError``.
    """

    def __init__(self, config: NvidiaDatasetIOConfig):
        """Store the provider configuration.

        Args:
            config: Provider settings. This class reads ``datasets_url``,
                ``dataset_namespace`` and ``project_id`` from it.
        """
        self.config = config
        # Base headers copied into every outgoing request; empty by default.
        self.headers: dict[str, Any] = {}

    async def _make_request(
        self,
        method: str,
        path: str,
        headers: dict[str, Any] | None = None,
        params: dict[str, Any] | None = None,
        json: dict[str, Any] | None = None,
        **kwargs,
    ) -> dict[str, Any]:
        """Send one HTTP request to the datasets API and decode the JSON reply.

        Args:
            method: HTTP verb, e.g. ``"POST"`` or ``"DELETE"``.
            path: Path appended verbatim to ``config.datasets_url``.
            headers: Extra headers. They are applied last, so they override
                both ``self.headers`` and the automatic ``Content-Type``.
            params: Query-string parameters.
            json: JSON-serializable request body.
            **kwargs: Forwarded unchanged to ``aiohttp.ClientSession.request``.

        Returns:
            The decoded JSON response body, or an empty dict when a successful
            response carries no body (e.g. ``204 No Content``).

        Raises:
            Exception: If the server answers with a non-2xx status. The
                message embeds the decoded JSON error payload, or the raw
                response text when the payload is not valid JSON.
        """
        url = f"{self.config.datasets_url}{path}"
        request_headers = self.headers.copy()

        # Set default Content-Type for JSON requests
        if json is not None:
            request_headers["Content-Type"] = "application/json"

        if headers:
            request_headers.update(headers)

        async with aiohttp.ClientSession(headers=request_headers) as session:
            async with session.request(method, url, params=params, json=json, **kwargs) as response:
                # Any 2xx is a success; 201/204 are legitimate answers to
                # create/delete calls and must not be reported as failures.
                if not 200 <= response.status < 300:
                    try:
                        # content_type=None skips aiohttp's MIME check so that
                        # a mislabelled JSON error body is still decoded.
                        error_data = await response.json(content_type=None)
                    except ValueError:
                        # Not JSON at all (e.g. an HTML error page from a
                        # proxy): report the raw text instead of masking the
                        # failure with a decoding error.
                        error_data = await response.text(errors="replace")
                    raise Exception(f"API request failed: {error_data}")

                # A successful response without a body has nothing to decode.
                if response.status == 204 or response.content_length == 0:
                    return {}
                return await response.json()

    async def register_dataset(
        self,
        dataset_def: Dataset,
    ) -> Dataset:
        """Register a new dataset with the datasets API.

        The request body is assembled from ``dataset_def`` and the adapter
        configuration: ``dataset_def.identifier`` is sent as ``name``,
        ``dataset_def.source.uri`` as ``files_url``, and the namespace and
        project come from ``self.config``. When ``dataset_def.metadata`` is
        non-empty, its ``"format"`` and ``"description"`` entries are sent as
        well (as ``None`` when a key is missing).

        Args:
            dataset_def: The dataset definition to register.

        Returns:
            The same ``dataset_def`` object that was passed in.

        Raises:
            Exception: Propagated from ``_make_request`` when the API call
                fails.
        """
        # TODO: add warnings for unsupported params
        request_body = {
            "name": dataset_def.identifier,
            "namespace": self.config.dataset_namespace,
            "files_url": dataset_def.source.uri,
            "project": self.config.project_id,
        }
        if dataset_def.metadata:
            request_body["format"] = dataset_def.metadata.get("format")
            request_body["description"] = dataset_def.metadata.get("description")
        await self._make_request(
            "POST",
            "/v1/datasets",
            json=request_body,
        )
        return dataset_def

    async def update_dataset(
        self,
        dataset_id: str,
        dataset_schema: dict[str, ParamType],
        url: URL,
        provider_dataset_id: str | None = None,
        provider_id: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Update an existing dataset. Not supported by this adapter.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Not implemented")

    async def unregister_dataset(
        self,
        dataset_id: str,
    ) -> None:
        """Delete a dataset from the configured namespace.

        Args:
            dataset_id: Name of the dataset, used as the last path segment of
                ``/v1/datasets/{namespace}/{dataset_id}``.

        Raises:
            Exception: Propagated from ``_make_request`` when the API call
                fails.
        """
        await self._make_request(
            "DELETE",
            f"/v1/datasets/{self.config.dataset_namespace}/{dataset_id}",
            headers={"Accept": "application/json", "Content-Type": "application/json"},
        )

    async def iterrows(
        self,
        dataset_id: str,
        start_index: int | None = None,
        limit: int | None = None,
    ) -> PaginatedResponse:
        """Page through the rows of a dataset. Not supported by this adapter.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Not implemented")

    async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
        """Append rows to a dataset. Not supported by this adapter.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Not implemented")
|