From f5ebad130ce9e7341db4853bc1d65453e7341305 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sat, 8 Mar 2025 12:07:33 +0000 Subject: [PATCH] add documentation --- .../remote_hosted_distro/nvidia.md | 8 + .../remote/post_training/nvidia/README.md | 138 ++++++++++++++++++ .../remote/post_training/nvidia/config.py | 2 +- 3 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 llama_stack/providers/remote/post_training/nvidia/README.md diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md index 774d5ec1b..03a152a3b 100644 --- a/docs/source/distributions/remote_hosted_distro/nvidia.md +++ b/docs/source/distributions/remote_hosted_distro/nvidia.md @@ -9,6 +9,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov | datasetio | `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::nvidia` | +| post_training | `remote::nvidia` | | safety | `remote::nvidia` | | scoring | `inline::basic` | | telemetry | `inline::meta-reference` | @@ -21,6 +22,13 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov The following environment variables can be configured: - `NVIDIA_API_KEY`: NVIDIA API Key (default: ``) +- `NVIDIA_CUSTOMIZER_URL`: NVIDIA Customizer URL (default: `http://nemo.test`) +- `NVIDIA_USER_ID`: NVIDIA user ID (default: `llama-stack-user`) +- `NVIDIA_DATASET_NAMESPACE`: NVIDIA dataset namespace (default: `default`) +- `NVIDIA_ACCESS_POLICIES`: NVIDIA access policies (default: `{}`) +- `NVIDIA_PROJECT_ID`: NVIDIA project ID (default: `test-project`) +- `NVIDIA_OUTPUT_MODEL_DIR`: Directory to save the output model (default: `test-example-model@v1`) + - `GUARDRAILS_SERVICE_URL`: URL for the NeMo Guardrails Service (default: `http://0.0.0.0:7331`) - `INFERENCE_MODEL`: Inference model (default: `Llama3.1-8B-Instruct`) - `SAFETY_MODEL`: Name of the model to use for safety (default: 
`meta/llama-3.1-8b-instruct`) diff --git a/llama_stack/providers/remote/post_training/nvidia/README.md b/llama_stack/providers/remote/post_training/nvidia/README.md new file mode 100644 index 000000000..230587d66 --- /dev/null +++ b/llama_stack/providers/remote/post_training/nvidia/README.md @@ -0,0 +1,138 @@ +# NVIDIA Post-Training Provider for LlamaStack + +This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service. + +## Features + +- Supervised fine-tuning of Llama models +- LoRA fine-tuning support +- Job management and status tracking + +## Getting Started + +### Prerequisites + +- LlamaStack with NVIDIA configuration +- Access to a hosted NVIDIA NeMo Customizer service +- Dataset registered in the hosted NVIDIA NeMo Customizer service +- Base model downloaded and available in the hosted NVIDIA NeMo Customizer service + +### Setup + +Build the NVIDIA environment: + +```bash +llama stack build --template nvidia --image-type conda +``` + +### Basic Usage with the LlamaStack Python Client + +### Create Customization Job + +#### Initialize the client + +```python +import os + +os.environ["NVIDIA_API_KEY"] = "your-api-key" +os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" +os.environ["NVIDIA_USER_ID"] = "llama-stack-user" +os.environ["NVIDIA_DATASET_NAMESPACE"] = "default" +os.environ["NVIDIA_PROJECT_ID"] = "test-project" +os.environ["NVIDIA_OUTPUT_MODEL_DIR"] = "test-example-model@v1" + +from llama_stack.distribution.library_client import LlamaStackAsLibraryClient + +client = LlamaStackAsLibraryClient("nvidia") +client.initialize() +``` + +#### Configure fine-tuning parameters + +```python +from llama_stack_client.types.post_training_supervised_fine_tune_params import ( + TrainingConfig, + TrainingConfigDataConfig, + TrainingConfigOptimizerConfig, +) +from llama_stack_client.types.algorithm_config_param import LoraFinetuningConfig +``` + +#### Set up LoRA configuration + +```python +algorithm_config = LoraFinetuningConfig(type="LoRA", 
adapter_dim=16) +``` + +#### Configure training data + +```python +data_config = TrainingConfigDataConfig( + dataset_id="your-dataset-id", # Use client.datasets.list() to see available datasets + batch_size=16, +) +``` + +#### Configure optimizer + +```python +optimizer_config = TrainingConfigOptimizerConfig( + lr=0.0001, +) +``` + +#### Set up training configuration + +```python +training_config = TrainingConfig( + n_epochs=2, + data_config=data_config, + optimizer_config=optimizer_config, +) +``` + +#### Start fine-tuning job + +```python +training_job = client.post_training.supervised_fine_tune( + job_uuid="unique-job-id", + model="meta-llama/Llama-3.1-8B-Instruct", + checkpoint_dir="", + algorithm_config=algorithm_config, + training_config=training_config, + logger_config={}, + hyperparam_search_config={}, +) +``` + +### List all jobs + +```python +jobs = client.post_training.job.list() +``` + +### Check job status + +```python +job_status = client.post_training.job.status(job_uuid="your-job-id") +``` + +### Cancel a job + +```python +client.post_training.job.cancel(job_uuid="your-job-id") +``` + +### Inference with the fine-tuned model + +```python +response = client.inference.completion( + content="Complete the sentence using one word: Roses are red, violets are ", + stream=False, + model_id="test-example-model@v1", + sampling_params={ + "max_tokens": 50, + }, +) +print(response.content) +``` diff --git a/llama_stack/providers/remote/post_training/nvidia/config.py b/llama_stack/providers/remote/post_training/nvidia/config.py index 4e7341001..1acd9eddf 100644 --- a/llama_stack/providers/remote/post_training/nvidia/config.py +++ b/llama_stack/providers/remote/post_training/nvidia/config.py @@ -40,7 +40,7 @@ class NvidiaPostTrainingConfig(BaseModel): # ToDO: validate this, add default value customizer_url: str = Field( - default_factory=lambda: os.getenv("NVIDIA_CUSTOMIZER_URL"), + default_factory=lambda: os.getenv("NVIDIA_CUSTOMIZER_URL", "http://nemo.test"), 
description="Base URL for the NeMo Customizer API", )