commit f5ebad130c (parent: 409383ae5f): add documentation
3 changed files with 147 additions and 1 deletion
@@ -9,6 +9,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following providers:
 | datasetio | `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::nvidia` |
+| post_training | `remote::nvidia` |
 | safety | `remote::nvidia` |
 | scoring | `inline::basic` |
 | telemetry | `inline::meta-reference` |
@@ -21,6 +22,13 @@ The `llamastack/distribution-nvidia` distribution consists of the following providers:
 
 The following environment variables can be configured:
 
 - `NVIDIA_API_KEY`: NVIDIA API Key (default: ``)
+- `NVIDIA_CUSTOMIZER_URL`: NVIDIA Customizer URL (default: `http://nemo.test`)
+- `NVIDIA_USER_ID`: NVIDIA user ID (default: `llama-stack-user`)
+- `NVIDIA_DATASET_NAMESPACE`: NVIDIA dataset namespace (default: `default`)
+- `NVIDIA_ACCESS_POLICIES`: NVIDIA access policies (default: `{}`)
+- `NVIDIA_PROJECT_ID`: NVIDIA project ID (default: `test-project`)
+- `NVIDIA_OUTPUT_MODEL_DIR`: Directory to save the output model (default: `test-example-model@v1`)
 - `GUARDRAILS_SERVICE_URL`: URL for the NeMo Guardrails Service (default: `http://0.0.0.0:7331`)
 - `INFERENCE_MODEL`: Inference model (default: `Llama3.1-8B-Instruct`)
 - `SAFETY_MODEL`: Name of the model to use for safety (default: `meta/llama-3.1-8b-instruct`)
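As a quick sketch (not the only way to supply these), the variables can also be set from Python before the library client is initialized; the values below are placeholders:

```python
import os

# Placeholder values; substitute your own credentials and endpoints.
os.environ["NVIDIA_API_KEY"] = "your-api-key"
os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test"
os.environ["NVIDIA_PROJECT_ID"] = "test-project"
```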
llama_stack/providers/remote/post_training/nvidia/README.md (new file, 138 lines)
@@ -0,0 +1,138 @@
# NVIDIA Post-Training Provider for LlamaStack

This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service.

## Features

- Supervised fine-tuning of Llama models
- LoRA fine-tuning support
- Job management and status tracking

## Getting Started

### Prerequisites

- LlamaStack with NVIDIA configuration
- Access to the hosted NVIDIA NeMo Customizer service
- A dataset registered with the hosted NVIDIA NeMo Customizer service
- A base model downloaded and available in the hosted NVIDIA NeMo Customizer service

### Setup

Build the NVIDIA environment:

```bash
llama stack build --template nvidia --image-type conda
```

### Basic Usage with the LlamaStack Python Client

### Create Customization Job

#### Initialize the client

```python
import os

# The NVIDIA connection settings must be in place before the client is initialized.
os.environ["NVIDIA_API_KEY"] = "your-api-key"
os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test"
os.environ["NVIDIA_USER_ID"] = "llama-stack-user"
os.environ["NVIDIA_DATASET_NAMESPACE"] = "default"
os.environ["NVIDIA_PROJECT_ID"] = "test-project"
os.environ["NVIDIA_OUTPUT_MODEL_DIR"] = "test-example-model@v1"

from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("nvidia")
client.initialize()
```

#### Configure fine-tuning parameters

```python
from llama_stack_client.types.post_training_supervised_fine_tune_params import (
    TrainingConfig,
    TrainingConfigDataConfig,
    TrainingConfigOptimizerConfig,
)
from llama_stack_client.types.algorithm_config_param import LoraFinetuningConfig
```

#### Set up LoRA configuration

```python
algorithm_config = LoraFinetuningConfig(type="LoRA", adapter_dim=16)
```

#### Configure training data

```python
data_config = TrainingConfigDataConfig(
    dataset_id="your-dataset-id",  # Use client.datasets.list() to see available datasets
    batch_size=16,
)
```

#### Configure optimizer

```python
optimizer_config = TrainingConfigOptimizerConfig(
    lr=0.0001,
)
```

#### Set up training configuration

```python
training_config = TrainingConfig(
    n_epochs=2,
    data_config=data_config,
    optimizer_config=optimizer_config,
)
```

#### Start fine-tuning job

```python
training_job = client.post_training.supervised_fine_tune(
    job_uuid="unique-job-id",
    model="meta-llama/Llama-3.1-8B-Instruct",
    checkpoint_dir="",
    algorithm_config=algorithm_config,
    training_config=training_config,
    logger_config={},
    hyperparam_search_config={},
)
```

### List all jobs

```python
jobs = client.post_training.job.list()
```
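
To sanity-check the result, you can iterate over what came back; the exact response shape varies by client version, so treat the attribute below as an assumption to confirm:

```python
# Assumed shape: an iterable of job entries, each with a job_uuid attribute.
for job in jobs:
    print(job.job_uuid)
```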

### Check job status

```python
job_status = client.post_training.job.status(job_uuid="your-job-id")
```
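
Customization jobs run asynchronously, so you will typically poll until a terminal state. A minimal sketch, reusing `client` from above and assuming the response carries a `status` field with strings like `"completed"` (verify both against your deployment):

```python
import time

# Hypothetical polling loop; the terminal status values shown here are
# assumptions to verify against your NeMo Customizer deployment.
while True:
    job_status = client.post_training.job.status(job_uuid="your-job-id")
    if job_status.status in ("completed", "failed", "cancelled"):
        break
    time.sleep(30)

print(job_status.status)
```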

### Cancel a job

```python
client.post_training.job.cancel(job_uuid="your-job-id")
```

### Inference with the fine-tuned model

```python
response = client.inference.completion(
    content="Complete the sentence using one word: Roses are red, violets are ",
    stream=False,
    model_id="test-example-model@v1",
    sampling_params={
        "max_tokens": 50,
    },
)
print(response.content)
```
@@ -40,7 +40,7 @@ class NvidiaPostTrainingConfig(BaseModel):
 
     # ToDO: validate this, add default value
     customizer_url: str = Field(
-        default_factory=lambda: os.getenv("NVIDIA_CUSTOMIZER_URL"),
+        default_factory=lambda: os.getenv("NVIDIA_CUSTOMIZER_URL", "http://nemo.test"),
         description="Base URL for the NeMo Customizer API",
     )
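
The effect of the new fallback, sketched with plain `os.getenv` (nothing provider-specific is needed to see the behavior):

```python
import os

# With NVIDIA_CUSTOMIZER_URL unset, the field's default_factory now yields
# the test endpoint instead of None.
os.environ.pop("NVIDIA_CUSTOMIZER_URL", None)
print(os.getenv("NVIDIA_CUSTOMIZER_URL", "http://nemo.test"))  # http://nemo.test
```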