mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-31 16:01:46 +00:00
Docs for meta-reference-gpu
This commit is contained in:
parent
38563d7c00
commit
dd732f037f
9 changed files with 374 additions and 101 deletions
70
distributions/meta-reference-gpu/run-with-safety.yaml
Normal file
70
distributions/meta-reference-gpu/run-with-safety.yaml
Normal file
|
@ -0,0 +1,70 @@
|
|||
version: '2'
|
||||
image_name: meta-reference-gpu
|
||||
docker_image: null
|
||||
conda_env: null
|
||||
apis:
|
||||
- agents
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- telemetry
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: meta-reference-inference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
model: ${env.INFERENCE_MODEL}
|
||||
max_seq_len: 4096
|
||||
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
|
||||
- provider_id: meta-reference-safety
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
model: ${env.SAFETY_MODEL}
|
||||
max_seq_len: 4096
|
||||
checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
|
||||
memory:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
namespace: null
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
config: {}
|
||||
agents:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
namespace: null
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: meta-reference-inference
|
||||
provider_model_id: null
|
||||
- metadata: {}
|
||||
model_id: ${env.SAFETY_MODEL}
|
||||
provider_id: meta-reference-safety
|
||||
provider_model_id: null
|
||||
shields:
|
||||
- params: null
|
||||
shield_id: ${env.SAFETY_MODEL}
|
||||
provider_id: null
|
||||
provider_shield_id: null
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
|
@ -1,68 +1,56 @@
|
|||
version: '2'
|
||||
image_name: local
|
||||
image_name: meta-reference-gpu
|
||||
docker_image: null
|
||||
conda_env: local
|
||||
conda_env: null
|
||||
apis:
|
||||
- shields
|
||||
- agents
|
||||
- models
|
||||
- memory
|
||||
- memory_banks
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- telemetry
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: inference0
|
||||
- provider_id: meta-reference-inference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
model: Llama3.2-3B-Instruct
|
||||
quantization: null
|
||||
torch_seed: null
|
||||
model: ${env.INFERENCE_MODEL}
|
||||
max_seq_len: 4096
|
||||
max_batch_size: 1
|
||||
- provider_id: inference1
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
model: Llama-Guard-3-1B
|
||||
quantization: null
|
||||
torch_seed: null
|
||||
max_seq_len: 2048
|
||||
max_batch_size: 1
|
||||
safety:
|
||||
- provider_id: meta0
|
||||
provider_type: inline::llama-guard
|
||||
config:
|
||||
model: Llama-Guard-3-1B
|
||||
excluded_categories: []
|
||||
- provider_id: meta1
|
||||
provider_type: inline::prompt-guard
|
||||
config:
|
||||
model: Prompt-Guard-86M
|
||||
# Uncomment to use prompt guard
|
||||
# prompt_guard_shield:
|
||||
# model: Prompt-Guard-86M
|
||||
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
|
||||
memory:
|
||||
- provider_id: meta0
|
||||
provider_type: inline::meta-reference
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
namespace: null
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
config: {}
|
||||
# Uncomment to use pgvector
|
||||
# - provider_id: pgvector
|
||||
# provider_type: remote::pgvector
|
||||
# config:
|
||||
# host: 127.0.0.1
|
||||
# port: 5432
|
||||
# db: postgres
|
||||
# user: postgres
|
||||
# password: mysecretpassword
|
||||
agents:
|
||||
- provider_id: meta0
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ~/.llama/runtime/agents_store.db
|
||||
namespace: null
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
|
||||
telemetry:
|
||||
- provider_id: meta0
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config: {}
|
||||
metadata_store:
|
||||
namespace: null
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: meta-reference-inference
|
||||
provider_model_id: null
|
||||
shields: []
|
||||
memory_banks: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -1,15 +1,23 @@
|
|||
# Meta Reference Distribution
|
||||
|
||||
The `llamastack/distribution-meta-reference-gpu` distribution consists of the following provider configurations.
|
||||
The `llamastack/distribution-meta-reference-gpu` distribution consists of the following provider configurations:
|
||||
|
||||
| API | Provider(s) |
|
||||
|-----|-------------|
|
||||
| agents | `inline::meta-reference` |
|
||||
| inference | `inline::meta-reference` |
|
||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
|
||||
|
||||
| **API** | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** |
|
||||
|----------------- |--------------- |---------------- |-------------------------------------------------- |---------------- |---------------- |
|
||||
| **Provider(s)** | meta-reference | meta-reference | meta-reference, remote::pgvector, remote::chroma | meta-reference | meta-reference |
|
||||
Note that you need access to NVIDIA GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs.
|
||||
|
||||
|
||||
### Step 0. Prerequisite - Downloading Models
|
||||
Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/cli_reference/download_models.html) here to download the models.
|
||||
|
||||
## Prerequisite: Downloading Models
|
||||
|
||||
Please make sure you have Llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/cli_reference/download_models.html) for instructions on downloading the models. Run `llama model list` to see the models available for download, and `llama model download` to download the checkpoints.
|
||||
|
||||
```
|
||||
$ ls ~/.llama/checkpoints
|
||||
|
@ -17,55 +25,56 @@ Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3
|
|||
Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M
|
||||
```
|
||||
|
||||
### Step 1. Start the Distribution
|
||||
## Running the Distribution
|
||||
|
||||
#### (Option 1) Start with Docker
|
||||
```
|
||||
$ cd distributions/meta-reference-gpu && docker compose up
|
||||
You can run the distribution either via Conda (which builds the distribution code locally) or via Docker (which uses a pre-built image).
|
||||
|
||||
### Via Docker
|
||||
|
||||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
docker run \
|
||||
-it \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ./run.yaml:/root/my-run.yaml \
|
||||
llamastack/distribution-meta-reference-gpu \
|
||||
/root/my-run.yaml \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> This assumes you have access to GPU to start a local server with access to your GPU.
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
||||
|
||||
> [!NOTE]
|
||||
> `~/.llama` should be the path containing downloaded weights of Llama models.
|
||||
|
||||
|
||||
This will download and start running a pre-built docker container. Alternatively, you may use the following commands:
|
||||
|
||||
```
|
||||
docker run -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./run.yaml:/root/my-run.yaml --gpus=all distribution-meta-reference-gpu --yaml_config /root/my-run.yaml
|
||||
```bash
|
||||
docker run \
|
||||
-it \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ./run-with-safety.yaml:/root/my-run.yaml \
|
||||
llamastack/distribution-meta-reference-gpu \
|
||||
/root/my-run.yaml \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||
```
|
||||
|
||||
#### (Option 2) Start with Conda
|
||||
### Via Conda
|
||||
|
||||
1. Install the `llama` CLI. See [CLI Reference](https://llama-stack.readthedocs.io/en/latest/cli_reference/index.html)
|
||||
Make sure you have run `pip install llama-stack` and have the Llama Stack CLI available.
|
||||
|
||||
2. Build the `meta-reference-gpu` distribution
|
||||
|
||||
```
|
||||
$ llama stack build --template meta-reference-gpu --image-type conda
|
||||
```bash
|
||||
llama stack build --template meta-reference-gpu --image-type conda
|
||||
llama stack run ./run.yaml \
|
||||
--port 5001 \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||
```
|
||||
|
||||
3. Start running distribution
|
||||
```
|
||||
$ cd distributions/meta-reference-gpu
|
||||
$ llama stack run ./run.yaml
|
||||
```
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
||||
### (Optional) Serving a new model
|
||||
You may change the `config.model` in `run.yaml` to update the model currently being served by the distribution. Make sure you have the model checkpoint downloaded in your `~/.llama`.
|
||||
```bash
|
||||
llama stack run ./run-with-safety.yaml \
|
||||
--port 5001 \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||
```
|
||||
inference:
|
||||
- provider_id: meta0
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
model: Llama3.2-11B-Vision-Instruct
|
||||
quantization: null
|
||||
torch_seed: null
|
||||
max_seq_len: 4096
|
||||
max_batch_size: 1
|
||||
```
|
||||
|
||||
Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
|
||||
|
|
|
@ -49,6 +49,18 @@ class MetaReferenceInferenceConfig(BaseModel):
|
|||
resolved = resolve_model(self.model)
|
||||
return resolved.pth_file_count
|
||||
|
||||
@classmethod
def sample_run_config(
    cls,
    model: str = "Llama3.2-3B-Instruct",
    checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}",
    max_seq_len: int = 4096,
) -> Dict[str, Any]:
    """Return a sample provider ``config`` mapping for a run configuration.

    Args:
        model: Model identifier to place under the ``model`` key. May be an
            ``${env.NAME}`` placeholder string.
        checkpoint_dir: Value for the ``checkpoint_dir`` key. The default is
            a placeholder whose fallback is the literal string ``"null"``.
            NOTE(review): the model-loading code shown in this change treats
            ``"null"`` like an unset directory -- confirm this stays in sync.
        max_seq_len: Value for the ``max_seq_len`` key. Defaults to 4096,
            the value that was previously hard-coded, so existing callers
            get the same result.

    Returns:
        A dict with the keys ``model``, ``max_seq_len`` and
        ``checkpoint_dir``, in that order.
    """
    return {
        "model": model,
        "max_seq_len": max_seq_len,
        "checkpoint_dir": checkpoint_dir,
    }
|
||||
|
||||
|
||||
class MetaReferenceQuantizedInferenceConfig(MetaReferenceInferenceConfig):
|
||||
quantization: QuantizationConfig
|
||||
|
|
|
@ -107,7 +107,7 @@ class Llama:
|
|||
sys.stdout = open(os.devnull, "w")
|
||||
|
||||
start_time = time.time()
|
||||
if config.checkpoint_dir:
|
||||
if config.checkpoint_dir and config.checkpoint_dir != "null":
|
||||
ckpt_dir = config.checkpoint_dir
|
||||
else:
|
||||
ckpt_dir = model_checkpoint_dir(model)
|
||||
|
@ -137,7 +137,6 @@ class Llama:
|
|||
), f"model_args vocab = {model_args.vocab_size} but tokenizer vocab = {tokenizer.n_words}"
|
||||
|
||||
if isinstance(config, MetaReferenceQuantizedInferenceConfig):
|
||||
|
||||
if isinstance(config.quantization, Fp8QuantizationConfig):
|
||||
from .quantization.loader import convert_to_fp8_quantized_model
|
||||
|
||||
|
|
7
llama_stack/templates/meta-reference-gpu/__init__.py
Normal file
7
llama_stack/templates/meta-reference-gpu/__init__.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .meta_reference import get_distribution_template # noqa: F401
|
|
@ -1,13 +1,19 @@
|
|||
version: '2'
|
||||
name: meta-reference-gpu
|
||||
distribution_spec:
|
||||
docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
|
||||
description: Use code from `llama_stack` itself to serve all llama stack APIs
|
||||
description: Use Meta Reference for running LLM inference
|
||||
docker_image: null
|
||||
providers:
|
||||
inference: meta-reference
|
||||
inference:
|
||||
- inline::meta-reference
|
||||
memory:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
safety: inline::llama-guard
|
||||
agents: inline::meta-reference
|
||||
telemetry: inline::meta-reference
|
||||
safety:
|
||||
- inline::llama-guard
|
||||
agents:
|
||||
- inline::meta-reference
|
||||
telemetry:
|
||||
- inline::meta-reference
|
||||
image_type: conda
|
||||
|
|
82
llama_stack/templates/meta-reference-gpu/doc_template.md
Normal file
82
llama_stack/templates/meta-reference-gpu/doc_template.md
Normal file
|
@ -0,0 +1,82 @@
|
|||
# Meta Reference Distribution
|
||||
|
||||
The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations:
|
||||
|
||||
{{ providers_table }}
|
||||
|
||||
Note that you need access to NVIDIA GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs.
|
||||
|
||||
{% if run_config_env_vars %}
|
||||
### Environment Variables
|
||||
|
||||
The following environment variables can be configured:
|
||||
|
||||
{% for var, (default_value, description) in run_config_env_vars.items() %}
|
||||
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
|
||||
## Prerequisite: Downloading Models
|
||||
|
||||
Please make sure you have Llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/cli_reference/download_models.html) for instructions on downloading the models. Run `llama model list` to see the models available for download, and `llama model download` to download the checkpoints.
|
||||
|
||||
```
|
||||
$ ls ~/.llama/checkpoints
|
||||
Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B
|
||||
Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M
|
||||
```
|
||||
|
||||
## Running the Distribution
|
||||
|
||||
You can run the distribution either via Conda (which builds the distribution code locally) or via Docker (which uses a pre-built image).
|
||||
|
||||
### Via Docker
|
||||
|
||||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
docker run \
|
||||
-it \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ./run.yaml:/root/my-run.yaml \
|
||||
llamastack/distribution-{{ name }} \
|
||||
/root/my-run.yaml \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||
```
|
||||
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
||||
```bash
|
||||
docker run \
|
||||
-it \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ./run-with-safety.yaml:/root/my-run.yaml \
|
||||
llamastack/distribution-{{ name }} \
|
||||
/root/my-run.yaml \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||
```
|
||||
|
||||
### Via Conda
|
||||
|
||||
Make sure you have run `pip install llama-stack` and have the Llama Stack CLI available.
|
||||
|
||||
```bash
|
||||
llama stack build --template meta-reference-gpu --image-type conda
|
||||
llama stack run ./run.yaml \
|
||||
--port 5001 \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||
```
|
||||
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
||||
```bash
|
||||
llama stack run ./run-with-safety.yaml \
|
||||
--port 5001 \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||
```
|
100
llama_stack/templates/meta-reference-gpu/meta_reference.py
Normal file
100
llama_stack/templates/meta-reference-gpu/meta_reference.py
Normal file
|
@ -0,0 +1,100 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||
from llama_stack.providers.inline.inference.meta_reference import (
|
||||
MetaReferenceInferenceConfig,
|
||||
)
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
    """Assemble the ``meta-reference-gpu`` distribution template.

    The template lists the provider types offered per API and defines two
    run configurations:

    * ``run.yaml`` -- a single meta-reference inference provider and the
      inference model.
    * ``run-with-safety.yaml`` -- adds a second meta-reference inference
      provider for the safety model, registers both models, and declares a
      default shield whose id is the safety model placeholder.

    Model names and checkpoint directories are passed through as
    ``${env.NAME}`` / ``${env.NAME:default}`` placeholder strings.
    """
    inline_meta_reference = "inline::meta-reference"
    inference_provider_id = "meta-reference-inference"
    safety_provider_id = "meta-reference-safety"
    inference_model_ref = "${env.INFERENCE_MODEL}"
    safety_model_ref = "${env.SAFETY_MODEL}"

    # Provider types available for each API in this distribution.
    provider_types = {
        "inference": [inline_meta_reference],
        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
        "safety": ["inline::llama-guard"],
        "agents": [inline_meta_reference],
        "telemetry": [inline_meta_reference],
    }

    # Both models are served through the same inline provider type; each
    # gets its own provider instance and its own checkpoint-dir variable.
    inference_provider = Provider(
        provider_id=inference_provider_id,
        provider_type=inline_meta_reference,
        config=MetaReferenceInferenceConfig.sample_run_config(
            model=inference_model_ref,
            checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
        ),
    )
    safety_inference_provider = Provider(
        provider_id=safety_provider_id,
        provider_type=inline_meta_reference,
        config=MetaReferenceInferenceConfig.sample_run_config(
            model=safety_model_ref,
            checkpoint_dir="${env.SAFETY_CHECKPOINT_DIR:null}",
        ),
    )

    inference_model = ModelInput(
        model_id=inference_model_ref,
        provider_id=inference_provider_id,
    )
    safety_model = ModelInput(
        model_id=safety_model_ref,
        provider_id=safety_provider_id,
    )

    run_configs = {
        "run.yaml": RunConfigSettings(
            provider_overrides={"inference": [inference_provider]},
            default_models=[inference_model],
        ),
        "run-with-safety.yaml": RunConfigSettings(
            provider_overrides={
                "inference": [inference_provider, safety_inference_provider],
            },
            default_models=[inference_model, safety_model],
            default_shields=[ShieldInput(shield_id=safety_model_ref)],
        ),
    }

    # Each entry maps a variable name to (default value, description).
    # NOTE(review): doc_template.md iterates `run_config_env_vars`, while
    # this table is passed as `docker_compose_env_vars` -- confirm the
    # template field name.  The docs also spell the port variable
    # `LLAMA_STACK_PORT`, not `LLAMASTACK_PORT` -- confirm which is intended.
    env_vars = {
        "LLAMASTACK_PORT": (
            "5001",
            "Port for the Llama Stack distribution server",
        ),
        "INFERENCE_MODEL": (
            "meta-llama/Llama-3.2-3B-Instruct",
            "Inference model loaded into the Meta Reference server",
        ),
        "INFERENCE_CHECKPOINT_DIR": (
            "null",
            "Directory containing the Meta Reference model checkpoint",
        ),
        "SAFETY_MODEL": (
            "meta-llama/Llama-Guard-3-1B",
            "Name of the safety (Llama-Guard) model to use",
        ),
        "SAFETY_CHECKPOINT_DIR": (
            "null",
            "Directory containing the Llama-Guard model checkpoint",
        ),
    }

    return DistributionTemplate(
        name="meta-reference-gpu",
        distro_type="self_hosted",
        description="Use Meta Reference for running LLM inference",
        template_path=Path(__file__).parent / "doc_template.md",
        providers=provider_types,
        default_models=[inference_model, safety_model],
        run_configs=run_configs,
        docker_compose_env_vars=env_vars,
    )
|
Loading…
Add table
Add a link
Reference in a new issue