Mirror of https://github.com/meta-llama/llama-stack.git · synced 2025-10-11 13:44:38 +00:00
chore!: remove model mgmt from CLI for Hugging Face CLI (#3700)
This change removes the `llama model` and `llama download` subcommands from the CLI, replacing them with recommendations to use the Hugging Face CLI instead.

Rationale for this change:

- The model management functionality was largely duplicating what the Hugging Face CLI already provides, leading to unnecessary maintenance overhead (except the download source from Meta?)
- Maintaining our own implementation required fixing bugs and keeping up with changes in model repositories and download mechanisms
- The Hugging Face CLI is more mature, widely adopted, and better maintained
- This allows us to focus on core Llama Stack functionality rather than reimplementing model management tools

Changes made:

- Removed all model-related CLI commands and their implementations
- Updated documentation to recommend using `huggingface-cli` for model downloads
- Removed Meta-specific download logic and statements
- Simplified the CLI to focus solely on stack management operations

Users should now use:

- `huggingface-cli download` for downloading models
- `huggingface-cli scan-cache` for listing downloaded models

This is a breaking change, as it removes previously available CLI commands.

Signed-off-by: Sébastien Han <seb@redhat.com>
parent 841d0c3583 · commit 7ee0ee7843
21 changed files with 63 additions and 1612 deletions
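The replacement workflow relies entirely on the Hugging Face CLI. The snippet below is a minimal sketch of the commands users are now pointed to; the repository ID and target directory are illustrative, not prescribed by this commit:

```bash
# Install the Hugging Face CLI
pip install "huggingface_hub[cli]"

# Authenticate once (needed for gated meta-llama repositories)
huggingface-cli login

# Download a model checkpoint (example repo and destination)
huggingface-cli download meta-llama/Llama-3.2-3B-Instruct --local-dir ~/.llama/Llama-3.2-3B-Instruct

# List models already present in the local Hugging Face cache
huggingface-cli scan-cache
```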
.github/workflows/python-build-test.yml (vendored, 2 changes)

@ -43,7 +43,5 @@ jobs:
 uv pip list
 uv pip show llama-stack
 command -v llama
-llama model prompt-format -m Llama3.2-90B-Vision-Instruct
-llama model list
 llama stack list-apis
 llama stack list-providers inference
@ -25,7 +25,7 @@ pip install -U llama_stack

 MODEL="Llama-4-Scout-17B-16E-Instruct"
 # get meta url from llama.com
-llama model download --source meta --model-id $MODEL --meta-url <META_URL>
+huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL

 # start a llama stack server
 INFERENCE_MODEL=meta-llama/$MODEL llama stack build --run --template meta-reference-gpu
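The meta-llama repositories on Hugging Face are gated, so the `huggingface-cli download` step above typically requires accepting the model license on the Hub and authenticating locally first. A minimal sketch, assuming the license has already been accepted in the browser:

```bash
# One-time authentication; the token comes from https://huggingface.co/settings/tokens
huggingface-cli login

MODEL="Llama-4-Scout-17B-16E-Instruct"
huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL
```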
@ -41,31 +41,7 @@ The following environment variables can be configured:

 ## Prerequisite: Downloading Models

-Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](../../references/llama_cli_reference/download_models.md) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](../../references/llama_cli_reference/download_models.md) here to download the models using the Hugging Face CLI.

-```
-$ llama model list --downloaded
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
-┃ Model                                   ┃ Size     ┃ Modified Time       ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
-│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
-│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
-│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
-│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
-│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
-│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
-│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
-│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
-│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
-└─────────────────────────────────────────┴──────────┴─────────────────────┘
-```

 ## Running the Distribution
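With `llama model list --downloaded` gone, the equivalent check goes through the Hugging Face tooling. A minimal sketch, assuming models were placed under `~/.llama` with `--local-dir` as in the updated docs:

```bash
# Models downloaded through the Hugging Face cache
huggingface-cli scan-cache

# Models downloaded to an explicit local directory
ls -lh ~/.llama
```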
@ -25,141 +25,42 @@ You have two ways to install Llama Stack:

 cd llama-stack
 pip install -e .

-## Downloading models via CLI
+## Downloading models via Hugging Face CLI

-You first need to have models downloaded locally.
+You first need to have models downloaded locally. We recommend using the [Hugging Face CLI](https://huggingface.co/docs/huggingface_hub/guides/cli) to download models.

-To download any model you need the **Model Descriptor**.
-This can be obtained by running the command
+### Install Hugging Face CLI
-```
-llama model list
-```
-
-You should see a table like this:
+First, install the Hugging Face CLI:

-```
-| Model Descriptor(ID)             | Hugging Face Repo                        | Context Length |
-| Llama3.1-8B                      | meta-llama/Llama-3.1-8B                  | 128K           |
-| Llama3.1-70B                     | meta-llama/Llama-3.1-70B                 | 128K           |
-| Llama3.1-405B:bf16-mp8           | meta-llama/Llama-3.1-405B                | 128K           |
-| Llama3.1-405B                    | meta-llama/Llama-3.1-405B-FP8            | 128K           |
-| Llama3.1-405B:bf16-mp16          | meta-llama/Llama-3.1-405B                | 128K           |
-| Llama3.1-8B-Instruct             | meta-llama/Llama-3.1-8B-Instruct         | 128K           |
-| Llama3.1-70B-Instruct            | meta-llama/Llama-3.1-70B-Instruct        | 128K           |
-| Llama3.1-405B-Instruct:bf16-mp8  | meta-llama/Llama-3.1-405B-Instruct       | 128K           |
-| Llama3.1-405B-Instruct           | meta-llama/Llama-3.1-405B-Instruct-FP8   | 128K           |
-| Llama3.1-405B-Instruct:bf16-mp16 | meta-llama/Llama-3.1-405B-Instruct       | 128K           |
-| Llama3.2-1B                      | meta-llama/Llama-3.2-1B                  | 128K           |
-| Llama3.2-3B                      | meta-llama/Llama-3.2-3B                  | 128K           |
-| Llama3.2-11B-Vision              | meta-llama/Llama-3.2-11B-Vision          | 128K           |
-| Llama3.2-90B-Vision              | meta-llama/Llama-3.2-90B-Vision          | 128K           |
-| Llama3.2-1B-Instruct             | meta-llama/Llama-3.2-1B-Instruct         | 128K           |
-| Llama3.2-3B-Instruct             | meta-llama/Llama-3.2-3B-Instruct         | 128K           |
-| Llama3.2-11B-Vision-Instruct     | meta-llama/Llama-3.2-11B-Vision-Instruct | 128K           |
-| Llama3.2-90B-Vision-Instruct     | meta-llama/Llama-3.2-90B-Vision-Instruct | 128K           |
-| Llama-Guard-3-11B-Vision         | meta-llama/Llama-Guard-3-11B-Vision      | 128K           |
-| Llama-Guard-3-1B:int4-mp1        | meta-llama/Llama-Guard-3-1B-INT4         | 128K           |
-| Llama-Guard-3-1B                 | meta-llama/Llama-Guard-3-1B              | 128K           |
-| Llama-Guard-3-8B                 | meta-llama/Llama-Guard-3-8B              | 128K           |
-| Llama-Guard-3-8B:int8-mp1        | meta-llama/Llama-Guard-3-8B-INT8         | 128K           |
-| Prompt-Guard-86M                 | meta-llama/Prompt-Guard-86M              | 128K           |
-| Llama-Guard-2-8B                 | meta-llama/Llama-Guard-2-8B              | 4K             |
-```
-
-To download models, you can use the llama download command.
-
-#### Downloading from [Meta](https://llama.meta.com/llama-downloads/)
-
-Here is an example download command to get the 3B-Instruct/11B-Vision-Instruct model. You will need META_URL which can be obtained from [here](https://llama.meta.com/docs/getting_the_models/meta/). Note: You need to quote the META_URL
-
-Download the required checkpoints using the following commands:

 ```bash
-# download the 8B model, this can be run on a single GPU
-llama download --source meta --model-id Llama3.2-3B-Instruct --meta-url 'META_URL'
-
-# you can also get the 70B model, this will require 8 GPUs however
-llama download --source meta --model-id Llama3.2-11B-Vision-Instruct --meta-url 'META_URL'
-
-# llama-agents have safety enabled by default. For this, you will need
-# safety models -- Llama-Guard and Prompt-Guard
-llama download --source meta --model-id Prompt-Guard-86M --meta-url 'META_URL'
-llama download --source meta --model-id Llama-Guard-3-1B --meta-url 'META_URL'
+pip install huggingface_hub[cli]
 ```

-#### Downloading from [Hugging Face](https://huggingface.co/meta-llama)
+### Download models from Hugging Face

-Essentially, the same commands above work, just replace `--source meta` with `--source huggingface`.
+You can download models using the `huggingface-cli download` command. Here are some examples:

 ```bash
-llama download --source huggingface --model-id Llama3.1-8B-Instruct --hf-token <HF_TOKEN>
-
-llama download --source huggingface --model-id Llama3.1-70B-Instruct --hf-token <HF_TOKEN>
-
-llama download --source huggingface --model-id Llama-Guard-3-1B --ignore-patterns *original*
-llama download --source huggingface --model-id Prompt-Guard-86M --ignore-patterns *original*
-```
-
-**Important:** Set your environment variable `HF_TOKEN` or pass in `--hf-token` to the command to validate your access. You can find your token at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens).
-
-```{tip}
-Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored.
+# Download Llama 3.2 3B Instruct model
+huggingface-cli download meta-llama/Llama-3.2-3B-Instruct --local-dir ~/.llama/Llama-3.2-3B-Instruct
+
+# Download Llama 3.2 1B Instruct model
+huggingface-cli download meta-llama/Llama-3.2-1B-Instruct --local-dir ~/.llama/Llama-3.2-1B-Instruct
+
+# Download Llama Guard 3 1B model
+huggingface-cli download meta-llama/Llama-Guard-3-1B --local-dir ~/.llama/Llama-Guard-3-1B
+
+# Download Prompt Guard model
+huggingface-cli download meta-llama/Prompt-Guard-86M --local-dir ~/.llama/Prompt-Guard-86M
 ```

+**Important:** You need to authenticate with Hugging Face to download models. You can do this by:
+1. Getting your token from [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
+2. Running `huggingface-cli login` and entering your token
+
 ## List the downloaded models

-To list the downloaded models with the following command:
-```
-llama model list --downloaded
-```
-
-You should see a table like this:
-
-```
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
-┃ Model                                   ┃ Size     ┃ Modified Time       ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
-│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
-│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
-│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
-│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
-│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
-│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
-│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
-│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
-│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
-└─────────────────────────────────────────┴──────────┴─────────────────────┘
-```
+To list the downloaded models, you can use the Hugging Face CLI:
+```bash
+# List all downloaded models in your local cache
+huggingface-cli scan-cache
+```

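In non-interactive environments such as CI, authentication does not have to go through `huggingface-cli login`; the token can also be supplied via the `HF_TOKEN` environment variable, which the Hugging Face tooling picks up automatically. A minimal sketch (the token value is a placeholder):

```bash
export HF_TOKEN=hf_xxxxxxxxxxxxxxxxx
huggingface-cli download meta-llama/Llama-Guard-3-1B --local-dir ~/.llama/Llama-Guard-3-1B
```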
@ -27,9 +27,9 @@ You have two ways to install Llama Stack:

 ## `llama` subcommands
-1. `download`: Supports downloading models from Meta or Hugging Face. [Downloading models](#downloading-models)
-2. `model`: Lists available models and their properties. [Understanding models](#understand-the-models)
-3. `stack`: Allows you to build a stack using the `llama stack` distribution and run a Llama Stack server. You can read more about how to build a Llama Stack distribution in the [Build your own Distribution](../distributions/building_distro) documentation.
+1. `stack`: Allows you to build a stack using the `llama stack` distribution and run a Llama Stack server. You can read more about how to build a Llama Stack distribution in the [Build your own Distribution](../distributions/building_distro) documentation.
+
+For downloading models, we recommend using the [Hugging Face CLI](https://huggingface.co/docs/huggingface_hub/guides/cli). See [Downloading models](#downloading-models) for more information.

 ### Sample Usage

@ -38,239 +38,41 @@ llama --help
 ```

 ```
-usage: llama [-h] {download,model,stack} ...
+usage: llama [-h] {stack} ...

 Welcome to the Llama CLI

 options:
   -h, --help  show this help message and exit

 subcommands:
-  {download,model,stack}
+  {stack}

+  stack  Operations for the Llama Stack / Distributions
 ```

 ## Downloading models

-You first need to have models downloaded locally.
+You first need to have models downloaded locally. We recommend using the [Hugging Face CLI](https://huggingface.co/docs/huggingface_hub/guides/cli) to download models.

-To download any model you need the **Model Descriptor**.
-This can be obtained by running the command
-```
-llama model list
-```
-
-You should see a table like this:
-
-```
-| Model Descriptor(ID)             | Hugging Face Repo                        | Context Length |
-| Llama3.1-8B                      | meta-llama/Llama-3.1-8B                  | 128K           |
-| Llama3.1-70B                     | meta-llama/Llama-3.1-70B                 | 128K           |
-| Llama3.1-405B:bf16-mp8           | meta-llama/Llama-3.1-405B                | 128K           |
-| Llama3.1-405B                    | meta-llama/Llama-3.1-405B-FP8            | 128K           |
-| Llama3.1-405B:bf16-mp16          | meta-llama/Llama-3.1-405B                | 128K           |
-| Llama3.1-8B-Instruct             | meta-llama/Llama-3.1-8B-Instruct         | 128K           |
-| Llama3.1-70B-Instruct            | meta-llama/Llama-3.1-70B-Instruct        | 128K           |
-| Llama3.1-405B-Instruct:bf16-mp8  | meta-llama/Llama-3.1-405B-Instruct       | 128K           |
-| Llama3.1-405B-Instruct           | meta-llama/Llama-3.1-405B-Instruct-FP8   | 128K           |
-| Llama3.1-405B-Instruct:bf16-mp16 | meta-llama/Llama-3.1-405B-Instruct       | 128K           |
-| Llama3.2-1B                      | meta-llama/Llama-3.2-1B                  | 128K           |
-| Llama3.2-3B                      | meta-llama/Llama-3.2-3B                  | 128K           |
-| Llama3.2-11B-Vision              | meta-llama/Llama-3.2-11B-Vision          | 128K           |
-| Llama3.2-90B-Vision              | meta-llama/Llama-3.2-90B-Vision          | 128K           |
-| Llama3.2-1B-Instruct             | meta-llama/Llama-3.2-1B-Instruct         | 128K           |
-| Llama3.2-3B-Instruct             | meta-llama/Llama-3.2-3B-Instruct         | 128K           |
-| Llama3.2-11B-Vision-Instruct     | meta-llama/Llama-3.2-11B-Vision-Instruct | 128K           |
-| Llama3.2-90B-Vision-Instruct     | meta-llama/Llama-3.2-90B-Vision-Instruct | 128K           |
-| Llama-Guard-3-11B-Vision         | meta-llama/Llama-Guard-3-11B-Vision      | 128K           |
-| Llama-Guard-3-1B:int4-mp1        | meta-llama/Llama-Guard-3-1B-INT4         | 128K           |
-| Llama-Guard-3-1B                 | meta-llama/Llama-Guard-3-1B              | 128K           |
-| Llama-Guard-3-8B                 | meta-llama/Llama-Guard-3-8B              | 128K           |
-| Llama-Guard-3-8B:int8-mp1        | meta-llama/Llama-Guard-3-8B-INT8         | 128K           |
-| Prompt-Guard-86M                 | meta-llama/Prompt-Guard-86M              | 128K           |
-| Llama-Guard-2-8B                 | meta-llama/Llama-Guard-2-8B              | 4K             |
-```
-
-To download models, you can use the `llama download` command.
-
-### Downloading from [Meta](https://llama.meta.com/llama-downloads/)
-
-Here is an example download command to get the 3B-Instruct/11B-Vision-Instruct model. You will need META_URL which can be obtained from [here](https://llama.meta.com/docs/getting_the_models/meta/)
-
-Download the required checkpoints using the following commands:
+First, install the Hugging Face CLI:

 ```bash
-# download the 8B model, this can be run on a single GPU
-llama download --source meta --model-id Llama3.2-3B-Instruct --meta-url META_URL
-
-# you can also get the 70B model, this will require 8 GPUs however
-llama download --source meta --model-id Llama3.2-11B-Vision-Instruct --meta-url META_URL
-
-# llama-agents have safety enabled by default. For this, you will need
-# safety models -- Llama-Guard and Prompt-Guard
-llama download --source meta --model-id Prompt-Guard-86M --meta-url META_URL
-llama download --source meta --model-id Llama-Guard-3-1B --meta-url META_URL
+pip install huggingface_hub[cli]
 ```

-### Downloading from [Hugging Face](https://huggingface.co/meta-llama)
+Then authenticate and download models:

-Essentially, the same commands above work, just replace `--source meta` with `--source huggingface`.

 ```bash
-llama download --source huggingface --model-id Llama3.1-8B-Instruct --hf-token <HF_TOKEN>
-
-llama download --source huggingface --model-id Llama3.1-70B-Instruct --hf-token <HF_TOKEN>
-
-llama download --source huggingface --model-id Llama-Guard-3-1B --ignore-patterns *original*
-llama download --source huggingface --model-id Prompt-Guard-86M --ignore-patterns *original*
-```
-
-**Important:** Set your environment variable `HF_TOKEN` or pass in `--hf-token` to the command to validate your access. You can find your token at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens).
-
-```{tip}
-Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored.
+# Authenticate with Hugging Face
+huggingface-cli login
+
+# Download a model
+huggingface-cli download meta-llama/Llama-3.2-3B-Instruct --local-dir ~/.llama/Llama-3.2-3B-Instruct
 ```

 ## List the downloaded models

-To list the downloaded models with the following command:
-```
-llama model list --downloaded
-```
-
-You should see a table like this:
-
-```
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
-┃ Model                                   ┃ Size     ┃ Modified Time       ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
-│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
-│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
-│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
-│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
-│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
-│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
-│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
-│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
-│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
-└─────────────────────────────────────────┴──────────┴─────────────────────┘
-```
+To list the downloaded models, you can use the Hugging Face CLI:
+```bash
+# List all downloaded models in your local cache
+huggingface-cli scan-cache
+```

-## Understand the models
-
-The `llama model` command helps you explore the model’s interface.
-
-1. `download`: Download the model from different sources. (meta, huggingface)
-2. `list`: Lists all the models available for download with hardware requirements for deploying the models.
-3. `prompt-format`: Show llama model message formats.
-4. `describe`: Describes all the properties of the model.
-
-### Sample Usage
-
-`llama model <subcommand> <options>`
-
-```
-llama model --help
-```
-```
-usage: llama model [-h] {download,list,prompt-format,describe,verify-download,remove} ...
-
-Work with llama models
-
-options:
-  -h, --help  show this help message and exit
-
-model_subcommands:
-  {download,list,prompt-format,describe,verify-download,remove}
-```
-
-### Describe
-
-You can use the describe command to know more about a model:
-```
-llama model describe -m Llama3.2-3B-Instruct
-```
-```
-| Model                       | Llama3.2-3B-Instruct             |
-| Hugging Face ID             | meta-llama/Llama-3.2-3B-Instruct |
-| Description                 | Llama 3.2 3b instruct model      |
-| Context Length              | 128K tokens                      |
-| Weights format              | bf16                             |
-| Model params.json           | { "dim": 3072, "n_layers": 28, "n_heads": 24, "n_kv_heads": 8, "vocab_size": 128256, "ffn_dim_multiplier": 1.0, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 500000.0, "use_scaled_rope": true } |
-| Recommended sampling params | { "temperature": 1.0, "top_p": 0.9, "top_k": 0 } |
-```
-
-### Prompt Format
-You can even run `llama model prompt-format` see all of the templates and their tokens:
-
-```
-llama model prompt-format -m Llama3.2-3B-Instruct
-```
-
-
-You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios.
-
-**NOTE**: Outputs in terminal are color printed to show special tokens.
-
-### Remove model
-You can run `llama model remove` to remove an unnecessary model:
-
-```
-llama model remove -m Llama-Guard-3-8B-int8
-```

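Taken together with the quick-start change earlier in this commit, the end-to-end flow after this PR looks roughly like the following sketch; the model choice and template name are taken from the updated docs and are illustrative:

```bash
MODEL="Llama-3.2-3B-Instruct"

# Fetch the checkpoint with the Hugging Face CLI
huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL

# Then build and run a Llama Stack server against it
INFERENCE_MODEL=meta-llama/$MODEL llama stack build --run --template meta-reference-gpu
```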
@ -51,11 +51,11 @@
 "metadata": {},
 "outputs": [],
 "source": [
-    "!pip install uv\n",
+    "!pip install uv \"huggingface_hub[cli]\"\n",
     "\n",
     "MODEL=\"Llama-4-Scout-17B-16E-Instruct\"\n",
     "# get meta url from llama.com\n",
-    "!uv run --with llama-stack llama model download --source meta --model-id $MODEL --meta-url <META_URL>\n",
+    "huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL\n",
     "\n",
     "model_id = f\"meta-llama/{MODEL}\""
 ]

|
@ -1,495 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import asyncio
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
import sys
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from datetime import UTC, datetime
|
|
||||||
from functools import partial
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
from pydantic import BaseModel, ConfigDict
|
|
||||||
from rich.console import Console
|
|
||||||
from rich.progress import (
|
|
||||||
BarColumn,
|
|
||||||
DownloadColumn,
|
|
||||||
Progress,
|
|
||||||
TextColumn,
|
|
||||||
TimeRemainingColumn,
|
|
||||||
TransferSpeedColumn,
|
|
||||||
)
|
|
||||||
from termcolor import cprint
|
|
||||||
|
|
||||||
from llama_stack.cli.subcommand import Subcommand
|
|
||||||
from llama_stack.models.llama.sku_list import LlamaDownloadInfo
|
|
||||||
from llama_stack.models.llama.sku_types import Model
|
|
||||||
|
|
||||||
|
|
||||||
class Download(Subcommand):
|
|
||||||
"""Llama cli for downloading llama toolchain assets"""
|
|
||||||
|
|
||||||
def __init__(self, subparsers: argparse._SubParsersAction):
|
|
||||||
super().__init__()
|
|
||||||
self.parser = subparsers.add_parser(
|
|
||||||
"download",
|
|
||||||
prog="llama download",
|
|
||||||
description="Download a model from llama.meta.com or Hugging Face Hub",
|
|
||||||
formatter_class=argparse.RawTextHelpFormatter,
|
|
||||||
)
|
|
||||||
setup_download_parser(self.parser)
|
|
||||||
|
|
||||||
|
|
||||||
def setup_download_parser(parser: argparse.ArgumentParser) -> None:
|
|
||||||
parser.add_argument(
|
|
||||||
"--source",
|
|
||||||
choices=["meta", "huggingface"],
|
|
||||||
default="meta",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--model-id",
|
|
||||||
required=False,
|
|
||||||
help="See `llama model list` or `llama model list --show-all` for the list of available models. Specify multiple model IDs with commas, e.g. --model-id Llama3.2-1B,Llama3.2-3B",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--hf-token",
|
|
||||||
type=str,
|
|
||||||
required=False,
|
|
||||||
default=None,
|
|
||||||
help="Hugging Face API token. Needed for gated models like llama2/3. Will also try to read environment variable `HF_TOKEN` as default.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--meta-url",
|
|
||||||
type=str,
|
|
||||||
required=False,
|
|
||||||
help="For source=meta, URL obtained from llama.meta.com after accepting license terms",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--max-parallel",
|
|
||||||
type=int,
|
|
||||||
required=False,
|
|
||||||
default=3,
|
|
||||||
help="Maximum number of concurrent downloads",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--ignore-patterns",
|
|
||||||
type=str,
|
|
||||||
required=False,
|
|
||||||
default="*.safetensors",
|
|
||||||
help="""For source=huggingface, files matching any of the patterns are not downloaded. Defaults to ignoring
|
|
||||||
safetensors files to avoid downloading duplicate weights.
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--manifest-file",
|
|
||||||
type=str,
|
|
||||||
help="For source=meta, you can download models from a manifest file containing a file => URL mapping",
|
|
||||||
required=False,
|
|
||||||
)
|
|
||||||
parser.set_defaults(func=partial(run_download_cmd, parser=parser))
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class DownloadTask:
|
|
||||||
url: str
|
|
||||||
output_file: str
|
|
||||||
total_size: int = 0
|
|
||||||
downloaded_size: int = 0
|
|
||||||
task_id: int | None = None
|
|
||||||
retries: int = 0
|
|
||||||
max_retries: int = 3
|
|
||||||
|
|
||||||
|
|
||||||
class DownloadError(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class CustomTransferSpeedColumn(TransferSpeedColumn):
|
|
||||||
def render(self, task):
|
|
||||||
if task.finished:
|
|
||||||
return "-"
|
|
||||||
return super().render(task)
|
|
||||||
|
|
||||||
|
|
||||||
class ParallelDownloader:
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
max_concurrent_downloads: int = 3,
|
|
||||||
buffer_size: int = 1024 * 1024,
|
|
||||||
timeout: int = 30,
|
|
||||||
):
|
|
||||||
self.max_concurrent_downloads = max_concurrent_downloads
|
|
||||||
self.buffer_size = buffer_size
|
|
||||||
self.timeout = timeout
|
|
||||||
self.console = Console()
|
|
||||||
self.progress = Progress(
|
|
||||||
TextColumn("[bold blue]{task.description}"),
|
|
||||||
BarColumn(bar_width=40),
|
|
||||||
"[progress.percentage]{task.percentage:>3.1f}%",
|
|
||||||
DownloadColumn(),
|
|
||||||
CustomTransferSpeedColumn(),
|
|
||||||
TimeRemainingColumn(),
|
|
||||||
console=self.console,
|
|
||||||
expand=True,
|
|
||||||
)
|
|
||||||
self.client_options = {
|
|
||||||
"timeout": httpx.Timeout(timeout),
|
|
||||||
"follow_redirects": True,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def retry_with_exponential_backoff(self, task: DownloadTask, func, *args, **kwargs):
|
|
||||||
last_exception = None
|
|
||||||
for attempt in range(task.max_retries):
|
|
||||||
try:
|
|
||||||
return await func(*args, **kwargs)
|
|
||||||
except Exception as e:
|
|
||||||
last_exception = e
|
|
||||||
if attempt < task.max_retries - 1:
|
|
||||||
wait_time = min(30, 2**attempt) # Cap at 30 seconds
|
|
||||||
self.console.print(
|
|
||||||
f"[yellow]Attempt {attempt + 1}/{task.max_retries} failed, "
|
|
||||||
f"retrying in {wait_time} seconds: {str(e)}[/yellow]"
|
|
||||||
)
|
|
||||||
await asyncio.sleep(wait_time)
|
|
||||||
continue
|
|
||||||
raise last_exception
|
|
||||||
|
|
||||||
async def get_file_info(self, client: httpx.AsyncClient, task: DownloadTask) -> None:
|
|
||||||
if task.total_size > 0:
|
|
||||||
self.progress.update(task.task_id, total=task.total_size)
|
|
||||||
return
|
|
||||||
|
|
||||||
async def _get_info():
|
|
||||||
response = await client.head(task.url, headers={"Accept-Encoding": "identity"}, **self.client_options)
|
|
||||||
response.raise_for_status()
|
|
||||||
return response
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = await self.retry_with_exponential_backoff(task, _get_info)
|
|
||||||
|
|
||||||
task.url = str(response.url)
|
|
||||||
task.total_size = int(response.headers.get("Content-Length", 0))
|
|
||||||
|
|
||||||
if task.total_size == 0:
|
|
||||||
raise DownloadError(
|
|
||||||
f"Unable to determine file size for {task.output_file}. "
|
|
||||||
"The server might not support range requests."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Update the progress bar's total size once we know it
|
|
||||||
if task.task_id is not None:
|
|
||||||
self.progress.update(task.task_id, total=task.total_size)
|
|
||||||
|
|
||||||
except httpx.HTTPError as e:
|
|
||||||
self.console.print(f"[red]Error getting file info: {str(e)}[/red]")
|
|
||||||
raise
|
|
||||||
|
|
||||||
def verify_file_integrity(self, task: DownloadTask) -> bool:
|
|
||||||
if not os.path.exists(task.output_file):
|
|
||||||
return False
|
|
||||||
return os.path.getsize(task.output_file) == task.total_size
|
|
||||||
|
|
||||||
async def download_chunk(self, client: httpx.AsyncClient, task: DownloadTask, start: int, end: int) -> None:
|
|
||||||
async def _download_chunk():
|
|
||||||
headers = {"Range": f"bytes={start}-{end}"}
|
|
||||||
async with client.stream("GET", task.url, headers=headers, **self.client_options) as response:
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
with open(task.output_file, "ab") as file:
|
|
||||||
file.seek(start)
|
|
||||||
async for chunk in response.aiter_bytes(self.buffer_size):
|
|
||||||
file.write(chunk)
|
|
||||||
task.downloaded_size += len(chunk)
|
|
||||||
self.progress.update(
|
|
||||||
task.task_id,
|
|
||||||
completed=task.downloaded_size,
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
await self.retry_with_exponential_backoff(task, _download_chunk)
|
|
||||||
except Exception as e:
|
|
||||||
raise DownloadError(
|
|
||||||
f"Failed to download chunk {start}-{end} after {task.max_retries} attempts: {str(e)}"
|
|
||||||
) from e
|
|
||||||
|
|
||||||
async def prepare_download(self, task: DownloadTask) -> None:
|
|
||||||
output_dir = os.path.dirname(task.output_file)
|
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
|
||||||
|
|
||||||
if os.path.exists(task.output_file):
|
|
||||||
task.downloaded_size = os.path.getsize(task.output_file)
|
|
||||||
|
|
||||||
async def download_file(self, task: DownloadTask) -> None:
|
|
||||||
try:
|
|
||||||
async with httpx.AsyncClient(**self.client_options) as client:
|
|
||||||
await self.get_file_info(client, task)
|
|
||||||
|
|
||||||
# Check if file is already downloaded
|
|
||||||
if os.path.exists(task.output_file):
|
|
||||||
if self.verify_file_integrity(task):
|
|
||||||
self.console.print(f"[green]Already downloaded {task.output_file}[/green]")
|
|
||||||
self.progress.update(task.task_id, completed=task.total_size)
|
|
||||||
return
|
|
||||||
|
|
||||||
await self.prepare_download(task)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Split the remaining download into chunks
|
|
||||||
chunk_size = 27_000_000_000 # Cloudfront max chunk size
|
|
||||||
chunks = []
|
|
||||||
|
|
||||||
current_pos = task.downloaded_size
|
|
||||||
while current_pos < task.total_size:
|
|
||||||
chunk_end = min(current_pos + chunk_size - 1, task.total_size - 1)
|
|
||||||
chunks.append((current_pos, chunk_end))
|
|
||||||
current_pos = chunk_end + 1
|
|
||||||
|
|
||||||
# Download chunks in sequence
|
|
||||||
for chunk_start, chunk_end in chunks:
|
|
||||||
await self.download_chunk(client, task, chunk_start, chunk_end)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
raise DownloadError(f"Download failed: {str(e)}") from e
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.progress.update(task.task_id, description=f"[red]Failed: {task.output_file}[/red]")
|
|
||||||
raise DownloadError(f"Download failed for {task.output_file}: {str(e)}") from e
|
|
||||||
|
|
||||||
def has_disk_space(self, tasks: list[DownloadTask]) -> bool:
|
|
||||||
try:
|
|
||||||
total_remaining_size = sum(task.total_size - task.downloaded_size for task in tasks)
|
|
||||||
dir_path = os.path.dirname(os.path.abspath(tasks[0].output_file))
|
|
||||||
free_space = shutil.disk_usage(dir_path).free
|
|
||||||
|
|
||||||
# Add 10% buffer for safety
|
|
||||||
required_space = int(total_remaining_size * 1.1)
|
|
||||||
|
|
||||||
if free_space < required_space:
|
|
||||||
self.console.print(
|
|
||||||
f"[red]Not enough disk space. Required: {required_space // (1024 * 1024)} MB, "
|
|
||||||
f"Available: {free_space // (1024 * 1024)} MB[/red]"
|
|
||||||
)
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
raise DownloadError(f"Failed to check disk space: {str(e)}") from e
|
|
||||||
|
|
||||||
async def download_all(self, tasks: list[DownloadTask]) -> None:
|
|
||||||
if not tasks:
|
|
||||||
raise ValueError("No download tasks provided")
|
|
||||||
|
|
||||||
if not os.environ.get("LLAMA_DOWNLOAD_NO_SPACE_CHECK") and not self.has_disk_space(tasks):
|
|
||||||
raise DownloadError("Insufficient disk space for downloads")
|
|
||||||
|
|
||||||
failed_tasks = []
|
|
||||||
|
|
||||||
with self.progress:
|
|
||||||
for task in tasks:
|
|
||||||
desc = f"Downloading {Path(task.output_file).name}"
|
|
||||||
task.task_id = self.progress.add_task(desc, total=task.total_size, completed=task.downloaded_size)
|
|
||||||
|
|
||||||
semaphore = asyncio.Semaphore(self.max_concurrent_downloads)
|
|
||||||
|
|
||||||
async def download_with_semaphore(task: DownloadTask):
|
|
||||||
async with semaphore:
|
|
||||||
try:
|
|
||||||
await self.download_file(task)
|
|
||||||
except Exception as e:
|
|
||||||
failed_tasks.append((task, str(e)))
|
|
||||||
|
|
||||||
await asyncio.gather(*(download_with_semaphore(task) for task in tasks))
|
|
||||||
|
|
||||||
if failed_tasks:
|
|
||||||
self.console.print("\n[red]Some downloads failed:[/red]")
|
|
||||||
for task, error in failed_tasks:
|
|
||||||
self.console.print(f"[red]- {Path(task.output_file).name}: {error}[/red]")
|
|
||||||
raise DownloadError(f"{len(failed_tasks)} downloads failed")
|
|
||||||
|
|
||||||
|
|
||||||
def _hf_download(
|
|
||||||
model: "Model",
|
|
||||||
hf_token: str,
|
|
||||||
ignore_patterns: str,
|
|
||||||
parser: argparse.ArgumentParser,
|
|
||||||
):
|
|
||||||
from huggingface_hub import snapshot_download
|
|
||||||
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
|
|
||||||
|
|
||||||
from llama_stack.core.utils.model_utils import model_local_dir
|
|
||||||
|
|
||||||
repo_id = model.huggingface_repo
|
|
||||||
if repo_id is None:
|
|
||||||
raise ValueError(f"No repo id found for model {model.descriptor()}")
|
|
||||||
|
|
||||||
output_dir = model_local_dir(model.descriptor())
|
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
|
||||||
try:
|
|
||||||
true_output_dir = snapshot_download(
|
|
||||||
repo_id,
|
|
||||||
local_dir=output_dir,
|
|
||||||
ignore_patterns=ignore_patterns,
|
|
||||||
token=hf_token,
|
|
||||||
library_name="llama-stack",
|
|
||||||
)
|
|
||||||
except GatedRepoError:
|
|
||||||
parser.error(
|
|
||||||
"It looks like you are trying to access a gated repository. Please ensure you "
|
|
||||||
"have access to the repository and have provided the proper Hugging Face API token "
|
|
||||||
"using the option `--hf-token` or by running `huggingface-cli login`."
|
|
||||||
"You can find your token by visiting https://huggingface.co/settings/tokens"
|
|
||||||
)
|
|
||||||
except RepositoryNotFoundError:
|
|
||||||
parser.error(f"Repository '{repo_id}' not found on the Hugging Face Hub or incorrect Hugging Face token.")
|
|
||||||
except Exception as e:
|
|
||||||
parser.error(e)
|
|
||||||
|
|
||||||
print(f"\nSuccessfully downloaded model to {true_output_dir}")
|
|
||||||
|
|
||||||
|
|
||||||
def _meta_download(
|
|
||||||
model: "Model",
|
|
||||||
model_id: str,
|
|
||||||
meta_url: str,
|
|
||||||
info: "LlamaDownloadInfo",
|
|
||||||
max_concurrent_downloads: int,
|
|
||||||
):
|
|
||||||
from llama_stack.core.utils.model_utils import model_local_dir
|
|
||||||
|
|
||||||
output_dir = Path(model_local_dir(model.descriptor()))
|
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
|
||||||
|
|
||||||
# Create download tasks for each file
|
|
||||||
tasks = []
|
|
||||||
for f in info.files:
|
|
||||||
output_file = str(output_dir / f)
|
|
||||||
url = meta_url.replace("*", f"{info.folder}/{f}")
|
|
||||||
total_size = info.pth_size if "consolidated" in f else 0
|
|
||||||
tasks.append(DownloadTask(url=url, output_file=output_file, total_size=total_size, max_retries=3))
|
|
||||||
|
|
||||||
# Initialize and run parallel downloader
|
|
||||||
downloader = ParallelDownloader(max_concurrent_downloads=max_concurrent_downloads)
|
|
||||||
asyncio.run(downloader.download_all(tasks))
|
|
||||||
|
|
||||||
cprint(f"\nSuccessfully downloaded model to {output_dir}", color="green", file=sys.stderr)
|
|
||||||
cprint(
|
|
||||||
f"\nView MD5 checksum files at: {output_dir / 'checklist.chk'}",
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
cprint(
|
|
||||||
f"\n[Optionally] To run MD5 checksums, use the following command: llama model verify-download --model-id {model_id}",
|
|
||||||
color="yellow",
|
|
||||||
file=sys.stderr,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ModelEntry(BaseModel):
|
|
||||||
model_id: str
|
|
||||||
files: dict[str, str]
|
|
||||||
|
|
||||||
model_config = ConfigDict(protected_namespaces=())
|
|
||||||
|
|
||||||
|
|
||||||
class Manifest(BaseModel):
|
|
||||||
models: list[ModelEntry]
|
|
||||||
expires_on: datetime
|
|
||||||
|
|
||||||
|
|
||||||
def _download_from_manifest(manifest_file: str, max_concurrent_downloads: int):
|
|
||||||
from llama_stack.core.utils.model_utils import model_local_dir
|
|
||||||
|
|
||||||
with open(manifest_file) as f:
|
|
||||||
d = json.load(f)
|
|
||||||
manifest = Manifest(**d)
|
|
||||||
|
|
||||||
if datetime.now(UTC) > manifest.expires_on.astimezone(UTC):
|
|
||||||
raise ValueError(f"Manifest URLs have expired on {manifest.expires_on}")
|
|
||||||
|
|
||||||
console = Console()
|
|
||||||
for entry in manifest.models:
|
|
||||||
console.print(f"[blue]Downloading model {entry.model_id}...[/blue]")
|
|
||||||
output_dir = Path(model_local_dir(entry.model_id))
|
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
|
||||||
|
|
||||||
if any(output_dir.iterdir()):
|
|
||||||
console.print(f"[yellow]Output directory {output_dir} is not empty.[/yellow]")
|
|
||||||
|
|
||||||
while True:
|
|
||||||
resp = input("Do you want to (C)ontinue download or (R)estart completely? (continue/restart): ")
|
|
||||||
if resp.lower() in ["restart", "r"]:
|
|
||||||
shutil.rmtree(output_dir)
|
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
|
||||||
break
|
|
||||||
elif resp.lower() in ["continue", "c"]:
|
|
||||||
console.print("[blue]Continuing download...[/blue]")
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
console.print("[red]Invalid response. Please try again.[/red]")
|
|
||||||
|
|
||||||
# Create download tasks for all files in the manifest
|
|
||||||
tasks = [
|
|
||||||
DownloadTask(url=url, output_file=str(output_dir / fname), max_retries=3)
|
|
||||||
for fname, url in entry.files.items()
|
|
||||||
]
|
|
||||||
|
|
||||||
# Initialize and run parallel downloader
|
|
||||||
downloader = ParallelDownloader(max_concurrent_downloads=max_concurrent_downloads)
|
|
||||||
asyncio.run(downloader.download_all(tasks))
|
|
||||||
|
|
||||||
|
|
||||||
def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
|
|
||||||
"""Main download command handler"""
|
|
||||||
try:
|
|
||||||
if args.manifest_file:
|
|
||||||
_download_from_manifest(args.manifest_file, args.max_parallel)
|
|
||||||
return
|
|
||||||
|
|
||||||
if args.model_id is None:
|
|
||||||
parser.error("Please provide a model id")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Handle comma-separated model IDs
|
|
||||||
model_ids = [model_id.strip() for model_id in args.model_id.split(",")]
|
|
||||||
|
|
||||||
from llama_stack.models.llama.sku_list import llama_meta_net_info, resolve_model
|
|
||||||
|
|
||||||
from .model.safety_models import (
|
|
||||||
prompt_guard_download_info_map,
|
|
||||||
prompt_guard_model_sku_map,
|
|
||||||
)
|
|
||||||
|
|
||||||
prompt_guard_model_sku_map = prompt_guard_model_sku_map()
|
|
||||||
prompt_guard_download_info_map = prompt_guard_download_info_map()
|
|
||||||
|
|
||||||
for model_id in model_ids:
|
|
||||||
if model_id in prompt_guard_model_sku_map.keys():
|
|
||||||
model = prompt_guard_model_sku_map[model_id]
|
|
||||||
info = prompt_guard_download_info_map[model_id]
|
|
||||||
else:
|
|
||||||
model = resolve_model(model_id)
|
|
||||||
if model is None:
|
|
||||||
parser.error(f"Model {model_id} not found")
|
|
||||||
continue
|
|
||||||
info = llama_meta_net_info(model)
|
|
||||||
|
|
||||||
if args.source == "huggingface":
|
|
||||||
_hf_download(model, args.hf_token, args.ignore_patterns, parser)
|
|
||||||
else:
|
|
||||||
meta_url = args.meta_url or input(
|
|
||||||
f"Please provide the signed URL for model {model_id} you received via email "
|
|
||||||
f"after visiting https://www.llama.com/llama-downloads/ "
|
|
||||||
f"(e.g., https://llama3-1.llamameta.net/*?Policy...): "
|
|
||||||
)
|
|
||||||
if "llamameta.net" not in meta_url:
|
|
||||||
parser.error("Invalid Meta URL provided")
|
|
||||||
_meta_download(model, model_id, meta_url, info, args.max_parallel)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
parser.error(f"Download failed: {str(e)}")
|
|
@ -6,11 +6,8 @@

 import argparse

-from .download import Download
-from .model import ModelParser
 from .stack import StackParser
 from .stack.utils import print_subcommand_description
-from .verify_download import VerifyDownload


 class LlamaCLIParser:

@ -30,10 +27,7 @@ class LlamaCLIParser:
         subparsers = self.parser.add_subparsers(title="subcommands")

         # Add sub-commands
-        ModelParser.create(subparsers)
         StackParser.create(subparsers)
-        Download.create(subparsers)
-        VerifyDownload.create(subparsers)

         print_subcommand_description(self.parser, subparsers)

@ -1,7 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
from .model import ModelParser # noqa
|
|
|
@@ -1,70 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import argparse
-import json
-
-from llama_stack.cli.subcommand import Subcommand
-from llama_stack.cli.table import print_table
-from llama_stack.models.llama.sku_list import resolve_model
-
-
-class ModelDescribe(Subcommand):
-    """Show details about a model"""
-
-    def __init__(self, subparsers: argparse._SubParsersAction):
-        super().__init__()
-        self.parser = subparsers.add_parser(
-            "describe",
-            prog="llama model describe",
-            description="Show details about a llama model",
-            formatter_class=argparse.RawTextHelpFormatter,
-        )
-        self._add_arguments()
-        self.parser.set_defaults(func=self._run_model_describe_cmd)
-
-    def _add_arguments(self):
-        self.parser.add_argument(
-            "-m",
-            "--model-id",
-            type=str,
-            required=True,
-            help="See `llama model list` or `llama model list --show-all` for the list of available models",
-        )
-
-    def _run_model_describe_cmd(self, args: argparse.Namespace) -> None:
-        from .safety_models import prompt_guard_model_sku_map
-
-        prompt_guard_model_map = prompt_guard_model_sku_map()
-        if args.model_id in prompt_guard_model_map.keys():
-            model = prompt_guard_model_map[args.model_id]
-        else:
-            model = resolve_model(args.model_id)
-
-        if model is None:
-            self.parser.error(
-                f"Model {args.model_id} not found; try 'llama model list' for a list of available models."
-            )
-            return
-
-        headers = [
-            "Model",
-            model.descriptor(),
-        ]
-
-        rows = [
-            ("Hugging Face ID", model.huggingface_repo or "<Not Available>"),
-            ("Description", model.description),
-            ("Context Length", f"{model.max_seq_length // 1024}K tokens"),
-            ("Weights format", model.quantization_format.value),
-            ("Model params.json", json.dumps(model.arch_args, indent=4)),
-        ]
-
-        print_table(
-            rows,
-            headers,
-            separate_rows=True,
-        )
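With `llama model describe` removed, basic model metadata has to come from the Hugging Face Hub instead. A minimal sketch using `huggingface_hub.model_info`; the repo id below is only an illustration, not something this commit prescribes:

```python
# Sketch: fetch basic model metadata from the Hugging Face Hub
# instead of the removed `llama model describe` command.
from huggingface_hub import model_info

info = model_info("meta-llama/Llama-3.2-1B-Instruct")  # illustrative, possibly gated repo
print("Repo:", info.id)
print("Tags:", info.tags)
print("Files:", [s.rfilename for s in (info.siblings or [])])
```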
@@ -1,24 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import argparse
-
-from llama_stack.cli.subcommand import Subcommand
-
-
-class ModelDownload(Subcommand):
-    def __init__(self, subparsers: argparse._SubParsersAction):
-        super().__init__()
-        self.parser = subparsers.add_parser(
-            "download",
-            prog="llama model download",
-            description="Download a model from llama.meta.com or Hugging Face Hub",
-            formatter_class=argparse.RawTextHelpFormatter,
-        )
-
-        from llama_stack.cli.download import setup_download_parser
-
-        setup_download_parser(self.parser)
@@ -1,119 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import argparse
-import os
-import time
-from pathlib import Path
-
-from llama_stack.cli.subcommand import Subcommand
-from llama_stack.cli.table import print_table
-from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
-from llama_stack.models.llama.sku_list import all_registered_models
-
-
-def _get_model_size(model_dir):
-    return sum(f.stat().st_size for f in Path(model_dir).rglob("*") if f.is_file())
-
-
-def _convert_to_model_descriptor(model):
-    for m in all_registered_models():
-        if model == m.descriptor().replace(":", "-"):
-            return str(m.descriptor())
-    return str(model)
-
-
-def _run_model_list_downloaded_cmd() -> None:
-    headers = ["Model", "Size", "Modified Time"]
-
-    rows = []
-    for model in os.listdir(DEFAULT_CHECKPOINT_DIR):
-        abs_path = os.path.join(DEFAULT_CHECKPOINT_DIR, model)
-        space_usage = _get_model_size(abs_path)
-        model_size = f"{space_usage / (1024**3):.2f} GB"
-        modified_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(os.path.getmtime(abs_path)))
-        rows.append(
-            [
-                _convert_to_model_descriptor(model),
-                model_size,
-                modified_time,
-            ]
-        )
-
-    print_table(
-        rows,
-        headers,
-        separate_rows=True,
-    )
-
-
-class ModelList(Subcommand):
-    """List available llama models"""
-
-    def __init__(self, subparsers: argparse._SubParsersAction):
-        super().__init__()
-        self.parser = subparsers.add_parser(
-            "list",
-            prog="llama model list",
-            description="Show available llama models",
-            formatter_class=argparse.RawTextHelpFormatter,
-        )
-        self._add_arguments()
-        self.parser.set_defaults(func=self._run_model_list_cmd)
-
-    def _add_arguments(self):
-        self.parser.add_argument(
-            "--show-all",
-            action="store_true",
-            help="Show all models (not just defaults)",
-        )
-        self.parser.add_argument(
-            "--downloaded",
-            action="store_true",
-            help="List the downloaded models",
-        )
-        self.parser.add_argument(
-            "-s",
-            "--search",
-            type=str,
-            required=False,
-            help="Search for the input string as a substring in the model descriptor(ID)",
-        )
-
-    def _run_model_list_cmd(self, args: argparse.Namespace) -> None:
-        from .safety_models import prompt_guard_model_skus
-
-        if args.downloaded:
-            return _run_model_list_downloaded_cmd()
-
-        headers = [
-            "Model Descriptor(ID)",
-            "Hugging Face Repo",
-            "Context Length",
-        ]
-
-        rows = []
-        for model in all_registered_models() + prompt_guard_model_skus():
-            if not args.show_all and not model.is_featured:
-                continue
-
-            descriptor = model.descriptor()
-            if not args.search or args.search.lower() in descriptor.lower():
-                rows.append(
-                    [
-                        descriptor,
-                        model.huggingface_repo,
-                        f"{model.max_seq_length // 1024}K",
-                    ]
-                )
-        if len(rows) == 0:
-            print(f"Did not find any model matching `{args.search}`.")
-        else:
-            print_table(
-                rows,
-                headers,
-                separate_rows=True,
-            )
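With `llama model list --downloaded` gone, locally cached models can still be inspected through the Hugging Face tooling. A rough Python sketch using `huggingface_hub.scan_cache_dir()`; note this is an assumption that models were downloaded into the Hub cache rather than placed directly under `~/.llama`:

```python
# Sketch: list locally cached Hub models, roughly replacing `llama model list --downloaded`.
from huggingface_hub import scan_cache_dir

cache = scan_cache_dir()
for repo in sorted(cache.repos, key=lambda r: r.repo_id):
    print(f"{repo.repo_id:60s} {repo.size_on_disk / 1024**3:8.2f} GB")
```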
@@ -1,43 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import argparse
-
-from llama_stack.cli.model.describe import ModelDescribe
-from llama_stack.cli.model.download import ModelDownload
-from llama_stack.cli.model.list import ModelList
-from llama_stack.cli.model.prompt_format import ModelPromptFormat
-from llama_stack.cli.model.remove import ModelRemove
-from llama_stack.cli.model.verify_download import ModelVerifyDownload
-from llama_stack.cli.stack.utils import print_subcommand_description
-from llama_stack.cli.subcommand import Subcommand
-
-
-class ModelParser(Subcommand):
-    """Llama cli for model interface apis"""
-
-    def __init__(self, subparsers: argparse._SubParsersAction):
-        super().__init__()
-        self.parser = subparsers.add_parser(
-            "model",
-            prog="llama model",
-            description="Work with llama models",
-            formatter_class=argparse.RawTextHelpFormatter,
-        )
-
-        self.parser.set_defaults(func=lambda args: self.parser.print_help())
-
-        subparsers = self.parser.add_subparsers(title="model_subcommands")
-
-        # Add sub-commands
-        ModelDownload.create(subparsers)
-        ModelList.create(subparsers)
-        ModelPromptFormat.create(subparsers)
-        ModelDescribe.create(subparsers)
-        ModelVerifyDownload.create(subparsers)
-        ModelRemove.create(subparsers)
-
-        print_subcommand_description(self.parser, subparsers)
@@ -1,133 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import argparse
-import textwrap
-from io import StringIO
-from pathlib import Path
-
-from llama_stack.cli.subcommand import Subcommand
-from llama_stack.cli.table import print_table
-from llama_stack.models.llama.sku_types import CoreModelId, ModelFamily, is_multimodal, model_family
-
-ROOT_DIR = Path(__file__).parent.parent.parent
-
-
-class ModelPromptFormat(Subcommand):
-    """Llama model cli for describe a model prompt format (message formats)"""
-
-    def __init__(self, subparsers: argparse._SubParsersAction):
-        super().__init__()
-        self.parser = subparsers.add_parser(
-            "prompt-format",
-            prog="llama model prompt-format",
-            description="Show llama model message formats",
-            epilog=textwrap.dedent(
-                """
-                Example:
-                    llama model prompt-format <options>
-                """
-            ),
-            formatter_class=argparse.RawTextHelpFormatter,
-        )
-        self._add_arguments()
-        self.parser.set_defaults(func=self._run_model_template_cmd)
-
-    def _add_arguments(self):
-        self.parser.add_argument(
-            "-m",
-            "--model-name",
-            type=str,
-            help="Example: Llama3.1-8B or Llama3.2-11B-Vision, etc\n"
-            "(Run `llama model list` to see a list of valid model names)",
-        )
-        self.parser.add_argument(
-            "-l",
-            "--list",
-            action="store_true",
-            help="List all available models",
-        )
-
-    def _run_model_template_cmd(self, args: argparse.Namespace) -> None:
-        import importlib.resources
-
-        # Only Llama 3.1 and 3.2 are supported
-        supported_model_ids = [
-            m for m in CoreModelId if model_family(m) in {ModelFamily.llama3_1, ModelFamily.llama3_2}
-        ]
-
-        model_list = [m.value for m in supported_model_ids]
-
-        if args.list:
-            headers = ["Model(s)"]
-            rows = []
-            for m in model_list:
-                rows.append(
-                    [
-                        m,
-                    ]
-                )
-            print_table(
-                rows,
-                headers,
-                separate_rows=True,
-            )
-            return
-
-        try:
-            model_id = CoreModelId(args.model_name)
-        except ValueError:
-            self.parser.error(
-                f"{args.model_name} is not a valid Model. Choose one from the list of valid models. "
-                f"Run `llama model list` to see the valid model names."
-            )
-
-        if model_id not in supported_model_ids:
-            self.parser.error(
-                f"{model_id} is not a valid Model. Choose one from the list of valid models. "
-                f"Run `llama model list` to see the valid model names."
-            )
-
-        llama_3_1_file = ROOT_DIR / "models" / "llama" / "llama3_1" / "prompt_format.md"
-        llama_3_2_text_file = ROOT_DIR / "models" / "llama" / "llama3_2" / "text_prompt_format.md"
-        llama_3_2_vision_file = ROOT_DIR / "models" / "llama" / "llama3_2" / "vision_prompt_format.md"
-        if model_family(model_id) == ModelFamily.llama3_1:
-            with importlib.resources.as_file(llama_3_1_file) as f:
-                content = f.open("r").read()
-        elif model_family(model_id) == ModelFamily.llama3_2:
-            if is_multimodal(model_id):
-                with importlib.resources.as_file(llama_3_2_vision_file) as f:
-                    content = f.open("r").read()
-            else:
-                with importlib.resources.as_file(llama_3_2_text_file) as f:
-                    content = f.open("r").read()
-
-        render_markdown_to_pager(content)
-
-
-def render_markdown_to_pager(markdown_content: str):
-    from rich.console import Console
-    from rich.markdown import Markdown
-    from rich.style import Style
-    from rich.text import Text
-
-    class LeftAlignedHeaderMarkdown(Markdown):
-        def parse_header(self, token):
-            level = token.type.count("h")
-            content = Text(token.content)
-            header_style = Style(color="bright_blue", bold=True)
-            header = Text(f"{'#' * level} ", style=header_style) + content
-            self.add_text(header)
-
-    # Render the Markdown
-    md = LeftAlignedHeaderMarkdown(markdown_content)
-
-    # Capture the rendered output
-    output = StringIO()
-    console = Console(file=output, force_terminal=True, width=100)  # Set a fixed width
-    console.print(md)
-    rendered_content = output.getvalue()
-    print(rendered_content)
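The pager logic removed above is essentially "render a Markdown file with rich". A stripped-down sketch of the same idea, with an illustrative file path rather than the repository's bundled prompt-format files:

```python
# Sketch: print a prompt-format Markdown document to the terminal with rich,
# as the removed `llama model prompt-format` command did internally.
from pathlib import Path

from rich.console import Console
from rich.markdown import Markdown

content = Path("prompt_format.md").read_text()  # illustrative path
Console(width=100).print(Markdown(content))
```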
@@ -1,68 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import argparse
-import os
-import shutil
-
-from llama_stack.cli.subcommand import Subcommand
-from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
-from llama_stack.models.llama.sku_list import resolve_model
-
-
-class ModelRemove(Subcommand):
-    """Remove the downloaded llama model"""
-
-    def __init__(self, subparsers: argparse._SubParsersAction):
-        super().__init__()
-        self.parser = subparsers.add_parser(
-            "remove",
-            prog="llama model remove",
-            description="Remove the downloaded llama model",
-            formatter_class=argparse.RawTextHelpFormatter,
-        )
-        self._add_arguments()
-        self.parser.set_defaults(func=self._run_model_remove_cmd)
-
-    def _add_arguments(self):
-        self.parser.add_argument(
-            "-m",
-            "--model",
-            required=True,
-            help="Specify the llama downloaded model name, see `llama model list --downloaded`",
-        )
-        self.parser.add_argument(
-            "-f",
-            "--force",
-            action="store_true",
-            help="Used to forcefully remove the llama model from the storage without further confirmation",
-        )
-
-    def _run_model_remove_cmd(self, args: argparse.Namespace) -> None:
-        from .safety_models import prompt_guard_model_sku_map
-
-        prompt_guard_model_map = prompt_guard_model_sku_map()
-
-        if args.model in prompt_guard_model_map.keys():
-            model = prompt_guard_model_map[args.model]
-        else:
-            model = resolve_model(args.model)
-
-        model_path = os.path.join(DEFAULT_CHECKPOINT_DIR, args.model.replace(":", "-"))
-
-        if model is None or not os.path.isdir(model_path):
-            print(f"'{args.model}' is not a valid llama model or does not exist.")
-            return
-
-        if args.force:
-            shutil.rmtree(model_path)
-            print(f"{args.model} removed.")
-        else:
-            if input(f"Are you sure you want to remove {args.model}? (y/n): ").strip().lower() == "y":
-                shutil.rmtree(model_path)
-                print(f"{args.model} removed.")
-            else:
-                print("Removal aborted.")
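Since `llama model remove` only deleted a checkpoint directory after a confirmation prompt, the same effect is a few lines of standard-library code. A sketch; the directory path is an assumption, not part of this diff:

```python
# Sketch: remove a downloaded checkpoint directory, mirroring the removed `llama model remove`.
import shutil
from pathlib import Path

model_dir = Path.home() / ".llama" / "checkpoints" / "Llama3.2-1B"  # illustrative path
if not model_dir.is_dir():
    print(f"{model_dir} does not exist.")
elif input(f"Are you sure you want to remove {model_dir}? (y/n): ").strip().lower() == "y":
    shutil.rmtree(model_dir)
    print(f"{model_dir} removed.")
else:
    print("Removal aborted.")
```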
@@ -1,64 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Any
-
-from pydantic import BaseModel, ConfigDict, Field
-
-from llama_stack.models.llama.sku_list import LlamaDownloadInfo
-from llama_stack.models.llama.sku_types import CheckpointQuantizationFormat
-
-
-class PromptGuardModel(BaseModel):
-    """Make a 'fake' Model-like object for Prompt Guard. Eventually this will be removed."""
-
-    model_id: str
-    huggingface_repo: str
-    description: str = "Prompt Guard. NOTE: this model will not be provided via `llama` CLI soon."
-    is_featured: bool = False
-    max_seq_length: int = 512
-    is_instruct_model: bool = False
-    quantization_format: CheckpointQuantizationFormat = CheckpointQuantizationFormat.bf16
-    arch_args: dict[str, Any] = Field(default_factory=dict)
-
-    def descriptor(self) -> str:
-        return self.model_id
-
-    model_config = ConfigDict(protected_namespaces=())
-
-
-def prompt_guard_model_skus():
-    return [
-        PromptGuardModel(model_id="Prompt-Guard-86M", huggingface_repo="meta-llama/Prompt-Guard-86M"),
-        PromptGuardModel(
-            model_id="Llama-Prompt-Guard-2-86M",
-            huggingface_repo="meta-llama/Llama-Prompt-Guard-2-86M",
-        ),
-        PromptGuardModel(
-            model_id="Llama-Prompt-Guard-2-22M",
-            huggingface_repo="meta-llama/Llama-Prompt-Guard-2-22M",
-        ),
-    ]
-
-
-def prompt_guard_model_sku_map() -> dict[str, Any]:
-    return {model.model_id: model for model in prompt_guard_model_skus()}
-
-
-def prompt_guard_download_info_map() -> dict[str, LlamaDownloadInfo]:
-    return {
-        model.model_id: LlamaDownloadInfo(
-            folder="Prompt-Guard" if model.model_id == "Prompt-Guard-86M" else model.model_id,
-            files=[
-                "model.safetensors",
-                "special_tokens_map.json",
-                "tokenizer.json",
-                "tokenizer_config.json",
-            ],
-            pth_size=1,
-        )
-        for model in prompt_guard_model_skus()
-    }
@@ -1,24 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import argparse
-
-from llama_stack.cli.subcommand import Subcommand
-
-
-class ModelVerifyDownload(Subcommand):
-    def __init__(self, subparsers: argparse._SubParsersAction):
-        super().__init__()
-        self.parser = subparsers.add_parser(
-            "verify-download",
-            prog="llama model verify-download",
-            description="Verify the downloaded checkpoints' checksums for models downloaded from Meta",
-            formatter_class=argparse.RawTextHelpFormatter,
-        )
-
-        from llama_stack.cli.verify_download import setup_verify_download_parser
-
-        setup_verify_download_parser(self.parser)
@@ -1,141 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import argparse
-import hashlib
-from dataclasses import dataclass
-from functools import partial
-from pathlib import Path
-
-from rich.console import Console
-from rich.progress import Progress, SpinnerColumn, TextColumn
-
-from llama_stack.cli.subcommand import Subcommand
-
-
-@dataclass
-class VerificationResult:
-    filename: str
-    expected_hash: str
-    actual_hash: str | None
-    exists: bool
-    matches: bool
-
-
-class VerifyDownload(Subcommand):
-    """Llama cli for verifying downloaded model files"""
-
-    def __init__(self, subparsers: argparse._SubParsersAction):
-        super().__init__()
-        self.parser = subparsers.add_parser(
-            "verify-download",
-            prog="llama verify-download",
-            description="Verify integrity of downloaded model files",
-            formatter_class=argparse.RawTextHelpFormatter,
-        )
-        setup_verify_download_parser(self.parser)
-
-
-def setup_verify_download_parser(parser: argparse.ArgumentParser) -> None:
-    parser.add_argument(
-        "--model-id",
-        required=True,
-        help="Model ID to verify (only for models downloaded from Meta)",
-    )
-    parser.set_defaults(func=partial(run_verify_cmd, parser=parser))
-
-
-def calculate_sha256(filepath: Path, chunk_size: int = 8192) -> str:
-    sha256_hash = hashlib.sha256()
-    with open(filepath, "rb") as f:
-        for chunk in iter(lambda: f.read(chunk_size), b""):
-            sha256_hash.update(chunk)
-    return sha256_hash.hexdigest()
-
-
-def load_checksums(checklist_path: Path) -> dict[str, str]:
-    checksums = {}
-    with open(checklist_path) as f:
-        for line in f:
-            if line.strip():
-                sha256sum, filepath = line.strip().split(" ", 1)
-                # Remove leading './' if present
-                filepath = filepath.lstrip("./")
-                checksums[filepath] = sha256sum
-    return checksums
-
-
-def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -> list[VerificationResult]:
-    results = []
-
-    with Progress(
-        SpinnerColumn(),
-        TextColumn("[progress.description]{task.description}"),
-        console=console,
-    ) as progress:
-        for filepath, expected_hash in checksums.items():
-            full_path = model_dir / filepath
-            task_id = progress.add_task(f"Verifying {filepath}...", total=None)
-
-            exists = full_path.exists()
-            actual_hash = None
-            matches = False
-
-            if exists:
-                actual_hash = calculate_sha256(full_path)
-                matches = actual_hash == expected_hash
-
-            results.append(
-                VerificationResult(
-                    filename=filepath,
-                    expected_hash=expected_hash,
-                    actual_hash=actual_hash,
-                    exists=exists,
-                    matches=matches,
-                )
-            )
-
-            progress.remove_task(task_id)
-
-    return results
-
-
-def run_verify_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
-    from llama_stack.core.utils.model_utils import model_local_dir
-
-    console = Console()
-    model_dir = Path(model_local_dir(args.model_id))
-    checklist_path = model_dir / "checklist.chk"
-
-    if not model_dir.exists():
-        parser.error(f"Model directory not found: {model_dir}")
-
-    if not checklist_path.exists():
-        parser.error(f"Checklist file not found: {checklist_path}")
-
-    checksums = load_checksums(checklist_path)
-    results = verify_files(model_dir, checksums, console)
-
-    # Print results
-    console.print("\nVerification Results:")
-
-    all_good = True
-    for result in results:
-        if not result.exists:
-            console.print(f"[red]❌ {result.filename}: File not found[/red]")
-            all_good = False
-        elif not result.matches:
-            console.print(
-                f"[red]❌ {result.filename}: Hash mismatch[/red]\n"
-                f"   Expected: {result.expected_hash}\n"
-                f"   Got: {result.actual_hash}"
-            )
-            all_good = False
-        else:
-            console.print(f"[green]✓ {result.filename}: Verified[/green]")
-
-    if all_good:
-        console.print("\n[green]All files verified successfully![/green]")
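Stripped of argparse and the rich progress UI, the verification removed above reduces to hashing each file listed in `checklist.chk`. A compact sketch; the model directory is an assumption:

```python
# Sketch: verify sha256 checksums against a Meta-style checklist.chk,
# the core of the removed `llama verify-download` command.
import hashlib
from pathlib import Path

model_dir = Path.home() / ".llama" / "checkpoints" / "Llama3.2-1B"  # illustrative path
for line in (model_dir / "checklist.chk").read_text().splitlines():
    if not line.strip():
        continue
    expected, name = line.strip().split(maxsplit=1)
    path = model_dir / name.lstrip("./")
    actual = hashlib.sha256(path.read_bytes()).hexdigest() if path.exists() else None
    status = "ok" if actual == expected else ("missing" if actual is None else "MISMATCH")
    print(f"{status:8s} {name}")
```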
@@ -29,31 +29,7 @@ The following environment variables can be configured:

 ## Prerequisite: Downloading Models

-Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](../../references/llama_cli_reference/download_models.md) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
-
-```
-$ llama model list --downloaded
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
-┃ Model                                   ┃ Size     ┃ Modified Time       ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
-│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
-│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
-│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
-│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
-│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
-│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
-│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
-│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
-│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
-└─────────────────────────────────────────┴──────────┴─────────────────────┘
-```
+Please check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](../../references/llama_cli_reference/download_models.md) here to download the models using the Hugging Face CLI.

 ## Running the Distribution
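The updated docs lean on the Hugging Face CLI; for readers who prefer the Python API, the equivalent call is `snapshot_download`. A sketch only: the model name and target directory are illustrative assumptions, and gated Meta repos require an accepted license plus a configured token.

```python
# Sketch: download checkpoints into ~/.llama with huggingface_hub instead of the removed CLI commands.
from pathlib import Path

from huggingface_hub import snapshot_download

model = "Llama-3.2-1B-Instruct"  # illustrative model name
local_dir = Path.home() / ".llama" / model
snapshot_download(repo_id=f"meta-llama/{model}", local_dir=local_dir)
print(f"Checkpoint files are in {local_dir}")
```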
@@ -25,14 +25,13 @@ classifiers = [
]
dependencies = [
    "aiohttp",
    "fastapi>=0.115.0,<1.0",  # server
    "fire",  # for MCP in LLS client
    "httpx",
-    "huggingface-hub>=0.34.0,<1.0",
    "jinja2>=3.1.6",
    "jsonschema",
    "llama-stack-client>=0.2.23",
    "openai>=1.107",  # for expires_after support
    "prompt-toolkit",
    "python-dotenv",
    "python-jose[cryptography]",
@@ -43,13 +42,13 @@ dependencies = [
    "tiktoken",
    "pillow",
    "h11>=0.16.0",
    "python-multipart>=0.0.20",  # For fastapi Form
    "uvicorn>=0.34.0",  # server
    "opentelemetry-sdk>=1.30.0",  # server
    "opentelemetry-exporter-otlp-proto-http>=1.30.0",  # server
    "aiosqlite>=0.21.0",  # server - for metadata store
    "asyncpg",  # for metadata store
    "sqlalchemy[asyncio]>=2.0.41",  # server - for conversations
]

[project.optional-dependencies]
@@ -68,14 +67,14 @@ dev = [
    "pytest-cov",
    "pytest-html",
    "pytest-json-report",
    "pytest-socket",  # For blocking network access in unit tests
    "nbval",  # For notebook testing
    "black",
    "ruff",
    "types-requests",
    "types-setuptools",
    "pre-commit",
    "ruamel.yaml",  # needed for openapi generator
]
# These are the dependencies required for running unit tests.
unit = [
@@ -141,9 +140,7 @@ docs = [
    "requests",
]
codegen = ["rich", "pydantic>=2.11.9", "jinja2>=3.1.6"]
-benchmark = [
-    "locust>=2.39.1",
-]
+benchmark = ["locust>=2.39.1"]

[project.urls]
Homepage = "https://github.com/llamastack/llama-stack"
@@ -242,7 +239,6 @@ follow_imports = "silent"
# to exclude the entire directory.
exclude = [
    # As we fix more and more of these, we should remove them from the list
-    "^llama_stack/cli/download\\.py$",
    "^llama_stack.core/build\\.py$",
    "^llama_stack.core/client\\.py$",
    "^llama_stack.core/request_headers\\.py$",
@@ -332,6 +328,4 @@ classmethod-decorators = ["classmethod", "pydantic.field_validator"]
[tool.pytest.ini_options]
addopts = ["--durations=10"]
asyncio_mode = "auto"
-markers = [
-    "allow_network: Allow network access for specific unit tests",
-]
+markers = ["allow_network: Allow network access for specific unit tests"]
uv.lock (generated): 4 changed lines

@@ -1,5 +1,5 @@
version = 1
-revision = 3
+revision = 2
requires-python = ">=3.12"
resolution-markers = [
    "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
@@ -1774,7 +1774,6 @@ dependencies = [
    { name = "fire" },
    { name = "h11" },
    { name = "httpx" },
-    { name = "huggingface-hub" },
    { name = "jinja2" },
    { name = "jsonschema" },
    { name = "llama-stack-client" },
@@ -1896,7 +1895,6 @@ requires-dist = [
    { name = "fire" },
    { name = "h11", specifier = ">=0.16.0" },
    { name = "httpx" },
-    { name = "huggingface-hub", specifier = ">=0.34.0,<1.0" },
    { name = "jinja2", specifier = ">=3.1.6" },
    { name = "jsonschema" },
    { name = "llama-stack-client", specifier = ">=0.2.23" },